252 lines
5.5 KiB
Nix
252 lines
5.5 KiB
Nix
|
{
|
||
|
lib,
|
||
|
buildPythonPackage,
|
||
|
fetchFromGitHub,
|
||
|
|
||
|
# build-system
|
||
|
cython,
|
||
|
poetry-core,
|
||
|
setuptools,
|
||
|
|
||
|
# dependencies
|
||
|
cachetools,
|
||
|
click,
|
||
|
fsspec,
|
||
|
mmh3,
|
||
|
pydantic,
|
||
|
pyparsing,
|
||
|
ray,
|
||
|
requests,
|
||
|
rich,
|
||
|
sortedcontainers,
|
||
|
strictyaml,
|
||
|
tenacity,
|
||
|
zstandard,
|
||
|
|
||
|
# optional-dependencies
|
||
|
adlfs,
|
||
|
# getdaft,
|
||
|
duckdb,
|
||
|
pyarrow,
|
||
|
boto3,
|
||
|
gcsfs,
|
||
|
mypy-boto3-glue,
|
||
|
thrift,
|
||
|
pandas,
|
||
|
s3fs,
|
||
|
python-snappy,
|
||
|
psycopg2-binary,
|
||
|
sqlalchemy,
|
||
|
|
||
|
# tests
|
||
|
azure-core,
|
||
|
azure-storage-blob,
|
||
|
fastavro,
|
||
|
moto,
|
||
|
pyspark,
|
||
|
pytestCheckHook,
|
||
|
pytest-lazy-fixture,
|
||
|
pytest-mock,
|
||
|
pytest-timeout,
|
||
|
requests-mock,
|
||
|
pythonOlder,
|
||
|
}:
|
||
|
|
||
|
buildPythonPackage rec {
|
||
|
pname = "iceberg-python";
|
||
|
version = "0.8.1";
|
||
|
pyproject = true;
|
||
|
|
||
|
src = fetchFromGitHub {
|
||
|
owner = "apache";
|
||
|
repo = "iceberg-python";
|
||
|
tag = "pyiceberg-${version}";
|
||
|
hash = "sha256-L3YlOtzJv9R4TLeJGzfMQ+0nYtQEsqmgNZpW9B6vVAI=";
|
||
|
};
|
||
|
|
||
|
patches = lib.optionals (pythonOlder "3.12") [
|
||
|
# Build script fails to build the cython extension on python 3.11 (no issues with python 3.12):
|
||
|
# distutils.errors.DistutilsSetupError: each element of 'ext_modules' option must be an Extension instance or 2-tuple
|
||
|
# This error vanishes if Cython and setuptools imports are swapped
|
||
|
# https://stackoverflow.com/a/53356077/11196710
|
||
|
./reorder-imports-in-build-script.patch
|
||
|
];
|
||
|
|
||
|
build-system = [
|
||
|
cython
|
||
|
poetry-core
|
||
|
setuptools
|
||
|
];
|
||
|
|
||
|
# Prevents the cython build to fail silently
|
||
|
env.CIBUILDWHEEL = "1";
|
||
|
|
||
|
dependencies = [
|
||
|
cachetools
|
||
|
click
|
||
|
fsspec
|
||
|
mmh3
|
||
|
pydantic
|
||
|
pyparsing
|
||
|
ray
|
||
|
requests
|
||
|
rich
|
||
|
sortedcontainers
|
||
|
strictyaml
|
||
|
tenacity
|
||
|
zstandard
|
||
|
];
|
||
|
|
||
|
optional-dependencies = {
|
||
|
adlfs = [
|
||
|
adlfs
|
||
|
];
|
||
|
daft = [
|
||
|
# getdaft
|
||
|
];
|
||
|
duckdb = [
|
||
|
duckdb
|
||
|
pyarrow
|
||
|
];
|
||
|
dynamodb = [
|
||
|
boto3
|
||
|
];
|
||
|
gcsfs = [
|
||
|
gcsfs
|
||
|
];
|
||
|
glue = [
|
||
|
boto3
|
||
|
mypy-boto3-glue
|
||
|
];
|
||
|
hive = [
|
||
|
thrift
|
||
|
];
|
||
|
pandas = [
|
||
|
pandas
|
||
|
pyarrow
|
||
|
];
|
||
|
pyarrow = [
|
||
|
pyarrow
|
||
|
];
|
||
|
ray = [
|
||
|
pandas
|
||
|
pyarrow
|
||
|
ray
|
||
|
];
|
||
|
s3fs = [
|
||
|
s3fs
|
||
|
];
|
||
|
snappy = [
|
||
|
python-snappy
|
||
|
];
|
||
|
sql-postgres = [
|
||
|
psycopg2-binary
|
||
|
sqlalchemy
|
||
|
];
|
||
|
sql-sqlite = [
|
||
|
sqlalchemy
|
||
|
];
|
||
|
zstandard = [
|
||
|
zstandard
|
||
|
];
|
||
|
};
|
||
|
|
||
|
pythonImportsCheck = [
|
||
|
"pyiceberg"
|
||
|
# Compiled avro decoder (cython)
|
||
|
"pyiceberg.avro.decoder_fast"
|
||
|
];
|
||
|
|
||
|
nativeCheckInputs = [
|
||
|
azure-core
|
||
|
azure-storage-blob
|
||
|
boto3
|
||
|
fastavro
|
||
|
moto
|
||
|
mypy-boto3-glue
|
||
|
pandas
|
||
|
pyarrow
|
||
|
pyspark
|
||
|
pytest-lazy-fixture
|
||
|
pytest-mock
|
||
|
pytest-timeout
|
||
|
pytestCheckHook
|
||
|
requests-mock
|
||
|
s3fs
|
||
|
sqlalchemy
|
||
|
thrift
|
||
|
] ++ moto.optional-dependencies.server;
|
||
|
|
||
|
disabledTestPaths = [
|
||
|
# Several errors:
|
||
|
# - FileNotFoundError: [Errno 2] No such file or directory: '/nix/store/...-python3.12-pyspark-3.5.3/lib/python3.12/site-packages/pyspark/./bin/spark-submit'
|
||
|
# - requests.exceptions.ConnectionError: HTTPConnectionPool(host='localhost', port=8181): Max retries exceeded with url: /v1/config
|
||
|
# - thrift.transport.TTransport.TTransportException: Could not connect to any of [('127.0.0.1', 9083)]
|
||
|
"tests/integration"
|
||
|
];
|
||
|
|
||
|
disabledTests = [
|
||
|
# botocore.exceptions.EndpointConnectionError: Could not connect to the endpoint URL
|
||
|
"test_checking_if_a_file_exists"
|
||
|
"test_closing_a_file"
|
||
|
"test_fsspec_file_tell"
|
||
|
"test_fsspec_getting_length_of_file"
|
||
|
"test_fsspec_pickle_round_trip_s3"
|
||
|
"test_fsspec_raise_on_opening_file_not_found"
|
||
|
"test_fsspec_read_specified_bytes_for_file"
|
||
|
"test_fsspec_write_and_read_file"
|
||
|
"test_writing_avro_file"
|
||
|
|
||
|
# Require unpackaged gcsfs
|
||
|
"test_fsspec_converting_an_outputfile_to_an_inputfile_gcs"
|
||
|
"test_fsspec_new_input_file_gcs"
|
||
|
"test_fsspec_new_output_file_gcs"
|
||
|
"test_fsspec_pickle_roundtrip_gcs"
|
||
|
|
||
|
# Timeout (network access)
|
||
|
"test_fsspec_converting_an_outputfile_to_an_inputfile_adls"
|
||
|
"test_fsspec_new_abfss_output_file_adls"
|
||
|
"test_fsspec_new_input_file_adls"
|
||
|
"test_fsspec_pickle_round_trip_aldfs"
|
||
|
|
||
|
# TypeError: pyarrow.lib.large_list() takes no keyword argument
|
||
|
# From tests/io/test_pyarrow_stats.py:
|
||
|
"test_bounds"
|
||
|
"test_column_metrics_mode"
|
||
|
"test_column_sizes"
|
||
|
"test_metrics_mode_counts"
|
||
|
"test_metrics_mode_full"
|
||
|
"test_metrics_mode_non_default_trunc"
|
||
|
"test_metrics_mode_none"
|
||
|
"test_null_and_nan_counts"
|
||
|
"test_offsets"
|
||
|
"test_read_missing_statistics"
|
||
|
"test_record_count"
|
||
|
"test_value_counts"
|
||
|
"test_write_and_read_stats_schema"
|
||
|
# From tests/io/test_pyarrow.py:
|
||
|
"test_list_type_to_pyarrow"
|
||
|
"test_projection_add_column"
|
||
|
"test_projection_list_of_structs"
|
||
|
"test_read_list"
|
||
|
"test_schema_compatible_missing_nullable_field_nested"
|
||
|
"test_schema_compatible_nested"
|
||
|
"test_schema_mismatch_missing_required_field_nested"
|
||
|
"test_schema_to_pyarrow_schema_exclude_field_ids"
|
||
|
"test_schema_to_pyarrow_schema_include_field_ids"
|
||
|
# From tests/io/test_pyarrow_visitor.py
|
||
|
"test_round_schema_conversion_nested"
|
||
|
|
||
|
# Hangs forever (from tests/io/test_pyarrow.py)
|
||
|
"test_getting_length_of_file_gcs"
|
||
|
];
|
||
|
|
||
|
meta = {
|
||
|
description = "Python library for programmatic access to Apache Iceberg";
|
||
|
homepage = "https://github.com/apache/iceberg-python";
|
||
|
changelog = "https://github.com/apache/iceberg-python/releases/tag/pyiceberg-${version}";
|
||
|
license = lib.licenses.asl20;
|
||
|
maintainers = with lib.maintainers; [ GaetanLepage ];
|
||
|
};
|
||
|
}
|