2024-06-05 15:53:02 +00:00
|
|
|
{
|
|
|
|
lib,
|
|
|
|
stdenv,
|
|
|
|
buildPythonPackage,
|
|
|
|
python,
|
|
|
|
pythonAtLeast,
|
|
|
|
pythonOlder,
|
|
|
|
arrow-cpp,
|
|
|
|
cffi,
|
|
|
|
cloudpickle,
|
|
|
|
cmake,
|
|
|
|
cython_0,
|
|
|
|
fsspec,
|
|
|
|
hypothesis,
|
|
|
|
numpy,
|
|
|
|
pandas,
|
|
|
|
pytestCheckHook,
|
|
|
|
pytest-lazy-fixture,
|
|
|
|
pkg-config,
|
|
|
|
setuptools,
|
|
|
|
setuptools-scm,
|
|
|
|
oldest-supported-numpy,
|
2022-02-20 05:27:41 +00:00
|
|
|
}:
|
2020-04-24 23:36:52 +00:00
|
|
|
|
|
|
|
let
  # Translate a boolean feature switch into the integer form used for the
  # PYARROW_* build variables: 1 when enabled, 0 when disabled.
  zero_or_one = enabled: if !enabled then 0 else 1;
in
|
|
|
|
|
|
|
|
buildPythonPackage rec {
|
|
|
|
pname = "pyarrow";

# Version and source are taken verbatim from the arrow-cpp package so that
# the Python bindings always match the C++ library they are built against.
version = arrow-cpp.version;
src = arrow-cpp.src;

pyproject = true;

disabled = pythonOlder "3.7";

# Build from the python/ subdirectory of the unpacked Arrow source tree.
sourceRoot = "apache-arrow-${version}/python";
|
|
|
|
|
2022-06-26 10:26:21 +00:00
|
|
|
# Tools and Python build requirements needed at build time.
nativeBuildInputs = [
  # native toolchain helpers
  cmake
  cython_0
  pkg-config
  # Python build requirements
  setuptools
  setuptools-scm
  oldest-supported-numpy
];

# The Arrow C++ library that the extension modules are built against.
buildInputs = [ arrow-cpp ];

# Python packages propagated to users of pyarrow.
propagatedBuildInputs = [
  cffi
  numpy
];
|
|
|
|
|
|
|
|
# Extra Python packages made available to the test suite.
checkInputs = [
  cloudpickle
  fsspec
];

# Test runner, its plugins and further packages used while testing.
nativeCheckInputs = [
  hypothesis
  pandas
  pytestCheckHook
  pytest-lazy-fixture
];
|
2020-04-24 23:36:52 +00:00
|
|
|
|
|
|
|
PYARROW_BUILD_TYPE = "release";

# Both the Arrow and the Parquet installation are provided by arrow-cpp.
ARROW_HOME = arrow-cpp;
PARQUET_HOME = arrow-cpp;

# Components that are always switched on (1) or off (0).
PYARROW_WITH_DATASET = 1;
PYARROW_WITH_HDFS = 1;
PYARROW_WITH_PARQUET = 1;
PYARROW_WITH_PARQUET_ENCRYPTION = 1;
PYARROW_BUNDLE_ARROW_CPP_HEADERS = 0;

# Components that follow the way arrow-cpp itself was configured.
PYARROW_WITH_FLIGHT = zero_or_one arrow-cpp.enableFlight;
PYARROW_WITH_S3 = zero_or_one arrow-cpp.enableS3;
PYARROW_WITH_GCS = zero_or_one arrow-cpp.enableGcs;

# Point the install RPATH at the lib directory of ARROW_HOME.
PYARROW_CMAKE_OPTIONS = [ "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib" ];
|
2020-04-24 23:36:52 +00:00
|
|
|
|
2023-02-16 17:41:37 +00:00
|
|
|
doCheck = true;

# Only expose the Arrow test data location when the tests are enabled;
# otherwise the variable is set to the empty string.
ARROW_TEST_DATA = if doCheck then arrow-cpp.ARROW_TEST_DATA else "";

# NOTE(review): cmake is listed in nativeBuildInputs; this presumably keeps
# its setup hook from running a configure phase of its own — confirm.
dontUseCmakeConfigure = true;

__darwinAllowLocalNetworking = true;
|
|
|
|
|
2020-04-24 23:36:52 +00:00
|
|
|
# Hand the number of build cores granted by Nix to the pyarrow build.
preBuild = "export PYARROW_PARALLEL=$NIX_BUILD_CORES\n";
|
|
|
|
|
2023-02-16 17:41:37 +00:00
|
|
|
postInstall =
  let
    # Destination of the pyarrow C++ headers inside the installed package.
    headerDir = "$out/${python.sitePackages}/pyarrow/include/arrow/python";
  in
  ''
    # Collect every header found below pyarrow/src/arrow and place it
    # (flattened) into the include directory of the installed package.
    mkdir -p "${headerDir}"
    find "$PWD/pyarrow/src/arrow" -type f -name '*.h' -exec cp {} "${headerDir}" \;
  '';
|
|
|
|
|
2024-06-05 15:53:02 +00:00
|
|
|
pytestFlagsArray =
  let
    # Turn test node ids given relative to pyarrow/tests/ into
    # `--deselect=...` flags for pytest.
    deselect = map (nodeId: "--deselect=pyarrow/tests/${nodeId}");
  in
  [
    # A couple of tests are missing fixture imports, luckily pytest offers a
    # clean solution.
    # NOTE(review): pytest documents `--fixtures` as an option that lists the
    # available fixtures; confirm that the test suite is really executed
    # when this flag is passed.
    "--fixtures pyarrow/tests/conftest.py"
  ]
  ++ deselect [
    # pyarrow prints a 2-line error message where only a single line is
    # expected. The additional line of output comes from the glog library,
    # an optional dependency of arrow-cpp that is enabled in nixpkgs.
    # Upstream issue: https://issues.apache.org/jira/browse/ARROW-11393
    "test_memory.py::test_env_var"
    # These tests require access to S3 via the internet.
    "test_fs.py::test_resolve_s3_region"
    "test_fs.py::test_s3_real_aws"
    "test_fs.py::test_s3_real_aws_region_selection"
    "test_fs.py::test_s3_options"
    # Flaky tests.
    "test_flight.py::test_roundtrip_errors"
    "test_pandas.py::test_threaded_pandas_import"
    # Flaky test, works locally but not on Hydra.
    "test_csv.py::TestThreadedCSVTableRead::test_cancellation"
    # Expects the arrow-cpp headers to be bundled.
    "test_cpp_internals.py::test_pyarrow_include"
  ]
  ++ lib.optionals stdenv.isDarwin (deselect [
    # Require loopback networking.
    "test_ipc.py::test_socket_"
    "test_flight.py::test_never_sends_data"
    "test_flight.py::test_large_descriptor"
    "test_flight.py::test_large_metadata_client"
    "test_flight.py::test_none_action_side_effect"
    # Fails to compile.
    "test_cython.py::test_cython_api"
  ])
  ++ lib.optionals (pythonAtLeast "3.11") (deselect [
    # Repr output prints a number instead of the enum name, so this fails.
    "test_fs.py::test_get_file_info"
  ])
  ++ lib.optionals stdenv.isLinux (deselect [
    # This test requires local networking.
    "test_fs.py::test_filesystem_from_uri_gcs"
  ]);
|
2021-02-05 17:12:51 +00:00
|
|
|
|
2023-02-16 17:41:37 +00:00
|
|
|
# Skip the GCS filesystem tests.
# NOTE(review): presumably matched by test name through pytestCheckHook — confirm.
disabledTests = [ "GcsFileSystem" ];
|
|
|
|
|
2020-04-24 23:36:52 +00:00
|
|
|
# NOTE(review): presumably turns off the setuptools-driven check, leaving the
# tests to pytestCheckHook (listed in nativeCheckInputs) — confirm.
dontUseSetuptoolsCheck = true;
|
2022-06-26 10:26:21 +00:00
|
|
|
|
2024-06-05 15:53:02 +00:00
|
|
|
preCheck =
  ''
    # Extended globbing is needed for the !(...) pattern below.
    shopt -s extglob
    # Remove everything below pyarrow/ except the tests and the top-level
    # conftest.py.
    rm -r pyarrow/!(conftest.py|tests)
    # Move the top-level conftest next to the tests and point the relative
    # import in the tests' own conftest at its new name.
    mv pyarrow/conftest.py pyarrow/tests/parent_conftest.py
    # --replace-fail aborts the build if upstream ever changes the import,
    # instead of silently leaving the file unpatched.
    substituteInPlace pyarrow/tests/conftest.py \
      --replace-fail "..conftest" ".parent_conftest"
  ''
  + lib.optionalString stdenv.isDarwin ''
    # OSError: [Errno 24] Too many open files
    ulimit -n 1024
  '';
|
|
|
|
|
|
|
|
# Modules listed for the post-install import check: the top-level package
# followed by its submodules.
pythonImportsCheck = [
  "pyarrow"
  "pyarrow.compute"
  "pyarrow.csv"
  "pyarrow.dataset"
  "pyarrow.feather"
  "pyarrow.flight"
  "pyarrow.fs"
  "pyarrow.json"
  "pyarrow.parquet"
];
|
2022-02-20 05:27:41 +00:00
|
|
|
|
2020-04-24 23:36:52 +00:00
|
|
|
# Package metadata; every value is referenced explicitly through `lib`.
meta = {
  description = "Cross-language development platform for in-memory data";
  homepage = "https://arrow.apache.org/";
  license = lib.licenses.asl20;
  platforms = lib.platforms.unix;
  maintainers = [
    lib.maintainers.veprbl
    lib.maintainers.cpcloud
  ];
};
|
|
|
|
}
|