2022-02-20 05:27:41 +00:00
|
|
|
# Python bindings for Apache Arrow, built from the `python/` subdirectory of the
# same source tree (and at the same version) as the `arrow-cpp` package.
{ lib
, stdenv
, buildPythonPackage
, python
, isPy3k
, arrow-cpp
, cffi
, cloudpickle
, cmake
, cython
, fsspec
, hypothesis
, numpy
, pandas
, pytestCheckHook
, pytest-lazy-fixture
, pkg-config
, scipy
, setuptools-scm
, six
}:

let
  # Map a boolean onto the 1/0 values assigned to the PYARROW_WITH_* variables.
  zero_or_one = cond: if cond then 1 else 0;

  # arrow-cpp with its `python3` argument overridden to the interpreter this
  # package is being built for.
  _arrow-cpp = arrow-cpp.override { python3 = python; };
in

buildPythonPackage rec {
  pname = "pyarrow";

  # Python 3 only.
  disabled = !isPy3k;

  # Version and source are taken from arrow-cpp so both always match.
  inherit (_arrow-cpp) version src;

  sourceRoot = "apache-arrow-${version}/python";

  nativeBuildInputs = [ cmake cython pkg-config setuptools-scm ];

  propagatedBuildInputs = [ numpy six cloudpickle scipy fsspec cffi ];

  checkInputs = [
    hypothesis
    pandas
    pytestCheckHook
    pytest-lazy-fixture
  ];

  PYARROW_BUILD_TYPE = "release";

  # Optional components. Flight follows whatever arrow-cpp was built with; the
  # others are always enabled.
  PYARROW_WITH_DATASET = zero_or_one true;
  PYARROW_WITH_FLIGHT = zero_or_one _arrow-cpp.enableFlight;
  PYARROW_WITH_PARQUET = zero_or_one true;
  PYARROW_WITH_HDFS = zero_or_one true;

  PYARROW_CMAKE_OPTIONS = [
    "-DCMAKE_INSTALL_RPATH=${ARROW_HOME}/lib"
  ];

  ARROW_HOME = _arrow-cpp;
  PARQUET_HOME = _arrow-cpp;

  # Only point at the test data when the test suite is actually run.
  ARROW_TEST_DATA = lib.optionalString doCheck _arrow-cpp.ARROW_TEST_DATA;

  doCheck = true;

  # cmake is a build input, but its configure hook is disabled here.
  # NOTE(review): presumably because setup.py invokes cmake itself - confirm.
  dontUseCmakeConfigure = true;

  preBuild = ''
    export PYARROW_PARALLEL=$NIX_BUILD_CORES
  '';

  pytestFlagsArray = [
    # Deselect a single test because pyarrow prints a 2-line error message where
    # only a single line is expected. The additional line of output comes from
    # the glog library which is an optional dependency of arrow-cpp that is
    # enabled in nixpkgs.
    # Upstream Issue: https://issues.apache.org/jira/browse/ARROW-11393
    "--deselect=pyarrow/tests/test_memory.py::test_env_var"
  ] ++ lib.optionals stdenv.isDarwin [
    # Requires loopback networking
    "--deselect=pyarrow/tests/test_ipc.py::test_socket_"
  ];

  dontUseSetuptoolsCheck = true;

  # Remove everything under pyarrow/ except the tests directory before testing.
  # NOTE(review): presumably so the tests import the installed package rather
  # than the source checkout - confirm.
  preCheck = ''
    shopt -s extglob
    rm -r pyarrow/!(tests)
  '';

  pythonImportsCheck = map (module: "pyarrow.${module}") ([
    "compute"
    "csv"
    "dataset"
    "fs"
    "hdfs"
    "json"
    "parquet"
  ] ++ lib.optionals _arrow-cpp.enableFlight [
    # Checked only when PYARROW_WITH_FLIGHT is set to 1 above; importing
    # pyarrow.flight would otherwise fail the build.
    "flight"
  ]);

  meta = with lib; {
    description = "A cross-language development platform for in-memory data";
    homepage = "https://arrow.apache.org/";
    license = licenses.asl20;
    platforms = platforms.unix;
    maintainers = with maintainers; [ veprbl cpcloud ];
  };
}
|