# 472aeafc57
# GitOrigin-RevId: c31898adf5a8ed202ce5bea9f347b1c6871f32d1
# 189 lines
# 4.8 KiB
# Nix
# 189 lines
# 4.8 KiB
# Nix
# Package expression for `dask`, built with `buildPythonPackage` from the
# upstream GitHub tag.
#
# The test suite is disabled in the main derivation (`doCheck = false`) and is
# instead exposed as `passthru.tests.check`, because `dask-expr` (a check input)
# depends on `dask` itself.
{
  lib,
  stdenv,
  buildPythonPackage,
  fetchFromGitHub,

  # build-system
  setuptools,

  # dependencies
  click,
  cloudpickle,
  fsspec,
  importlib-metadata,
  packaging,
  partd,
  pyyaml,
  toolz,

  # optional-dependencies
  numpy,
  pyarrow,
  lz4,
  pandas,
  distributed,
  bokeh,
  jinja2,

  # tests
  arrow-cpp,
  dask-expr,
  hypothesis,
  pytest-asyncio,
  pytest-rerunfailures,
  pytest-xdist,
  pytestCheckHook,
}:

let
  # Bound to a name so that the derivation can refer to itself: once for its
  # own `optional-dependencies` in `nativeCheckInputs`, and once to build the
  # test-enabled variant in `passthru.tests.check`.
  self = buildPythonPackage rec {
    pname = "dask";
    version = "2024.9.1";
    pyproject = true;

    src = fetchFromGitHub {
      owner = "dask";
      repo = "dask";
      rev = "refs/tags/${version}";
      hash = "sha256-lbWV6qgLQ8itJsnz7ojrgfrO12+AwNe1/DJvxBo5A+Q=";
    };

    build-system = [ setuptools ];

    dependencies = [
      click
      cloudpickle
      fsspec
      packaging
      partd
      pyyaml
      importlib-metadata
      toolz
    ];

    # `lib.fix` lets the `complete` extra be expressed in terms of the other
    # extras of the same set. The binder is called `extras` (not `self`) so it
    # does not shadow the package-level `self` defined by the enclosing `let`.
    optional-dependencies = lib.fix (extras: {
      array = [ numpy ];
      complete = [
        pyarrow
        lz4
      ] ++ extras.array ++ extras.dataframe ++ extras.distributed ++ extras.diagnostics;
      dataframe = [
        # dask-expr -> circular dependency with dask-expr
        numpy
        pandas
      ];
      # The right-hand `distributed` is the function argument (the package);
      # this attribute set is not `rec`, so there is no self-reference here.
      distributed = [ distributed ];
      diagnostics = [
        bokeh
        jinja2
      ];
    });

    nativeCheckInputs =
      [
        dask-expr
        pytestCheckHook
        pytest-rerunfailures
        pytest-xdist
        # from pandas[test]
        hypothesis
        pytest-asyncio
      ]
      ++ self.optional-dependencies.array
      ++ self.optional-dependencies.dataframe
      # pyarrow is only added when arrow-cpp is not marked broken.
      ++ lib.optionals (!arrow-cpp.meta.broken) [
        # support is sparse on aarch64
        pyarrow
      ];

    dontUseSetuptoolsCheck = true;

    # Strip versioneer from the build (the version is hard-coded instead) and
    # remove the `--durations`, `--cov-config` and `-v` options from the
    # pytest configuration in pyproject.toml.
    postPatch = ''
      # versioneer hack to set version of GitHub package
      echo "def get_versions(): return {'dirty': False, 'error': None, 'full-revisionid': None, 'version': '${version}'}" > dask/_version.py

      substituteInPlace setup.py \
        --replace-fail "import versioneer" "" \
        --replace-fail "version=versioneer.get_version()," "version='${version}'," \
        --replace-fail "cmdclass=versioneer.get_cmdclass()," ""

      substituteInPlace pyproject.toml \
        --replace-fail ', "versioneer[toml]==0.29"' "" \
        --replace-fail " --durations=10" "" \
        --replace-fail " --cov-config=pyproject.toml" "" \
        --replace-fail "\"-v" "\" "
    '';

    pytestFlagsArray = [
      # Rerun failed tests up to three times
      "--reruns 3"
      # Don't run tests that require network access
      "-m 'not network'"
    ];

    disabledTests =
      lib.optionals stdenv.hostPlatform.isDarwin [
        # Test requires features of python3Packages.psutil that are
        # blocked in sandboxed-builds
        "test_auto_blocksize_csv"
        # AttributeError: 'str' object has no attribute 'decode'
        "test_read_dir_nometa"
      ]
      ++ lib.optionals (stdenv.hostPlatform.isDarwin && stdenv.hostPlatform.isAarch64) [
        # concurrent.futures.process.BrokenProcessPool: A process in the process pool terminated abruptly...
        "test_foldby_tree_reduction"
        "test_to_bag"
      ]
      ++ [
        # https://github.com/dask/dask/issues/10347#issuecomment-1589683941
        "test_concat_categorical"
        # AttributeError: 'ArrowStringArray' object has no attribute 'tobytes'. Did you mean: 'nbytes'?
        "test_dot"
        "test_dot_nan"
        "test_merge_column_with_nulls"
        # FileNotFoundError: [Errno 2] No such file or directory: '/build/tmp301jryv_/createme/0.part'
        "test_to_csv_nodir"
        "test_to_json_results"
        # FutureWarning: Those tests should be working fine when pandas will have been upgraded to 2.1.1
        "test_apply"
        "test_apply_infer_columns"
      ];

    __darwinAllowLocalNetworking = true;

    # Only modules importable with the mandatory `dependencies` are listed
    # here; the array/dataframe modules are checked in `passthru.tests.check`.
    pythonImportsCheck = [
      "dask"
      "dask.bag"
      "dask.bytes"
      "dask.diagnostics"
    ];

    doCheck = false;

    # Enable tests via passthru to avoid cyclic dependency with dask-expr.
    passthru.tests = {
      check = self.overridePythonAttrs (old: {
        doCheck = true;
        pythonImportsCheck = [
          # Requires the `dask.optional-dependencies.array` that are only in `nativeCheckInputs`
          "dask.array"
          # Requires the `dask.optional-dependencies.dataframe` that are only in `nativeCheckInputs`
          "dask.dataframe"
          "dask.dataframe.io"
          "dask.dataframe.tseries"
        ] ++ old.pythonImportsCheck;
      });
    };

    meta = {
      description = "Minimal task scheduling abstraction";
      mainProgram = "dask";
      homepage = "https://dask.org/";
      changelog = "https://docs.dask.org/en/latest/changelog.html";
      license = lib.licenses.bsd3;
      maintainers = with lib.maintainers; [ GaetanLepage ];
    };
  };
in
self