2024-06-05 15:53:02 +00:00
|
|
|
# Package definition for the Hugging Face `datasets` Python library, built
# from the GitHub tag that matches `version`.
{
  lib,
  aiohttp,
  buildPythonPackage,
  dill,
  fetchFromGitHub,
  fsspec,
  huggingface-hub,
  importlib-metadata,
  multiprocess,
  numpy,
  packaging,
  pandas,
  pyarrow,
  pythonOlder,
  requests,
  responses,
  tqdm,
  xxhash,
}:

buildPythonPackage rec {
  pname = "datasets";
  version = "2.20.0";
  format = "setuptools";

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "huggingface";
    # Spelled out rather than `pname`, so overriding pname cannot change
    # which repository is fetched.
    repo = "datasets";
    rev = "refs/tags/${version}";
    hash = "sha256-9mB4RXJVkmaK+fLEmyZAdf64YKGoAhE3RzMoj4/8K98=";
  };

  # Remove the pyarrow<14.0.1 vulnerability fix: comment out the
  # `import pyarrow_hotfix` line, since pyarrow-hotfix is not among the
  # inputs of this package.
  # `--replace-fail` aborts the build when the line is no longer present,
  # so a stale patch is noticed here instead of being skipped with a warning.
  postPatch = ''
    substituteInPlace src/datasets/features/features.py \
      --replace-fail "import pyarrow_hotfix" "#import pyarrow_hotfix"
  '';

  propagatedBuildInputs =
    [
      aiohttp
      dill
      fsspec
      huggingface-hub
      multiprocess
      numpy
      packaging
      pandas
      pyarrow
      requests
      responses
      tqdm
      xxhash
    ]
    # NOTE(review): with `disabled = pythonOlder "3.8"` above, this condition
    # cannot hold on a supported interpreter; kept so the argument list and
    # existing overrides stay unchanged.
    ++ lib.optionals (pythonOlder "3.8") [ importlib-metadata ];

  # Tests require pervasive internet access
  doCheck = false;

  # Module import will attempt to create a cache directory, so point
  # HF_MODULES_CACHE at the build's temporary directory.
  postFixup = "export HF_MODULES_CACHE=$TMPDIR";

  pythonImportsCheck = [ "datasets" ];

  meta = with lib; {
    description = "Open-access datasets and evaluation metrics for natural language processing";
    mainProgram = "datasets-cli";
    homepage = "https://github.com/huggingface/datasets";
    changelog = "https://github.com/huggingface/datasets/releases/tag/${version}";
    license = licenses.asl20;
    platforms = platforms.unix;
    maintainers = [ ];
  };
}
|