2024-06-05 15:53:02 +00:00
|
|
|
{
|
|
|
|
lib,
|
|
|
|
buildPythonPackage,
|
|
|
|
fetchurl,
|
|
|
|
protobuf,
|
|
|
|
pymorphy3,
|
|
|
|
pymorphy3-dicts-uk,
|
|
|
|
sentencepiece,
|
|
|
|
setuptools,
|
|
|
|
spacy,
|
|
|
|
spacy-pkuseg,
|
|
|
|
spacy-transformers,
|
|
|
|
writeScript,
|
|
|
|
stdenv,
|
|
|
|
jq,
|
|
|
|
nix,
|
|
|
|
moreutils,
|
2023-02-22 10:55:15 +00:00
|
|
|
}:
|
2020-04-24 23:36:52 +00:00
|
|
|
let
|
2024-06-05 15:53:02 +00:00
|
|
|
# Build a single spaCy model as a Python package.
#
# Arguments (the attributes of one models.json entry):
#   pname    model name, e.g. "fr_dep_news_trf". Its first two characters are
#            compared against language codes to pick extra dependencies, and
#            the name itself is used as the module for pythonImportsCheck.
#   version  model release version; part of the release tag and tarball name.
#   sha256   hash of the release tarball, passed straight to fetchurl.
#   license  attribute name looked up under lib.licenses.
buildModelPackage =
  {
    pname,
    version,
    sha256,
    license,
  }:

  let
    # First two characters of the model name, matched below against
    # "ru", "uk" and "zh".
    lang = builtins.substring 0 2 pname;
    # Models for which the "protobuf<3.21.0" constraint in meta.json is relaxed.
    requires-protobuf = pname == "fr_dep_news_trf" || pname == "uk_core_news_trf";
  in
  buildPythonPackage {
    inherit pname version;
    pyproject = true;

    src = fetchurl {
      url = "https://github.com/explosion/spacy-models/releases/download/${pname}-${version}/${pname}-${version}.tar.gz";
      inherit sha256;
    };

    # spaCy is always required; the rest is selected from the model name.
    propagatedBuildInputs =
      [ spacy ]
      ++ lib.optionals (lib.hasSuffix "_trf" pname) [ spacy-transformers ]
      ++ lib.optionals (lang == "ru") [ pymorphy3 ]
      ++ lib.optionals (lang == "uk") [
        pymorphy3
        pymorphy3-dicts-uk
      ]
      ++ lib.optionals (lang == "zh") [ spacy-pkuseg ]
      ++ lib.optionals (pname == "fr_dep_news_trf") [ sentencepiece ];

    # `--replace` is deprecated in substituteInPlace. `--replace-warn` keeps the
    # previous semantics (a missing pattern only warns, it does not fail).
    # NOTE(review): consider `--replace-fail` once it is confirmed that both
    # affected models still carry this constraint.
    postPatch = lib.optionalString requires-protobuf ''
      substituteInPlace meta.json \
        --replace-warn "protobuf<3.21.0" "protobuf"
    '';

    nativeBuildInputs = [ setuptools ] ++ lib.optionals requires-protobuf [ protobuf ];

    pythonImportsCheck = [ pname ];

    # Bumps every entry of models.json to "<major>.<minor>.0" of the packaged
    # spaCy version, prefetching the new tarball hash for each one. Entries
    # already at that version are skipped. The script changes into
    # pkgs/development/python-modules/spacy/ relative to the current directory.
    passthru.updateScript = writeScript "update-spacy-models" ''
      #!${stdenv.shell}
      set -euo pipefail
      PATH=${
        lib.makeBinPath [
          jq
          nix
          moreutils
        ]
      }

      IFS=. read -r major minor _ <<<"${spacy.version}"
      spacyVersion="$major.$minor.0"

      pushd pkgs/development/python-modules/spacy/ || exit

      jq -r '.[] | .pname' models.json | while IFS= read -r pname; do
        if [ "$(jq --arg pname "$pname" -r '.[] | select(.pname == $pname) | .version' models.json)" == "$spacyVersion" ]; then
          continue
        fi

        newHash="$(nix-prefetch-url "https://github.com/explosion/spacy-models/releases/download/$pname-$spacyVersion/$pname-$spacyVersion.tar.gz")"
        jq --arg newHash "$newHash" --arg pname "$pname" --arg spacyVersion "$spacyVersion" \
          '[(.[] | select(.pname != $pname)), (.[] | select(.pname == $pname) | .sha256 = $newHash | .version = $spacyVersion)] | sort_by(.pname)' \
          models.json | sponge models.json
      done

      popd || exit
    '';

    meta = {
      description = "Models for the spaCy NLP library";
      homepage = "https://github.com/explosion/spacy-models";
      license = lib.licenses.${license};
    };
  };
|
|
|
|
|
2024-06-05 15:53:02 +00:00
|
|
|
# Turn a list of model descriptions into an attribute set of packages,
# with each package stored under its model's pname.
makeModelSet =
  models:
  builtins.listToAttrs (
    map (model: {
      name = model.pname;
      value = buildModelPackage model;
    }) models
  );
|
2023-02-22 10:55:15 +00:00
|
|
|
in
|
|
|
|
# Result of this file: read the model descriptions from ./models.json and
# expose one package per entry, keyed by the entry's pname.
makeModelSet (lib.importJSON ./models.json)
|