# NLTK data sets packaged as individual derivations.
#
# Every data set is a single zip archive inside the nltk/nltk_data GitHub
# repository.  All packages are taken from the same pinned commit; each one
# fetches only its own archive and unpacks it below `$out/<location>/`.
{ lib, newScope, fetchFromGitHub, unzip, stdenvNoCC }:

let
  # Attributes shared by every data package.
  base = {
    version = "0-unstable-2024-07-29";

    # `unzip` is invoked explicitly from installPhase below.
    nativeBuildInputs = [ unzip ];

    # Nothing to compile: the archive only needs to be extracted.
    dontBuild = true;

    meta = {
      description = "NLTK Data";
      homepage = "https://github.com/nltk/nltk_data";
      license = lib.licenses.asl20;
      platforms = lib.platforms.all;
      maintainers = with lib.maintainers; [ happysalada ];
    };
  };

  # Build the derivation for one data set.
  #
  # Arguments:
  #   pname    - name of the data set; also the basename of its zip archive
  #              and of the directory copied out of that archive.
  #   location - directory under `packages/` in the repository that holds
  #              the archive; reused as the directory name below `$out`.
  #   hash     - hash handed to fetchFromGitHub for the sparse checkout.
  makeNltkDataPackage = { pname, location, hash }:
    let
      src = fetchFromGitHub {
        owner = "nltk";
        repo = "nltk_data";
        rev = "cfe82914f3c2d24363687f1db3b05e8b9f687e2b";
        inherit hash;
        # Only this package's archive is requested from the repository.
        sparseCheckout = [ "packages/${location}/${pname}.zip" ];
      };
    in
    # `version` (and everything else in `base`) is carried over by the merge,
    # so only the per-package attributes are listed here.
    stdenvNoCC.mkDerivation (base // {
      inherit pname src;

      installPhase = ''
        runHook preInstall

        mkdir -p "$out"
        unzip "${src}/packages/${location}/${pname}.zip"
        mkdir -p "$out/${location}"
        cp -R "${pname}/" "$out/${location}"

        runHook postInstall
      '';
    });
in
lib.makeScope newScope (self: {
  punkt = makeNltkDataPackage {
    pname = "punkt";
    location = "tokenizers";
    hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
  };

  punkt_tab = makeNltkDataPackage {
    pname = "punkt_tab";
    location = "tokenizers";
    # NOTE(review): this hash is byte-identical to the one used for `punkt`
    # although the sparse checkout selects a different archive.  It looks
    # like a copy-paste leftover; please confirm by refetching (e.g. with
    # `lib.fakeHash`) and replace it with the reported value if it differs.
    hash = "sha256-OzMkruoYbFKqzuimOXIpE5lhHz8tmSqOFoLT+fjdTVg=";
  };

  averaged_perceptron_tagger = makeNltkDataPackage {
    pname = "averaged_perceptron_tagger";
    location = "taggers";
    hash = "sha256-tl3Cn2okhBkUtTXvAmFRx72Brez6iTGRdmFTwFmpk3M=";
  };

  snowball_data = makeNltkDataPackage {
    pname = "snowball_data";
    location = "stemmers";
    hash = "sha256-mNefwOPVJGz9kXV3LV4DuV7FJpNir/Nwg4ujd0CogEk=";
  };

  stopwords = makeNltkDataPackage {
    pname = "stopwords";
    location = "corpora";
    hash = "sha256-8lMjW5YI8h6dHJ/83HVY2OYGDyKPpgkUAKPISiAKqqk=";
  };
})