2024-06-05 15:53:02 +00:00
|
|
|
# Package expression for SoMaJo, built with buildPythonPackage from the
# upstream GitHub repository at the tag matching `version`.
{
  lib,
  stdenv,
  fetchFromGitHub,
  buildPythonPackage,
  pythonOlder,
  setuptools,
  regex,
}:

buildPythonPackage rec {
  pname = "somajo";
  version = "2.4.3";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "tsproisl";
    repo = "SoMaJo";
    rev = "refs/tags/v${version}";
    hash = "sha256-fq891LX6PukUEfrXplulhnisuPX/RqLAQ/5ty/Fvm9k=";
  };

  build-system = [ setuptools ];

  dependencies = [ regex ];

  # loops forever
  # (checks are therefore switched off when the host platform is Darwin)
  # NOTE(review): no check hook or nativeCheckInputs is listed in this
  # expression -- confirm that a test suite actually runs where doCheck is true.
  doCheck = !stdenv.hostPlatform.isDarwin;

  pythonImportsCheck = [ "somajo" ];

  # Plain attribute set with an explicit `lib.` prefix instead of `with lib;`,
  # so every name used here is visibly resolved.
  meta = {
    description = "Tokenizer and sentence splitter for German and English web texts";
    homepage = "https://github.com/tsproisl/SoMaJo";
    changelog = "https://github.com/tsproisl/SoMaJo/blob/v${version}/CHANGES.txt";
    license = lib.licenses.gpl3Plus;
    maintainers = [ ];
    mainProgram = "somajo-tokenizer";
  };
}
|