fa5436e0a7
GitOrigin-RevId: e8057b67ebf307f01bdcc8fba94d94f75039d1f6
116 lines
2.5 KiB
Nix
116 lines
2.5 KiB
Nix
# Package expression for PaddleOCR: fetches the upstream GitHub tag matching
# `version`, applies a local patch, and builds it as a setuptools package via
# `buildPythonPackage`.
{
  lib,
  buildPythonPackage,
  pythonRelaxDepsHook,
  fetchFromGitHub,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv4,
  openpyxl,
  pdf2docx,
  pillow,
  premailer,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
}:

let
  pname = "paddleocr";
  # Also used for the source tag and the changelog URL below.
  version = "2.7.1";
in
buildPythonPackage {
  inherit pname version;
  format = "setuptools";

  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    rev = "v${version}";
    hash = "sha256-5Dt4UL+7dwJNjcNnCVi3o8bLCt7/m/M6oh1vPu9rza8=";
  };

  patches = [
    # `ppocr.data.imaug` re-exports the `IaaAugment` and `CopyPaste` classes,
    # which depend on `imgaug`. That package is unmaintained and has been
    # removed from nixpkgs.
    #
    # The image OCR feature of PaddleOCR does not use these classes, so it
    # keeps working once the `IaaAugment` and `CopyPaste` exports are
    # stripped. Some of the OCR model creation tooling that PaddleOCR
    # provides probably breaks as a result, however.
    ./remove-import-imaug.patch
  ];

  nativeBuildInputs = [ pythonRelaxDepsHook ];

  # All version constraints are relaxed on purpose: trying to relax only
  # pymupdf makes the whole build fail.
  pythonRelaxDeps = true;

  # Requirements dropped from the package metadata. `imgaug` is the
  # dependency the patch above works around.
  # NOTE(review): presumably the opencv-* requirements are covered by
  # `opencv4` in propagatedBuildInputs -- confirm.
  pythonRemoveDeps = [
    "imgaug"
    "visualdl"
    "opencv-python"
    "opencv-contrib-python"
  ];

  propagatedBuildInputs = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv4
    openpyxl
    pdf2docx
    pillow
    premailer
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`,
  # but cudatoolkit fails to install, so the check phase stays disabled.
  # The disabled settings are kept for reference; note that `pkgs` is not an
  # argument of this expression, so they would need adapting before use.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  meta = {
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}