depot/third_party/nixpkgs/pkgs/development/python-modules/paddleocr/default.nix
Default email fa5436e0a7 Project import generated by Copybara.
GitOrigin-RevId: e8057b67ebf307f01bdcc8fba94d94f75039d1f6
2024-06-05 17:53:02 +02:00

116 lines
2.5 KiB
Nix

# Package definition for PaddleOCR, built with `buildPythonPackage` from the
# upstream GitHub release tag.
{
  lib,
  buildPythonPackage,
  pythonRelaxDepsHook,
  fetchFromGitHub,
  attrdict,
  beautifulsoup4,
  cython,
  fire,
  fonttools,
  lmdb,
  lxml,
  numpy,
  opencv4,
  openpyxl,
  pdf2docx,
  pillow,
  premailer,
  pyclipper,
  pymupdf,
  python-docx,
  rapidfuzz,
  scikit-image,
  shapely,
  tqdm,
  paddlepaddle,
  lanms-neo,
  polygon3,
}:

buildPythonPackage rec {
  pname = "paddleocr";
  version = "2.7.1";
  format = "setuptools";

  # Sources come from the upstream tag matching `version` (e.g. `v2.7.1`).
  src = fetchFromGitHub {
    owner = "PaddlePaddle";
    repo = "PaddleOCR";
    rev = "v${version}";
    hash = "sha256-5Dt4UL+7dwJNjcNnCVi3o8bLCt7/m/M6oh1vPu9rza8=";
  };

  patches = [
    # `ppocr.data.imaug` re-exports the `IaaAugment` and `CopyPaste` classes,
    # both of which need the `imgaug` package. `imgaug` is unmaintained and
    # has been removed from nixpkgs.
    #
    # PaddleOCR's image OCR feature does not use these classes, so it keeps
    # working once the `IaaAugment` and `CopyPaste` exports are stripped.
    # Some of the OCR model creation tooling shipped with PaddleOCR probably
    # breaks as a result, however.
    ./remove-import-imaug.patch
  ];

  nativeBuildInputs = [ pythonRelaxDepsHook ];

  # All version constraints are relaxed: trying to relax only pymupdf makes
  # the whole build fail.
  pythonRelaxDeps = true;

  # Requirements removed entirely from the package's declared dependencies.
  pythonRemoveDeps = [
    "imgaug"
    "visualdl"
    "opencv-python"
    "opencv-contrib-python"
  ];

  propagatedBuildInputs = [
    attrdict
    beautifulsoup4
    cython
    fire
    fonttools
    lmdb
    lxml
    numpy
    opencv4
    openpyxl
    pdf2docx
    pillow
    premailer
    pyclipper
    pymupdf
    python-docx
    rapidfuzz
    scikit-image
    shapely
    tqdm
    paddlepaddle
    lanms-neo
    polygon3
  ];

  # TODO: The tests depend, among possibly other things, on `cudatoolkit`,
  # but cudatoolkit fails to install, so the check phase stays disabled.
  # The earlier attempt is kept below for reference. NOTE(review): it refers
  # to `pkgs`, which is not an argument of this function, so it would need
  # adjusting before being re-enabled.
  # preCheck = "export HOME=$TMPDIR";
  # nativeCheckInputs = with pkgs; [ which cudatoolkit ];
  doCheck = false;

  meta = {
    description = "Multilingual OCR toolkits based on PaddlePaddle";
    longDescription = ''
      PaddleOCR aims to create multilingual, awesome, leading, and practical OCR
      tools that help users train better models and apply them into practice.
    '';
    homepage = "https://github.com/PaddlePaddle/PaddleOCR";
    changelog = "https://github.com/PaddlePaddle/PaddleOCR/releases/tag/v${version}";
    license = lib.licenses.asl20;
    maintainers = [ lib.maintainers.happysalada ];
    platforms = [
      "x86_64-linux"
      "x86_64-darwin"
      "aarch64-darwin"
    ];
  };
}