depot/third_party/nixpkgs/pkgs/development/python-modules/rapidocr-onnxruntime/default.nix
Default email 7e47f3658e Project import generated by Copybara.
GitOrigin-RevId: 1925c603f17fc89f4c8f6bf6f631a802ad85d784
2024-09-26 11:04:55 +00:00

126 lines
3.5 KiB
Nix

{
lib,
stdenv,
buildPythonPackage,
fetchFromGitHub,
fetchzip,
substitute,
pytestCheckHook,
setuptools,
pyclipper,
opencv4,
numpy,
six,
shapely,
pyyaml,
pillow,
onnxruntime,
}:
let
version = "1.3.24";
src = fetchFromGitHub {
owner = "RapidAI";
repo = "RapidOCR";
rev = "refs/tags/v${version}";
hash = "sha256-+iY+/IdOgsn+LPZQ4Kdzxuh31csQ7dyh5Zf552ne3N0=";
};
models = fetchzip {
url = "https://github.com/RapidAI/RapidOCR/releases/download/v1.1.0/required_for_whl_v1.3.0.zip";
hash = "sha256-j/0nzyvu/HfNTt5EZ+2Phe5dkyPOdQw/OZTz0yS63aA=";
stripRoot = false;
} + "/required_for_whl_v1.3.0/resources/models";
in
buildPythonPackage {
pname = "rapidocr-onnxruntime";
inherit version src;
pyproject = true;
sourceRoot = "${src.name}/python";
# HACK:
# Upstream uses a very unconventional structure to organize the packages, and we have to coax the
# existing infrastructure to work with it.
# See https://github.com/RapidAI/RapidOCR/blob/02829ef986bc2a5c4f33e9c45c9267bcf2d07a1d/.github/workflows/gen_whl_to_pypi_rapidocr_ort.yml#L80-L92
# for the "intended" way of building this package.
# The setup.py supplied by upstream tries to determine the current version by
# fetching the latest version of the package from PyPI, and then bumping the version number.
# This is not allowed in the Nix build environment as we do not have internet access,
# hence we patch that out and get the version from the build environment directly.
patches = [
(substitute {
src = ./setup-py-override-version-checking.patch;
substitutions = [
"--subst-var-by"
"version"
version
];
})
];
postPatch = ''
mv setup_onnxruntime.py setup.py
mkdir -p rapidocr_onnxruntime/models
ln -s ${models}/* rapidocr_onnxruntime/models
# Magic patch from upstream - what does this even do??
echo "from .rapidocr_onnxruntime.main import RapidOCR, VisRes" > __init__.py
'';
# Upstream expects the source files to be under rapidocr_onnxruntime/rapidocr_onnxruntime
# instead of rapidocr_onnxruntime for the wheel to build correctly.
preBuild = ''
mkdir rapidocr_onnxruntime_t
mv rapidocr_onnxruntime rapidocr_onnxruntime_t
mv rapidocr_onnxruntime_t rapidocr_onnxruntime
'';
# Revert the above hack
postBuild = ''
mv rapidocr_onnxruntime rapidocr_onnxruntime_t
mv rapidocr_onnxruntime_t/* .
'';
build-system = [ setuptools ];
dependencies = [
pyclipper
opencv4
numpy
six
shapely
pyyaml
pillow
onnxruntime
];
# Remove because we have adopted the `opencv4` as an attribute name.
pythonRemoveDeps = [ "opencv-python" ];
pythonImportsCheck = [ "rapidocr_onnxruntime" ];
nativeCheckInputs = [ pytestCheckHook ];
# These are tests for different backends.
disabledTestPaths = [
"tests/test_vino.py"
"tests/test_paddle.py"
];
meta = {
# This seems to be related to https://github.com/microsoft/onnxruntime/issues/10038
# Also some related issue: https://github.com/NixOS/nixpkgs/pull/319053#issuecomment-2167713362
broken = (stdenv.hostPlatform.isLinux && stdenv.hostPlatform.isAarch64);
changelog = "https://github.com/RapidAI/RapidOCR/releases/tag/v${version}";
description = "Cross platform OCR Library based on OnnxRuntime";
homepage = "https://github.com/RapidAI/RapidOCR";
license = with lib.licenses; [ asl20 ];
maintainers = with lib.maintainers; [ pluiedev ];
mainProgram = "rapidocr_onnxruntime";
};
}