5ca88bfbb9
GitOrigin-RevId: 9f918d616c5321ad374ae6cb5ea89c9e04bf3e58
54 lines
1.5 KiB
Nix
54 lines
1.5 KiB
Nix
{ lib, stdenv, fetchFromGitHub, autoreconfHook, autoconf-archive, pkg-config
|
|
, leptonica, libpng, libtiff, icu, pango, opencl-headers }:
|
|
|
|
stdenv.mkDerivation rec {
|
|
pname = "tesseract";
|
|
version = "4.1.3";
|
|
|
|
src = fetchFromGitHub {
|
|
owner = "tesseract-ocr";
|
|
repo = "tesseract";
|
|
rev = version;
|
|
hash = "sha256-sV3w53ky13ESc0dGPutMGQ4TcmOeWJkvUwBPIyzSTc8=";
|
|
};
|
|
|
|
# leptonica 1.83 made internal structures private. using internal headers isn't
|
|
# great, but tesseract4's days are numbered anyway
|
|
postPatch = ''
|
|
sed -i '/allheaders.h/a#include "pix_internal.h"' src/textord/devanagari_processing.cpp
|
|
|
|
# gcc-13 compat fix, simulate this upstream patch:
|
|
# https://github.com/tesseract-ocr/tesseract/commit/17e795aaae7d40dbcb7d3365835c2f55ecc6355d.patch
|
|
# https://github.com/tesseract-ocr/tesseract/commit/c0db7b7e930322826e09981360e39fdbd16cc9b0.patch
|
|
|
|
sed -i src/ccutil/helpers.h -e '1i #include <climits>'
|
|
sed -i src/ccutil/helpers.h -e '1i #include <cstdint>'
|
|
sed -i src/dict/matchdefs.h -e '1i #include <cstdint>'
|
|
'';
|
|
|
|
enableParallelBuilding = true;
|
|
|
|
nativeBuildInputs = [
|
|
pkg-config
|
|
autoreconfHook
|
|
autoconf-archive
|
|
];
|
|
|
|
buildInputs = [
|
|
leptonica
|
|
libpng
|
|
libtiff
|
|
icu
|
|
pango
|
|
opencl-headers
|
|
];
|
|
|
|
meta = {
|
|
description = "OCR engine";
|
|
homepage = "https://github.com/tesseract-ocr/tesseract";
|
|
license = lib.licenses.asl20;
|
|
maintainers = with lib.maintainers; [ erikarvstedt ];
|
|
platforms = with lib.platforms; linux ++ darwin;
|
|
mainProgram = "tesseract";
|
|
};
|
|
}
|