100 lines
3.1 KiB
Nix
100 lines
3.1 KiB
Nix
|
# Apache Tika, built from source with Maven.
#
# Installs the `tika-app` and `tika-server-standard` jars under
# `$out/share/tika` and creates two Java launchers: `$out/bin/tika-app` and
# `$out/bin/tika-server`.
#
# Arguments of note:
#   mvnDepsHash - hash of the fetched Maven dependencies; when left `null`,
#                 the value is looked up in `mvnDepsHashes` by `stdenv.system`.
#   enableOcr   - when true, `tesseract` is added to the PATH of the
#                 `tika-server` wrapper.
{
  lib,
  stdenv,
  maven,
  jdk8,
  fetchFromGitHub,
  makeWrapper,
  mvnDepsHash ? null,
  enableOcr ? true,
  tesseract,
  nixosTests,
}:

let
  # Known Maven dependency hashes, keyed by `stdenv.system`.
  mvnDepsHashes = {
    "x86_64-linux" = "sha256-M8O1EJtlTm+mVy/qxapRcBWxD14eYL/LLUxP2uOBoM4=";
    "aarch64-linux" = "sha256-+ewdV9g0MfgiBiRAimkIZp9lrOTKnKnBB1LqhIlOSaQ=";
    "x86_64-darwin" = "sha256-nUAy2+O8REuq6pOWb8d+/c/YxPxj+XwtCtkaxfihDzc=";
    "aarch64-darwin" = "sha256-D6adBXtBH1IokUwwA2Z6m+6rJP2xg6BK4rcPyDSgo6o=";
  };

  # Hash for the current platform. On a platform missing from the table this
  # evaluates to `lib.fakeHash` and emits a warning; because Nix is lazy, the
  # warning only fires when this value is actually used (i.e. when the caller
  # did not pass `mvnDepsHash`).
  knownMvnDepsHash =
    mvnDepsHashes.${stdenv.system}
      or (lib.warn "This platform doesn't have a default mvnDepsHash value, you'll need to specify it manually" lib.fakeHash);
in
maven.buildMavenPackage rec {
  pname = "tika";
  version = "2.9.2";

  src = fetchFromGitHub {
    owner = "apache";
    repo = "tika";
    rev = version;
    hash = "sha256-4pSQcLDKgIcU+YypJ/ywdthi6tI1852fGVOCREzUFH0=";
  };

  buildOffline = true;

  # Artifacts listed explicitly for the dependency fetch, in addition to
  # whatever Maven resolves on its own.
  manualMvnArtifacts = [
    "org.objenesis:objenesis:2.1"
    "org.apache.apache.resources:apache-jar-resource-bundle:1.5"
    "org.apache.maven.surefire:surefire-junit-platform:3.1.2"
    "org.junit.platform:junit-platform-launcher:1.10.0"
  ];

  mvnJdk = jdk8;

  # An explicitly passed hash always wins over the per-platform table.
  mvnHash = if mvnDepsHash != null then mvnDepsHash else knownMvnDepsHash;

  mvnParameters = toString [
    "-DskipTests=true" # skip tests (out of memory exceptions)
    "-Dossindex.skip" # skip dependency with vulnerability (recommended by upstream)
  ];

  nativeBuildInputs = [ makeWrapper ];

  installPhase =
    let
      # Java runtime used by both launchers.
      jre = jdk8.jre;
      # PATH entries for the server wrapper; tesseract only when OCR is enabled.
      binPath = lib.makeBinPath ([ jre ] ++ lib.optionals enableOcr [ tesseract ]);
    in
    ''
      runHook preInstall

      # Note: using * instead of version would match multiple files
      install -Dm644 tika-app/target/tika-app-${version}.jar $out/share/tika/tika-app.jar
      install -Dm644 tika-server/tika-server-standard/target/tika-server-standard-${version}.jar $out/share/tika/tika-server.jar

      makeWrapper ${jre}/bin/java $out/bin/tika-app \
        --add-flags "-jar $out/share/tika/tika-app.jar"
      makeWrapper ${jre}/bin/java $out/bin/tika-server \
        --prefix PATH : ${binPath} \
        --add-flags "-jar $out/share/tika/tika-server.jar"

      runHook postInstall
    '';

  passthru.tests = {
    inherit (nixosTests) tika;
  };

  meta = {
    changelog = "https://github.com/apache/tika/blob/${src.rev}/CHANGES.txt";
    # Nixpkgs convention: descriptions do not start with an article.
    description = "Toolkit for extracting metadata and text from over a thousand different file types";
    longDescription = ''
      The Apache Tika™ toolkit detects and extracts metadata and text
      from over a thousand different file types (such as PPT, XLS, and PDF).
      All of these file types can be parsed through a single interface,
      making Tika useful for search engine indexing, content analysis,
      translation, and much more.
    '';
    homepage = "https://tika.apache.org";
    license = lib.licenses.asl20;
    mainProgram = "tika-server";
    maintainers = with lib.maintainers; [ tomasajt ];
    sourceProvenance = with lib.sourceTypes; [
      fromSource
      binaryBytecode # maven dependencies
    ];
  };
}