# File-viewer header (duplicated in the extract): 253 lines, 4.6 KiB, Nix.
# Inputs of the txtai derivation; every name below is referenced further down
# in this expression.
{
  # Helpers, fetcher and build backend
  lib,
  buildPythonPackage,
  fetchFromGitHub,
  pythonOlder,
  setuptools,

  # Packages listed in `dependencies`
  faiss,
  huggingface-hub,
  numpy,
  pyyaml,
  regex,
  torch,
  transformers,

  # Packages that make up the optional-dependencies groups
  accelerate,
  aiohttp,
  annoy,
  beautifulsoup4,
  croniter,
  duckdb,
  fastapi,
  fasttext,
  hnswlib,
  imagehash,
  networkx,
  nltk,
  onnx,
  onnxmltools,
  onnxruntime,
  openpyxl,
  pandas,
  pgvector,
  pillow,
  python-louvain,
  python-multipart,
  requests,
  rich,
  scikit-learn,
  scipy,
  sentence-transformers,
  sentencepiece,
  soundfile,
  sqlite-vec,
  tika,
  timm,
  ttstokenizer,
  uvicorn,
  xmltodict,
  # TODO add apache-libcloud
  # TODO add pymagnitude-lite

  # Test runner hook (nativeCheckInputs)
  pytestCheckHook,

  # Extra packages for the test suite (checkInputs); sqlalchemy is also part
  # of the `ann` group
  httpx,
  msgpack,
  sqlalchemy,
}:
let
  version = "7.4.0";

  # One list per optional-dependency group. Groups that need packages not yet
  # available here (apache-libcloud, pymagnitude-lite) keep those entries
  # commented out.
  api = [
    aiohttp
    fastapi
    pillow
    python-multipart
    uvicorn
  ];
  ann = [
    annoy
    hnswlib
    pgvector
    sqlalchemy
    sqlite-vec
  ];
  # cloud = [ apache-libcloud ];
  console = [ rich ];

  database = [
    duckdb
    pillow
  ];

  graph = [
    networkx
    python-louvain
  ];

  model = [
    onnx
    onnxruntime
  ];

  pipeline-audio = [
    onnx
    onnxruntime
    soundfile
    scipy
    ttstokenizer
  ];
  pipeline-data = [
    beautifulsoup4
    nltk
    pandas
    tika
  ];
  pipeline-image = [
    imagehash
    pillow
    timm
  ];
  pipeline-text = [
    fasttext
    sentencepiece
  ];
  pipeline-train = [
    accelerate
    onnx
    onnxmltools
    onnxruntime
  ];
  # Union of all pipeline-* groups.
  pipeline = pipeline-audio ++ pipeline-data ++ pipeline-image ++ pipeline-text ++ pipeline-train;

  similarity = [
    annoy
    fasttext
    hnswlib
    # pymagnitude-lite
    scikit-learn
    sentence-transformers
  ];
  workflow = [
    # apache-libcloud
    croniter
    openpyxl
    pandas
    pillow
    requests
    xmltodict
  ];
  # Union of every group defined above.
  all = api ++ ann ++ console ++ database ++ graph ++ model ++ pipeline ++ similarity ++ workflow;

  # Attribute set handed to buildPythonPackage. Every group defined above is
  # exported, including `pipeline-data`, so each one can be selected on its
  # own and not only through `pipeline` or `all`.
  optional-dependencies = {
    inherit
      ann
      api
      console
      database
      graph
      model
      pipeline-audio
      pipeline-data
      pipeline-image
      pipeline-text
      pipeline-train
      pipeline
      similarity
      workflow
      all
      ;
  };
in
# txtai derivation: fetches the tagged GitHub source, builds it as a pyproject
# package with setuptools, and runs a filtered subset of the upstream tests.
buildPythonPackage {
  pname = "txtai";
  inherit version;
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "neuml";
    repo = "txtai";
    rev = "refs/tags/v${version}";
    hash = "sha256-DQB12mFUMsKJ8cACowI1Vc7k2n1npdTOQknRmHd5EIM=";
  };

  # setuptools is the build backend; `build-system` is the attribute
  # buildPythonPackage reads for it when `pyproject = true`.
  build-system = [ setuptools ];

  pythonRemoveDeps = [
    # We call it faiss, not faiss-cpu.
    "faiss-cpu"
  ];

  dependencies = [
    faiss
    torch
    transformers
    huggingface-hub
    numpy
    pyyaml
    regex
  ];

  inherit optional-dependencies;

  # The Python imports check runs huggingface-hub which needs a writable directory.
  # `pythonImportsCheck` runs in the installPhase (before checkPhase).
  preInstall = ''
    export HF_HOME=$(mktemp -d)
  '';

  pythonImportsCheck = [ "txtai" ];

  # The test suite needs the ann, api and similarity groups in addition to the
  # test runner itself.
  nativeCheckInputs =
    [
      pytestCheckHook
    ]
    ++ optional-dependencies.ann
    ++ optional-dependencies.api
    ++ optional-dependencies.similarity;

  checkInputs = [
    httpx
    msgpack
    python-multipart
    sqlalchemy
  ];

  # The deselected paths depend on the huggingface hub and should be run as a passthru test
  # disabledTestPaths won't work as the problem is with the classes containing the tests
  # (in other words, it fails on __init__)
  pytestFlagsArray = [
    "test/python/test*.py"
    "--deselect=test/python/testcloud.py"
    "--deselect=test/python/testconsole.py"
    "--deselect=test/python/testembeddings.py"
    "--deselect=test/python/testgraph.py"
    "--deselect=test/python/testapi/testembeddings.py"
    "--deselect=test/python/testapi/testpipelines.py"
    "--deselect=test/python/testapi/testworkflow.py"
    "--deselect=test/python/testdatabase/testclient.py"
    "--deselect=test/python/testdatabase/testduckdb.py"
    "--deselect=test/python/testdatabase/testencoder.py"
    "--deselect=test/python/testworkflow.py"
  ];

  disabledTests = [
    # Hardcoded paths
    "testInvalidTar"
    "testInvalidZip"
    # Downloads from Huggingface
    "testPipeline"
    # Not finding sqlite-vec despite being supplied
    "testSQLite"
    "testSQLiteCustom"
  ];

  meta = {
    description = "Semantic search and workflows powered by language models";
    changelog = "https://github.com/neuml/txtai/releases/tag/v${version}";
    homepage = "https://github.com/neuml/txtai";
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ happysalada ];
  };
}