4d5a95770c
GitOrigin-RevId: 3c5319ad3aa51551182ac82ea17ab1c6b0f0df89
66 lines
1.1 KiB
Nix
66 lines
1.1 KiB
Nix
# Package expression for PySpark, the Python bindings for Apache Spark,
# built from the source distribution published on PyPI.
#
# Arguments:
#   lib                 - library set; used here for meta (licenses,
#                         maintainers, sourceTypes).
#   buildPythonPackage  - builder function applied to the attribute set below.
#   fetchPypi           - fetcher used to obtain the source archive.
#   numpy, pandas,
#   pyarrow             - only referenced by the optional-dependency groups.
#   py4j                - the single unconditional runtime dependency.
#   pythonOlder         - version predicate used to disable old interpreters.
{ lib
, buildPythonPackage
, fetchPypi
, numpy
, pandas
, py4j
, pyarrow
, pythonOlder
}:

buildPythonPackage rec {
  pname = "pyspark";
  version = "3.3.2";
  format = "setuptools";

  # Not built for Python interpreters older than 3.7.
  disabled = pythonOlder "3.7";

  # pname and version are inherited so the fetched archive always matches the
  # attributes above; the hash must be updated together with `version`.
  src = fetchPypi {
    inherit pname version;
    hash = "sha256-Df1dtDAMH2zJwW2NvfuC2IG0sXKYTacTRO3hqdSJPag=";
  };

  # pypandoc is broken with pandoc2, so we just lose docs.
  #
  # Two edits are made to setup.py:
  #   1. every 'pypandoc' literal is deleted;
  #   2. the exact pin `py4j==` is loosened to `py4j>=`, so the py4j passed
  #      in above is accepted even when it is newer than the pinned release.
  postPatch = ''
    sed -i "s/'pypandoc'//" setup.py

    substituteInPlace setup.py \
      --replace py4j== 'py4j>='
  '';

  propagatedBuildInputs = [
    py4j
  ];

  # Extra dependency groups, exposed through passthru rather than being
  # pulled in unconditionally; consumers pick the groups they need.
  passthru.optional-dependencies = {
    ml = [
      numpy
    ];
    mllib = [
      numpy
    ];
    sql = [
      numpy
      pandas
      pyarrow
    ];
  };

  # Tests assume running spark instance
  doCheck = false;

  # With the test suite disabled, this import check is the only verification
  # that the built module can actually be loaded.
  pythonImportsCheck = [
    "pyspark"
  ];

  meta = with lib; {
    description = "Python bindings for Apache Spark";
    homepage = "https://github.com/apache/spark/tree/master/python";
    # NOTE(review): binaryBytecode is presumably declared because the PyPI
    # archive bundles prebuilt JVM artifacts alongside the Python sources;
    # confirm against the archive contents.
    sourceProvenance = with sourceTypes; [
      fromSource
      binaryBytecode
    ];
    license = licenses.asl20;
    maintainers = with maintainers; [ shlevy ];
  };
}