# Nix package expression for PySpark (Python bindings for Apache Spark),
# built from the PyPI source distribution with setuptools.
{
  lib,
  buildPythonPackage,
  fetchPypi,
  numpy,
  pandas,
  py4j,
  pyarrow,
  pythonOlder,
}:

buildPythonPackage rec {
  pname = "pyspark";
  version = "3.5.1";
  format = "setuptools";

  # Spark 3.5 dropped Python 3.7; upstream requires Python >= 3.8.
  disabled = pythonOlder "3.8";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-3WVp5Uc2Xq3E+Ie/V/FT5NWCpoxLSQ3kddVbmYFmSRA=";
  };

  # pypandoc is broken with pandoc2, so we just lose docs.
  # NOTE(review): the sed below succeeds silently when 'pypandoc' is absent
  # from setup.py; verify it is still needed for this version.
  postPatch = ''
    sed -i "s/'pypandoc'//" setup.py

    # Relax the exact py4j pin so the py4j version from the package set is accepted.
    substituteInPlace setup.py \
      --replace-fail py4j== 'py4j>='
  '';

  postFixup = ''
    # find_python_home.py has been wrapped as a shell script, so execute it
    # directly instead of passing it to the Python interpreter.
    substituteInPlace "$out/bin/find-spark-home" \
      --replace-fail 'export SPARK_HOME=$($PYSPARK_DRIVER_PYTHON "$FIND_SPARK_HOME_PYTHON_SCRIPT")' \
                     'export SPARK_HOME=$("$FIND_SPARK_HOME_PYTHON_SCRIPT")'

    # patch PYTHONPATH in pyspark so that it properly looks at SPARK_HOME
    substituteInPlace "$out/bin/pyspark" \
      --replace-fail 'export PYTHONPATH="''${SPARK_HOME}/python/:$PYTHONPATH"' \
                     'export PYTHONPATH="''${SPARK_HOME}/..:''${SPARK_HOME}/python/:$PYTHONPATH"'
  '';

  propagatedBuildInputs = [ py4j ];

  # Extras exposed to dependents; not pulled in by default.
  passthru.optional-dependencies = {
    ml = [ numpy ];
    mllib = [ numpy ];
    sql = [
      numpy
      pandas
      pyarrow
    ];
  };

  # Tests assume running spark instance
  doCheck = false;

  pythonImportsCheck = [ "pyspark" ];

  meta = {
    description = "Python bindings for Apache Spark";
    homepage = "https://github.com/apache/spark/tree/master/python";
    # NOTE(review): binaryBytecode presumably reflects prebuilt JARs shipped
    # inside the PyPI tarball; confirm.
    sourceProvenance = with lib.sourceTypes; [
      fromSource
      binaryBytecode
    ];
    license = lib.licenses.asl20;
    maintainers = with lib.maintainers; [ shlevy ];
  };
}