f34ce41345
GitOrigin-RevId: b73c2221a46c13557b1b3be9c2070cc42cf01eb3
215 lines
5.3 KiB
Nix
215 lines
5.3 KiB
Nix
{
|
|
buildPythonPackage,
|
|
cloudpickle,
|
|
crcmod,
|
|
cython,
|
|
dill,
|
|
fastavro,
|
|
fasteners,
|
|
fetchFromGitHub,
|
|
fetchpatch,
|
|
freezegun,
|
|
grpcio,
|
|
grpcio-tools,
|
|
hdfs,
|
|
httplib2,
|
|
hypothesis,
|
|
lib,
|
|
mock,
|
|
mypy-protobuf,
|
|
numpy,
|
|
objsize,
|
|
orjson,
|
|
pandas,
|
|
parameterized,
|
|
proto-plus,
|
|
protobuf,
|
|
psycopg2,
|
|
pyarrow,
|
|
pydot,
|
|
pyhamcrest,
|
|
pymongo,
|
|
pytest-xdist,
|
|
pytestCheckHook,
|
|
python,
|
|
python-dateutil,
|
|
pytz,
|
|
pyyaml,
|
|
regex,
|
|
requests,
|
|
requests-mock,
|
|
scikit-learn,
|
|
setuptools,
|
|
sqlalchemy,
|
|
tenacity,
|
|
testcontainers,
|
|
typing-extensions,
|
|
zstandard,
|
|
}:
|
|
|
|
buildPythonPackage rec {
|
|
pname = "apache-beam";
|
|
version = "2.56.0";
|
|
pyproject = true;
|
|
|
|
src = fetchFromGitHub {
|
|
owner = "apache";
|
|
repo = "beam";
|
|
rev = "refs/tags/v${version}";
|
|
hash = "sha256-SD+93duc3vTIlS/LPOuzXeiUSpwX+GNrqW3GTJMVgKY=";
|
|
};
|
|
|
|
patches = [
|
|
(fetchpatch {
|
|
# https://github.com/apache/beam/pull/24143
|
|
name = "fix-for-dill-0.3.6.patch";
|
|
url = "https://github.com/apache/beam/commit/7e014435b816015d21cc07f3f6c80809f3d8023d.patch";
|
|
hash = "sha256-iUmnzrItTFM98w3mpadzrmtI3t0fucpSujAg/6qxCGk=";
|
|
stripLen = 2;
|
|
})
|
|
];
|
|
|
|
pythonRelaxDeps = [
|
|
# See https://github.com/NixOS/nixpkgs/issues/156957
|
|
"dill"
|
|
"numpy"
|
|
"pymongo"
|
|
|
|
# See https://github.com/NixOS/nixpkgs/issues/193613
|
|
"protobuf"
|
|
|
|
# As of apache-beam v2.45.0, the requirement is httplib2>=0.8,<0.21.0, but
|
|
# the current (2023-02-08) nixpkgs's httplib2 version is 0.21.0. This can be
|
|
# removed once beam is upgraded since the current requirement on master is
|
|
# for httplib2>=0.8,<0.22.0.
|
|
"httplib2"
|
|
|
|
# As of apache-beam v2.45.0, the requirement is pyarrow<10.0.0,>=0.15.1, but
|
|
# the current (2023-02-22) nixpkgs's pyarrow version is 11.0.0.
|
|
"pyarrow"
|
|
];
|
|
|
|
sourceRoot = "${src.name}/sdks/python";
|
|
|
|
nativeBuildInputs = [
|
|
cython
|
|
grpcio-tools
|
|
mypy-protobuf
|
|
setuptools
|
|
];
|
|
|
|
propagatedBuildInputs = [
|
|
cloudpickle
|
|
crcmod
|
|
dill
|
|
fastavro
|
|
fasteners
|
|
grpcio
|
|
hdfs
|
|
httplib2
|
|
numpy
|
|
objsize
|
|
orjson
|
|
proto-plus
|
|
protobuf
|
|
pyarrow
|
|
pydot
|
|
pymongo
|
|
python-dateutil
|
|
pytz
|
|
regex
|
|
requests
|
|
typing-extensions
|
|
zstandard
|
|
];
|
|
|
|
enableParallelBuilding = true;
|
|
|
|
pythonImportsCheck = [ "apache_beam" ];
|
|
|
|
checkInputs = [
|
|
freezegun
|
|
hypothesis
|
|
mock
|
|
pandas
|
|
parameterized
|
|
psycopg2
|
|
pyhamcrest
|
|
pytestCheckHook
|
|
pytest-xdist
|
|
pyyaml
|
|
requests-mock
|
|
scikit-learn
|
|
sqlalchemy
|
|
tenacity
|
|
testcontainers
|
|
];
|
|
|
|
# Make sure we're running the tests for the actually installed
|
|
# package, so that cython's .so files are available.
|
|
preCheck = "cd $out/${python.sitePackages}";
|
|
|
|
disabledTestPaths = [
|
|
# Fails with
|
|
# _______ ERROR collecting apache_beam/io/external/xlang_jdbcio_it_test.py _______
|
|
# apache_beam/io/external/xlang_jdbcio_it_test.py:80: in <module>
|
|
# class CrossLanguageJdbcIOTest(unittest.TestCase):
|
|
# apache_beam/io/external/xlang_jdbcio_it_test.py:99: in CrossLanguageJdbcIOTest
|
|
# container_init: Callable[[], Union[PostgresContainer, MySqlContainer]],
|
|
# E NameError: name 'MySqlContainer' is not defined
|
|
#
|
|
"apache_beam/io/external/xlang_jdbcio_it_test.py"
|
|
|
|
# These tests depend on the availability of specific servers backends.
|
|
"apache_beam/runners/portability/flink_runner_test.py"
|
|
"apache_beam/runners/portability/samza_runner_test.py"
|
|
"apache_beam/runners/portability/spark_runner_test.py"
|
|
|
|
# Fails starting from dill 0.3.6 because it tries to pickle pytest globals:
|
|
# https://github.com/uqfoundation/dill/issues/482#issuecomment-1139017499.
|
|
"apache_beam/transforms/window_test.py"
|
|
|
|
# See https://github.com/apache/beam/issues/25390.
|
|
"apache_beam/coders/slow_coders_test.py"
|
|
"apache_beam/dataframe/pandas_doctests_test.py"
|
|
"apache_beam/typehints/typed_pipeline_test.py"
|
|
"apache_beam/coders/fast_coders_test.py"
|
|
"apache_beam/dataframe/schemas_test.py"
|
|
];
|
|
|
|
disabledTests = [
|
|
# The reasons of failures for these tests are unclear.
|
|
# They reproduce in Docker with Ubuntu 22.04
|
|
# (= they're not `nixpkgs`-specific) but given the upstream uses
|
|
# quite elaborate testing infra with containers and multiple
|
|
# different runners - I don't expect them to help debugging these
|
|
# when running via our (= custom from their PoV) testing infra.
|
|
"test_with_main_session"
|
|
# AssertionErrors
|
|
"test_unified_repr"
|
|
"testDictComprehension"
|
|
"testDictComprehensionSimple"
|
|
"testGenerator"
|
|
"testGeneratorComprehension"
|
|
"testListComprehension"
|
|
"testNoneReturn"
|
|
"testSet"
|
|
"testTupleListComprehension"
|
|
"test_newtype"
|
|
"test_pardo_type_inference"
|
|
"test_get_output_batch_type"
|
|
"test_pformat_namedtuple_with_unnamed_fields"
|
|
"test_row_coder_fail_early_bad_schema"
|
|
# See https://github.com/apache/beam/issues/26004.
|
|
"test_batch_encode_decode"
|
|
];
|
|
|
|
meta = with lib; {
|
|
description = "Unified model for defining both batch and streaming data-parallel processing pipelines";
|
|
homepage = "https://beam.apache.org/";
|
|
license = licenses.asl20;
|
|
maintainers = with maintainers; [ ndl ];
|
|
# https://github.com/apache/beam/issues/27221
|
|
broken = lib.versionAtLeast pandas.version "2";
|
|
};
|
|
}
|