{ lib, stdenv, buildPythonPackage, fetchFromGitHub, rustPlatform, pytestCheckHook, libiconv, numpy, protobuf, pyarrow, Security, SystemConfiguration, typing-extensions, }: let arrow-testing = fetchFromGitHub { name = "arrow-testing"; owner = "apache"; repo = "arrow-testing"; rev = "5bab2f264a23f5af68f69ea93d24ef1e8e77fc88"; hash = "sha256-Pxx8ohUpXb5u1995IvXmxQMqWiDJ+7LAll/AjQP7ph8="; }; parquet-testing = fetchFromGitHub { name = "parquet-testing"; owner = "apache"; repo = "parquet-testing"; rev = "e13af117de7c4f0a4d9908ae3827b3ab119868f3"; hash = "sha256-rVI9zyk9IRDlKv4u8BeMb0HRdWLfCpqOlYCeUdA7BB8="; }; in buildPythonPackage rec { pname = "datafusion"; version = "40.1.0"; pyproject = true; src = fetchFromGitHub { name = "datafusion-source"; owner = "apache"; repo = "arrow-datafusion-python"; rev = "refs/tags/${version}"; hash = "sha256-5WOSlx4XW9zO6oTY16lWQElShLv0ubflVPfSSEGrFgg="; }; cargoDeps = rustPlatform.fetchCargoTarball { name = "datafusion-cargo-deps"; inherit src; hash = "sha256-hN03tbnH77VsMDxSMddMHIH00t7lUs5h8rTHbiMIExw="; }; nativeBuildInputs = with rustPlatform; [ cargoSetupHook maturinBuildHook ]; buildInputs = [ protobuf ] ++ lib.optionals stdenv.hostPlatform.isDarwin [ libiconv Security SystemConfiguration ]; dependencies = [ pyarrow typing-extensions ]; nativeCheckInputs = [ pytestCheckHook numpy ]; pythonImportsCheck = [ "datafusion" ]; pytestFlagsArray = [ "--pyargs" pname ]; preCheck = '' pushd $TMPDIR ln -s ${arrow-testing} ./testing ln -s ${parquet-testing} ./parquet ''; postCheck = '' popd ''; meta = with lib; { description = "Extensible query execution framework"; longDescription = '' DataFusion is an extensible query execution framework, written in Rust, that uses Apache Arrow as its in-memory format. ''; homepage = "https://arrow.apache.org/datafusion/"; changelog = "https://github.com/apache/arrow-datafusion-python/blob/${version}/CHANGELOG.md"; license = with licenses; [ asl20 ]; maintainers = with maintainers; [ cpcloud ]; }; }