2024-06-05 15:53:02 +00:00
|
|
|
{
|
|
|
|
lib,
|
|
|
|
stdenv,
|
|
|
|
botocore,
|
|
|
|
buildPythonPackage,
|
|
|
|
cryptography,
|
|
|
|
cssselect,
|
|
|
|
defusedxml,
|
|
|
|
fetchFromGitHub,
|
|
|
|
glibcLocales,
|
|
|
|
installShellFiles,
|
|
|
|
itemadapter,
|
|
|
|
itemloaders,
|
|
|
|
jmespath,
|
|
|
|
lxml,
|
|
|
|
packaging,
|
|
|
|
parsel,
|
|
|
|
pexpect,
|
|
|
|
protego,
|
|
|
|
pydispatcher,
|
|
|
|
pyopenssl,
|
|
|
|
pytestCheckHook,
|
|
|
|
pythonOlder,
|
|
|
|
queuelib,
|
|
|
|
service-identity,
|
|
|
|
setuptools,
|
|
|
|
sybil,
|
|
|
|
testfixtures,
|
|
|
|
tldextract,
|
|
|
|
twisted,
|
|
|
|
uvloop,
|
|
|
|
w3lib,
|
|
|
|
zope-interface,
|
2020-04-24 23:36:52 +00:00
|
|
|
}:
|
|
|
|
|
|
|
|
# Scrapy: builds the upstream GitHub release with the setuptools backend,
# installs the man page and shell completions from upstream's extras/
# directory, and runs the pytest suite minus the tests listed below.
buildPythonPackage rec {
  pname = "scrapy";
  version = "2.11.2";
  pyproject = true;

  disabled = pythonOlder "3.8";

  src = fetchFromGitHub {
    owner = "scrapy";
    repo = "scrapy";
    rev = "refs/tags/${version}";
    hash = "sha256-EaO1kQ3VSTwEW+r0kSKycOxHNTPwwCVjch1ZBrTU0qQ=";
  };

  # PEP 517 build backend; kept separate from the generic build-time tools.
  build-system = [ setuptools ];

  # Provides installManPage / installShellCompletion used in postInstall.
  nativeBuildInputs = [ installShellFiles ];

  # Runtime Python dependencies.
  dependencies = [
    cryptography
    cssselect
    defusedxml
    itemadapter
    itemloaders
    lxml
    packaging
    parsel
    protego
    pydispatcher
    pyopenssl
    queuelib
    service-identity
    tldextract
    twisted
    w3lib
    zope-interface
  ];

  # Only needed while running the test suite.
  nativeCheckInputs = [
    botocore
    glibcLocales
    jmespath
    pexpect
    pytestCheckHook
    sybil
    testfixtures
    uvloop
  ];

  # UTF-8 locale for the build; glibcLocales is listed in nativeCheckInputs,
  # presumably to make this locale available (confirm before removing either).
  LC_ALL = "en_US.UTF-8";

  disabledTestPaths = [
    "tests/test_proxy_connect.py"
    "tests/test_utils_display.py"
    "tests/test_command_check.py"
    # Don't test the documentation
    "docs"
  ];

  disabledTests =
    [
      # Requires network access
      "AnonymousFTPTestCase"
      "FTPFeedStorageTest"
      "FeedExportTest"
      "test_custom_asyncio_loop_enabled_true"
      "test_custom_loop_asyncio"
      "test_custom_loop_asyncio_deferred_signal"
      "FileFeedStoragePreFeedOptionsTest" # https://github.com/scrapy/scrapy/issues/5157
      "test_persist"
      "test_timeout_download_from_spider_nodata_rcvd"
      "test_timeout_download_from_spider_server_hangs"
      "test_unbounded_response"
      "CookiesMiddlewareTest"
      # Test fails on Hydra (also noted as flaky on darwin-aarch64); it is
      # disabled on every platform, so it is not repeated in the Darwin list.
      "test_start_requests_laziness"
    ]
    ++ lib.optionals stdenv.hostPlatform.isDarwin [
      "test_xmliter_encoding"
      "test_download"
      "test_reactor_default_twisted_reactor_select"
      "URIParamsSettingTest"
      "URIParamsFeedOptionTest"
      # flaky on darwin-aarch64
      "test_fixed_delay"
    ];

  # Install the man page and the zsh/bash completions from the source tree.
  postInstall = ''
    installManPage extras/scrapy.1
    installShellCompletion --cmd scrapy \
    --zsh extras/scrapy_zsh_completion \
    --bash extras/scrapy_bash_completion
  '';

  pythonImportsCheck = [ "scrapy" ];

  # NOTE(review): presumably required by tests that talk to servers on the
  # loopback interface inside the Darwin sandbox; confirm before removing.
  __darwinAllowLocalNetworking = true;

  meta = {
    description = "High-level web crawling and web scraping framework";
    mainProgram = "scrapy";
    longDescription = ''
      Scrapy is a fast high-level web crawling and web scraping framework, used to crawl
      websites and extract structured data from their pages. It can be used for a wide
      range of purposes, from data mining to monitoring and automated testing.
    '';
    homepage = "https://scrapy.org/";
    changelog = "https://github.com/scrapy/scrapy/raw/${version}/docs/news.rst";
    license = lib.licenses.bsd3;
    maintainers = with lib.maintainers; [ vinnymeller ];
  };
}
|