# Package expression for grab-site, built as a Python application on top of a
# Python 3.8 interpreter whose package set is locally overridden (see `python`
# below).
{ lib, python38, fetchPypi, fetchFromGitHub }:

let
  # Python 3.8 with a customised package set. `self = python` makes the
  # overridden interpreter refer to itself, so packages taken from
  # `python.pkgs` are built against the overrides below.
  python = python38.override {
    self = python;
    packageOverrides = self: super: {
      # Replace the package set's sqlalchemy with release 1.3.24 from PyPI.
      # NOTE(review): presumably required by a dependency that is not
      # compatible with newer SQLAlchemy releases — confirm before bumping.
      sqlalchemy = super.sqlalchemy.overridePythonAttrs (oldAttrs: rec {
        version = "1.3.24";
        src = fetchPypi {
          inherit (oldAttrs) pname;
          inherit version;
          hash = "sha256-67t3fL+TEjWbiXv4G6ANrg9ctp+6KhgmXcwYpvXvdRk=";
        };
        # The check phase is disabled for this pinned version.
        doCheck = false;
      });

      # Use the `tornado_4` attribute wherever this package set asks for
      # `tornado`.
      tornado = super.tornado_4;
    };
  };

in
with python.pkgs; buildPythonApplication rec {
  pname = "grab-site";
  version = "2.2.7";

  src = fetchFromGitHub {
    rev = version;
    owner = "ArchiveTeam";
    repo = "grab-site";
    # SRI hash passed via `hash`, matching the fetchPypi call above.
    hash = "sha256-tf8GyFjya3+TVc2VjlY6ztfjCJgof6tg4an18pz+Ig8=";
  };

  # setup.py declares wpull as a direct-URL requirement pointing at a GitHub
  # tarball; rewrite it to the plain requirement name "wpull". The pattern
  # embeds the version of the packaged `ludios_wpull`, so it only matches
  # while that version agrees with the one written in setup.py.
  postPatch = ''
    substituteInPlace setup.py \
      --replace '"wpull @ https://github.com/ArchiveTeam/ludios_wpull/tarball/master#egg=wpull-${ludios_wpull.version}"' '"wpull"'
  '';

  propagatedBuildInputs = [
    click
    ludios_wpull
    manhole
    lmdb
    autobahn
    fb-re2
    websockets
    faust-cchardet
  ];

  # Run the repository's offline test script with the installed executables
  # from $out/bin available on PATH.
  checkPhase = ''
    export PATH=$PATH:$out/bin
    bash ./tests/offline-tests
  '';

  meta = {
    description = "Crawler for web archiving with WARC output";
    homepage = "https://github.com/ArchiveTeam/grab-site";
    license = lib.licenses.mit;
    maintainers = with lib.maintainers; [ ivan ];
    platforms = lib.platforms.all;
  };
}