depot/third_party/nixpkgs/patches/pr113137-ceph-bluestore.patch

311 lines
12 KiB
Diff

From 9ac395366bee383913c157911f76cbbbf09f226b Mon Sep 17 00:00:00 2001
From: Luke Granger-Brown <git@lukegb.com>
Date: Sun, 14 Feb 2021 17:57:50 +0000
Subject: [PATCH 1/3] ceph: use bundled rocksdb
Ceph have some custom patches to make RocksDB play nicely with
Bluestore, which means the system RocksDB package doesn't quite work
properly.
---
pkgs/tools/filesystems/ceph/default.nix | 8 +++-----
1 file changed, 3 insertions(+), 5 deletions(-)
diff --git a/pkgs/tools/filesystems/ceph/default.nix b/pkgs/tools/filesystems/ceph/default.nix
index 61cc4573693da..2d9ecfbcaa932 100644
--- a/pkgs/tools/filesystems/ceph/default.nix
+++ b/pkgs/tools/filesystems/ceph/default.nix
@@ -9,7 +9,7 @@
, babeltrace, gperf
, gtest
, cunit, snappy
-, rocksdb, makeWrapper
+, makeWrapper
, leveldb, oathToolkit
, libnl, libcap_ng
, rdkafka
@@ -146,7 +146,7 @@ in rec {
buildInputs = cryptoLibsMap.${cryptoStr} ++ [
boost ceph-python-env libxml2 optYasm optLibatomic_ops optLibs3
malloc zlib openldap lttng-ust babeltrace gperf gtest cunit
- snappy rocksdb lz4 oathToolkit leveldb libnl libcap_ng rdkafka
+ snappy lz4 oathToolkit leveldb libnl libcap_ng rdkafka
] ++ lib.optionals stdenv.isLinux [
linuxHeaders util-linux libuuid udev keyutils optLibaio optLibxfs optZfs
# ceph 14
@@ -171,12 +171,10 @@ in rec {
cmakeFlags = [
"-DWITH_PYTHON3=ON"
- "-DWITH_SYSTEM_ROCKSDB=OFF"
+ "-DWITH_SYSTEM_ROCKSDB=OFF" # breaks Bluestore
"-DCMAKE_INSTALL_DATADIR=${placeholder "lib"}/lib"
-
"-DWITH_SYSTEM_BOOST=ON"
- "-DWITH_SYSTEM_ROCKSDB=ON"
"-DWITH_SYSTEM_GTEST=ON"
"-DMGR_PYTHON_VERSION=${ceph-python-env.python.pythonVersion}"
"-DWITH_SYSTEMD=OFF"
From bfc11c643d9e0bd2d79c5de86d0bddce589b56d0 Mon Sep 17 00:00:00 2001
From: Luke Granger-Brown <git@lukegb.com>
Date: Sun, 14 Feb 2021 17:59:29 +0000
Subject: [PATCH 2/3] nixos/tests/ceph-single-node-bluestore: init
The current Ceph tests use the old method for OSDs to store data on
disks, known as Filestore. This means there are no tests for the
Bluestore functionality that run on install, which means that things
like RocksDB being broken can slip through and break the Bluestore
functionality in a subtle and difficult to debug manner.
Add a test to check that Bluestore works, at least on a single node.
---
nixos/tests/all-tests.nix | 1 +
nixos/tests/ceph-single-node-bluestore.nix | 196 +++++++++++++++++++++
2 files changed, 197 insertions(+)
create mode 100644 nixos/tests/ceph-single-node-bluestore.nix
diff --git a/nixos/tests/all-tests.nix b/nixos/tests/all-tests.nix
index 444580bc0bed6..530847575bcf6 100644
--- a/nixos/tests/all-tests.nix
+++ b/nixos/tests/all-tests.nix
@@ -55,6 +55,7 @@ in
cassandra_3_11 = handleTest ./cassandra.nix { testPackage = pkgs.cassandra_3_11; };
ceph-multi-node = handleTestOn ["x86_64-linux"] ./ceph-multi-node.nix {};
ceph-single-node = handleTestOn ["x86_64-linux"] ./ceph-single-node.nix {};
+ ceph-single-node-bluestore = handleTestOn ["x86_64-linux"] ./ceph-single-node-bluestore.nix {};
certmgr = handleTest ./certmgr.nix {};
cfssl = handleTestOn ["x86_64-linux"] ./cfssl.nix {};
charliecloud = handleTest ./charliecloud.nix {};
diff --git a/nixos/tests/ceph-single-node-bluestore.nix b/nixos/tests/ceph-single-node-bluestore.nix
new file mode 100644
index 0000000000000..cc873e8aee576
--- /dev/null
+++ b/nixos/tests/ceph-single-node-bluestore.nix
@@ -0,0 +1,196 @@
+import ./make-test-python.nix ({pkgs, lib, ...}:
+
+let
+ cfg = {
+ clusterId = "066ae264-2a5d-4729-8001-6ad265f50b03";
+ monA = {
+ name = "a";
+ ip = "192.168.1.1";
+ };
+ osd0 = {
+ name = "0";
+ key = "AQBCEJNa3s8nHRAANvdsr93KqzBznuIWm2gOGg==";
+ uuid = "55ba2294-3e24-478f-bee0-9dca4c231dd9";
+ };
+ osd1 = {
+ name = "1";
+ key = "AQBEEJNac00kExAAXEgy943BGyOpVH1LLlHafQ==";
+ uuid = "5e97a838-85b6-43b0-8950-cb56d554d1e5";
+ };
+ osd2 = {
+ name = "2";
+ key = "AQAdyhZeIaUlARAAGRoidDAmS6Vkp546UFEf5w==";
+ uuid = "ea999274-13d0-4dd5-9af9-ad25a324f72f";
+ };
+ };
+ generateCephConfig = { daemonConfig }: {
+ enable = true;
+ global = {
+ fsid = cfg.clusterId;
+ monHost = cfg.monA.ip;
+ monInitialMembers = cfg.monA.name;
+ };
+ } // daemonConfig;
+
+ generateHost = { pkgs, cephConfig, networkConfig, ... }: {
+ virtualisation = {
+ memorySize = 512;
+ emptyDiskImages = [ 20480 20480 20480 ];
+ vlans = [ 1 ];
+ };
+
+ networking = networkConfig;
+
+ environment.systemPackages = with pkgs; [
+ bash
+ sudo
+ ceph
+ xfsprogs
+ ];
+
+ boot.kernelModules = [ "xfs" ];
+
+ services.ceph = cephConfig;
+ };
+
+ networkMonA = {
+ dhcpcd.enable = false;
+ interfaces.eth1.ipv4.addresses = pkgs.lib.mkOverride 0 [
+ { address = cfg.monA.ip; prefixLength = 24; }
+ ];
+ };
+ cephConfigMonA = generateCephConfig { daemonConfig = {
+ mon = {
+ enable = true;
+ daemons = [ cfg.monA.name ];
+ };
+ mgr = {
+ enable = true;
+ daemons = [ cfg.monA.name ];
+ };
+ osd = {
+ enable = true;
+ daemons = [ cfg.osd0.name cfg.osd1.name cfg.osd2.name ];
+ };
+ }; };
+
+ # Following deployment is based on the manual deployment described here:
+ # https://docs.ceph.com/docs/master/install/manual-deployment/
+ # For other ways to deploy a ceph cluster, look at the documentation at
+ # https://docs.ceph.com/docs/master/
+ testscript = { ... }: ''
+ start_all()
+
+ monA.wait_for_unit("network.target")
+
+ # Bootstrap ceph-mon daemon
+ monA.succeed(
+ "sudo -u ceph ceph-authtool --create-keyring /tmp/ceph.mon.keyring --gen-key -n mon. --cap mon 'allow *'",
+ "sudo -u ceph ceph-authtool --create-keyring /etc/ceph/ceph.client.admin.keyring --gen-key -n client.admin --cap mon 'allow *' --cap osd 'allow *' --cap mds 'allow *' --cap mgr 'allow *'",
+ "sudo -u ceph ceph-authtool /tmp/ceph.mon.keyring --import-keyring /etc/ceph/ceph.client.admin.keyring",
+ "monmaptool --create --add ${cfg.monA.name} ${cfg.monA.ip} --fsid ${cfg.clusterId} /tmp/monmap",
+ "sudo -u ceph ceph-mon --mkfs -i ${cfg.monA.name} --monmap /tmp/monmap --keyring /tmp/ceph.mon.keyring",
+ "sudo -u ceph touch /var/lib/ceph/mon/ceph-${cfg.monA.name}/done",
+ "systemctl start ceph-mon-${cfg.monA.name}",
+ )
+ monA.wait_for_unit("ceph-mon-${cfg.monA.name}")
+ monA.succeed("ceph mon enable-msgr2")
+
+ # Can't check ceph status until a mon is up
+ monA.succeed("ceph -s | grep 'mon: 1 daemons'")
+
+ # Start the ceph-mgr daemon, after creating its keyring via the mon
+ monA.succeed(
+ "sudo -u ceph mkdir -p /var/lib/ceph/mgr/ceph-${cfg.monA.name}/",
+ "ceph auth get-or-create mgr.${cfg.monA.name} mon 'allow profile mgr' osd 'allow *' mds 'allow *' > /var/lib/ceph/mgr/ceph-${cfg.monA.name}/keyring",
+ "systemctl start ceph-mgr-${cfg.monA.name}",
+ )
+ monA.wait_for_unit("ceph-mgr-${cfg.monA.name}")
+ monA.wait_until_succeeds("ceph -s | grep 'quorum ${cfg.monA.name}'")
+ monA.wait_until_succeeds("ceph -s | grep 'mgr: ${cfg.monA.name}(active,'")
+
+ # Bootstrap OSDs
+ monA.succeed(
+ "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd0.name}",
+ "echo bluestore > /var/lib/ceph/osd/ceph-${cfg.osd0.name}/type",
+ "ln -sf /dev/vdb /var/lib/ceph/osd/ceph-${cfg.osd0.name}/block",
+ "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd1.name}",
+ "echo bluestore > /var/lib/ceph/osd/ceph-${cfg.osd1.name}/type",
+ "ln -sf /dev/vdc /var/lib/ceph/osd/ceph-${cfg.osd1.name}/block",
+ "mkdir -p /var/lib/ceph/osd/ceph-${cfg.osd2.name}",
+ "echo bluestore > /var/lib/ceph/osd/ceph-${cfg.osd2.name}/type",
+ "ln -sf /dev/vdd /var/lib/ceph/osd/ceph-${cfg.osd2.name}/block",
+ "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd0.name}/keyring --name osd.${cfg.osd0.name} --add-key ${cfg.osd0.key}",
+ "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd1.name}/keyring --name osd.${cfg.osd1.name} --add-key ${cfg.osd1.key}",
+ "ceph-authtool --create-keyring /var/lib/ceph/osd/ceph-${cfg.osd2.name}/keyring --name osd.${cfg.osd2.name} --add-key ${cfg.osd2.key}",
+ 'echo \'{"cephx_secret": "${cfg.osd0.key}"}\' | ceph osd new ${cfg.osd0.uuid} -i -',
+ 'echo \'{"cephx_secret": "${cfg.osd1.key}"}\' | ceph osd new ${cfg.osd1.uuid} -i -',
+ 'echo \'{"cephx_secret": "${cfg.osd2.key}"}\' | ceph osd new ${cfg.osd2.uuid} -i -',
+ )
+
+ # Initialize the OSDs with bluestore
+ monA.succeed(
+ "ceph-osd -i ${cfg.osd0.name} --mkfs --osd-uuid ${cfg.osd0.uuid}",
+ "ceph-osd -i ${cfg.osd1.name} --mkfs --osd-uuid ${cfg.osd1.uuid}",
+ "ceph-osd -i ${cfg.osd2.name} --mkfs --osd-uuid ${cfg.osd2.uuid}",
+ "chown -R ceph:ceph /var/lib/ceph/osd",
+ "systemctl start ceph-osd-${cfg.osd0.name}",
+ "systemctl start ceph-osd-${cfg.osd1.name}",
+ "systemctl start ceph-osd-${cfg.osd2.name}",
+ )
+ monA.wait_until_succeeds("ceph osd stat | grep -e '3 osds: 3 up[^,]*, 3 in'")
+ monA.wait_until_succeeds("ceph -s | grep 'mgr: ${cfg.monA.name}(active,'")
+ monA.wait_until_succeeds("ceph -s | grep 'HEALTH_OK'")
+
+ monA.succeed(
+ "ceph osd pool create single-node-test 32 32",
+ "ceph osd pool ls | grep 'single-node-test'",
+ "ceph osd pool rename single-node-test single-node-other-test",
+ "ceph osd pool ls | grep 'single-node-other-test'",
+ )
+ monA.wait_until_succeeds("ceph -s | grep '2 pools, 33 pgs'")
+ monA.succeed(
+ "ceph osd getcrushmap -o crush",
+ "crushtool -d crush -o decrushed",
+ "sed 's/step chooseleaf firstn 0 type host/step chooseleaf firstn 0 type osd/' decrushed > modcrush",
+ "crushtool -c modcrush -o recrushed",
+ "ceph osd setcrushmap -i recrushed",
+ "ceph osd pool set single-node-other-test size 2",
+ )
+ monA.wait_until_succeeds("ceph -s | grep 'HEALTH_OK'")
+ monA.wait_until_succeeds("ceph -s | grep '33 active+clean'")
+ monA.fail(
+ "ceph osd pool ls | grep 'multi-node-test'",
+ "ceph osd pool delete single-node-other-test single-node-other-test --yes-i-really-really-mean-it",
+ )
+
+ # Shut down ceph by stopping ceph.target.
+ monA.succeed("systemctl stop ceph.target")
+
+ # Start it up
+ monA.succeed("systemctl start ceph.target")
+ monA.wait_for_unit("ceph-mon-${cfg.monA.name}")
+ monA.wait_for_unit("ceph-mgr-${cfg.monA.name}")
+ monA.wait_for_unit("ceph-osd-${cfg.osd0.name}")
+ monA.wait_for_unit("ceph-osd-${cfg.osd1.name}")
+ monA.wait_for_unit("ceph-osd-${cfg.osd2.name}")
+
+ # Ensure the cluster comes back up again
+ monA.succeed("ceph -s | grep 'mon: 1 daemons'")
+ monA.wait_until_succeeds("ceph -s | grep 'quorum ${cfg.monA.name}'")
+ monA.wait_until_succeeds("ceph osd stat | grep -e '3 osds: 3 up[^,]*, 3 in'")
+ monA.wait_until_succeeds("ceph -s | grep 'mgr: ${cfg.monA.name}(active,'")
+ monA.wait_until_succeeds("ceph -s | grep 'HEALTH_OK'")
+ '';
+in {
+ name = "basic-single-node-ceph-cluster-bluestore";
+ meta = with pkgs.lib.maintainers; {
+ maintainers = [ lukegb ];
+ };
+
+ nodes = {
+ monA = generateHost { pkgs = pkgs; cephConfig = cephConfigMonA; networkConfig = networkMonA; };
+ };
+
+ testScript = testscript;
+})
From 236df9cee7b22e37ce2e98a3ede370abc145dfea Mon Sep 17 00:00:00 2001
From: Luke Granger-Brown <git@lukegb.com>
Date: Sun, 14 Feb 2021 18:01:16 +0000
Subject: [PATCH 3/3] ceph: add passthru.tests
---
pkgs/tools/filesystems/ceph/default.nix | 2 ++
1 file changed, 2 insertions(+)
diff --git a/pkgs/tools/filesystems/ceph/default.nix b/pkgs/tools/filesystems/ceph/default.nix
index 2d9ecfbcaa932..aaa5806d402a9 100644
--- a/pkgs/tools/filesystems/ceph/default.nix
+++ b/pkgs/tools/filesystems/ceph/default.nix
@@ -13,6 +13,7 @@
, leveldb, oathToolkit
, libnl, libcap_ng
, rdkafka
+, nixosTests
# Optional Dependencies
, yasm ? null, fcgi ? null, expat ? null
@@ -199,6 +200,7 @@ in rec {
meta = getMeta "Distributed storage system";
passthru.version = version;
+ passthru.tests = { inherit (nixosTests) ceph-single-node ceph-multi-node ceph-single-node-bluestore; };
};
ceph-client = runCommand "ceph-client-${version}" {