2022-03-30 09:31:56 +00:00
|
|
|
{ config, lib, pkgs, ... }:
|
2021-10-28 06:52:43 +00:00
|
|
|
with lib;
|
2020-04-24 23:36:52 +00:00
|
|
|
let
|
|
|
|
# Shorthand for this module's option values.
cfg = config.services.hadoop;

# String form of whatever ./conf.nix evaluates to for the current settings,
# with a trailing slash appended.
hadoopConf =
  let
    confDir = import ./conf.nix { inherit cfg pkgs lib; };
  in
  "${confDir}/";
|
2022-03-30 09:31:56 +00:00
|
|
|
|
|
|
|
# Builds the option set declared for a single HDFS daemon.
#
# Arguments:
#   serviceName    - human-readable daemon name, interpolated into the
#                    option descriptions.
#   firewallOption - when true (the default) an `openFirewall` option is
#                    included in the result.
#   extraOpts      - optional attribute set of further options; it is merged
#                    in last, so it wins over same-named options built here.
hadoopServiceOption = { serviceName, firewallOption ? true, extraOpts ? null }:
  let
    # Options every daemon gets.
    commonOptions = {
      enable = mkEnableOption serviceName;

      restartIfChanged = mkOption {
        type = types.bool;
        default = false;
        description = ''
          Automatically restart the service on config change.
          This can be set to false to defer restarts on clusters running critical applications.
          Please consider the security implications of inadvertently running an older version,
          and the possibility of unexpected behavior caused by inconsistent versions across a cluster when disabling this option.
        '';
      };

      extraFlags = mkOption {
        type = types.listOf types.str;
        default = [ ];
        example = [
          "-Dcom.sun.management.jmxremote"
          "-Dcom.sun.management.jmxremote.port=8010"
        ];
        description = "Extra command line flags to pass to ${serviceName}";
      };

      extraEnv = mkOption {
        type = types.attrsOf types.str;
        default = { };
        description = "Extra environment variables for ${serviceName}";
      };
    };

    # Only present when the caller asked for a firewall toggle.
    firewallOptions =
      if firewallOption then {
        openFirewall = mkOption {
          type = types.bool;
          default = false;
          description = "Open firewall ports for ${serviceName}.";
        };
      } else { };

    # Caller-supplied additions; `null` means "none".
    additionalOptions = if extraOpts == null then { } else extraOpts;
  in
  commonOptions // firewallOptions // additionalOptions;
|
|
|
|
|
|
|
|
# Builds the configuration fragment for a single HDFS daemon.
#
# Arguments:
#   name            - daemon name; its lower-cased form is the `hdfs`
#                     subcommand, the suffix of the systemd unit name and the
#                     default key looked up under `cfg.hdfs`.
#   serviceOptions  - option values of the daemon (defaults to
#                     cfg.hdfs.<lower-cased name>).
#   description     - systemd unit description.
#   User            - account the unit runs as.
#   allowedTCPPorts - ports opened when the daemon's `openFirewall` is set.
#   preStart        - script run before the daemon starts.
#   environment     - base environment; entries in `serviceOptions.extraEnv`
#                     take precedence over it.
#   extraConfig     - additional configuration merged alongside the unit.
#
# The whole fragment is only active while `serviceOptions.enable` is true.
hadoopServiceConfig =
  { name
  , serviceOptions ? cfg.hdfs."${toLower name}"
  , description ? "Hadoop HDFS ${name}"
  , User ? "hdfs"
  , allowedTCPPorts ? [ ]
  , preStart ? ""
  , environment ? { }
  , extraConfig ? { }
  }:
  let
    daemon = toLower name;
    unitName = "hdfs-${daemon}";

    # Daemons declared without a firewall option have no `openFirewall`
    # attribute at all, hence the fallback.
    wantsFirewall = serviceOptions.openFirewall or false;

    daemonConfig = {
      systemd.services.${unitName} = {
        inherit description preStart;
        inherit (serviceOptions) restartIfChanged;
        wantedBy = [ "multi-user.target" ];
        environment = environment // serviceOptions.extraEnv;
        serviceConfig = {
          inherit User;
          SyslogIdentifier = unitName;
          ExecStart = "${cfg.package}/bin/hdfs --config ${hadoopConf} ${daemon} ${escapeShellArgs serviceOptions.extraFlags}";
          Restart = "always";
        };
      };

      # Every enabled daemon also switches on the gateway role.
      services.hadoop.gatewayRole.enable = true;

      networking.firewall.allowedTCPPorts = mkIf wantsFirewall allowedTCPPorts;
    };
  in
  mkIf serviceOptions.enable (mkMerge [ daemonConfig extraConfig ]);
|
|
|
|
|
2020-04-24 23:36:52 +00:00
|
|
|
in
|
|
|
|
{
|
|
|
|
options.services.hadoop.hdfs = {
|
2022-03-30 09:31:56 +00:00
|
|
|
|
|
|
|
# NameNode options: the shared daemon options plus `formatOnInit`.
namenode = hadoopServiceOption {
  serviceName = "HDFS NameNode";
  extraOpts = {
    formatOnInit = mkOption {
      type = types.bool;
      default = false;
      description = ''
        Format HDFS namenode on first start. This is useful for quickly spinning up
        ephemeral HDFS clusters with a single namenode.
        For HA clusters, initialization involves multiple steps across multiple nodes.
        Follow this guide to initialize an HA cluster manually:
        <https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html>
      '';
    };
  };
};
|
2022-03-30 09:31:56 +00:00
|
|
|
|
|
|
|
# DataNode options: the shared daemon options plus `dataDirs`, an optional
# list of { type, path } storage definitions (null means "not set").
datanode = hadoopServiceOption {
  serviceName = "HDFS DataNode";
  extraOpts = {
    dataDirs = mkOption {
      default = null;
      description = "Tier and path definitions for datanode storage.";
      type = with types; nullOr (listOf (submodule {
        options = {
          type = mkOption {
            type = enum [ "SSD" "DISK" "ARCHIVE" "RAM_DISK" ];
            description = ''
              Storage types ([SSD]/[DISK]/[ARCHIVE]/[RAM_DISK]) for HDFS storage policies.
            '';
          };
          path = mkOption {
            type = path;
            # The option holds a single path, so the example is one path
            # rather than a list of paths.
            example = "/var/lib/hadoop/hdfs/dn";
            description = "Determines where on the local filesystem a data node should store its blocks.";
          };
        };
      }));
    };
  };
};
|
2022-03-30 09:31:56 +00:00
|
|
|
|
|
|
|
# JournalNode options: only the shared daemon options.
journalnode = hadoopServiceOption {
  serviceName = "HDFS JournalNode";
};
|
|
|
|
|
|
|
|
# ZooKeeper failover controller options; declared without an `openFirewall`
# option.
zkfc = hadoopServiceOption {
  firewallOption = false;
  serviceName = "HDFS ZooKeeper failover controller";
};
|
2022-03-30 09:31:56 +00:00
|
|
|
|
|
|
|
# HTTPFS options: the shared daemon options plus `tempPath`.
httpfs = hadoopServiceOption {
  # Name shown in the generated option descriptions; it must name HTTPFS,
  # not the JournalNode.
  serviceName = "HDFS HTTPFS";
  extraOpts = {
    tempPath = mkOption {
      type = types.path;
      default = "/tmp/hadoop/httpfs";
      description = "HTTPFS_TEMP path used by HTTPFS";
    };
  };
};
|
2022-03-30 09:31:56 +00:00
|
|
|
|
2020-04-24 23:36:52 +00:00
|
|
|
};
|
|
|
|
|
|
|
|
config = mkMerge [
|
2022-03-30 09:31:56 +00:00
|
|
|
# NameNode daemon.
(hadoopServiceConfig {
  name = "NameNode";

  # Only set when formatOnInit is enabled. The trailing `|| true` keeps a
  # failing format command from failing the pre-start step.
  preStart = mkIf cfg.hdfs.namenode.formatOnInit
    "${cfg.package}/bin/hdfs --config ${hadoopConf} namenode -format -nonInteractive || true";

  allowedTCPPorts = [
    9870 # namenode.http-address
    8020 # namenode.rpc-address
    8022 # namenode.servicerpc-address
    8019 # dfs.ha.zkfc.port
  ];
})
|
2021-10-28 06:52:43 +00:00
|
|
|
|
2022-03-30 09:31:56 +00:00
|
|
|
# DataNode daemon.
(hadoopServiceConfig {
  name = "DataNode";

  # The datanode port numbers differ between Hadoop 2 and Hadoop 3, so pick
  # the set matching the configured package version.
  allowedTCPPorts =
    if versionOlder cfg.package.version "3" then [
      50075 # datanode.http.address
      50010 # datanode.address
      50020 # datanode.ipc.address
    ] else [
      9864 # datanode.http.address
      9866 # datanode.address
      9867 # datanode.ipc.address
    ];

  # When dataDirs is set, render it as a comma-separated list of
  # "[TYPE]file://PATH" entries.
  extraConfig.services.hadoop.hdfsSiteInternal."dfs.datanode.data.dir" =
    mkIf (cfg.hdfs.datanode.dataDirs != null)
      (concatMapStringsSep "," (dir: "[${dir.type}]file://${dir.path}") cfg.hdfs.datanode.dataDirs);
})
|
2021-12-06 16:07:01 +00:00
|
|
|
|
2022-03-30 09:31:56 +00:00
|
|
|
# JournalNode daemon.
(hadoopServiceConfig {
  allowedTCPPorts = [
    8480 # dfs.journalnode.http-address
    8485 # dfs.journalnode.rpc-address
  ];
  name = "JournalNode";
})
|
|
|
|
|
2022-03-30 09:31:56 +00:00
|
|
|
# ZooKeeper failover controller daemon; no firewall ports are listed for it.
(hadoopServiceConfig {
  description = "Hadoop HDFS ZooKeeper failover controller";
  name = "zkfc";
})
|
2021-12-06 16:07:01 +00:00
|
|
|
|
2022-03-30 09:31:56 +00:00
|
|
|
# HTTPFS daemon, run as the `httpfs` user.
(hadoopServiceConfig {
  name = "HTTPFS";
  User = "httpfs";

  # Exported to the unit so the pre-start script below can read it.
  environment.HTTPFS_TEMP = cfg.hdfs.httpfs.tempPath;

  # Create the temp directory before start. The expansion is quoted so that a
  # configured path containing whitespace is created as one directory instead
  # of being word-split by the shell.
  preStart = ''mkdir -p "$HTTPFS_TEMP"'';

  allowedTCPPorts = [
    14000 # httpfs.http.port
  ];
})
|
2022-03-30 09:31:56 +00:00
|
|
|
|
|
|
|
# Declare the `hdfs` account whenever the gateway role is enabled.
(mkIf cfg.gatewayRole.enable {
  users.users.hdfs = {
    uid = config.ids.uids.hdfs;
    group = "hadoop";
    description = "Hadoop HDFS user";
  };
})
|
2021-12-06 16:07:01 +00:00
|
|
|
# Declare the `httpfs` system account only while the HTTPFS daemon is enabled.
(mkIf cfg.hdfs.httpfs.enable {
  users.users.httpfs = {
    isSystemUser = true;
    group = "hadoop";
    description = "Hadoop HTTPFS user";
  };
})
|
2022-03-30 09:31:56 +00:00
|
|
|
|
2020-04-24 23:36:52 +00:00
|
|
|
];
|
|
|
|
}
|