summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--nixos/modules/misc/ids.nix2
-rw-r--r--nixos/modules/module-list.nix1
-rw-r--r--nixos/modules/services/databases/cockroachdb.nix221
-rw-r--r--nixos/tests/cockroachdb.nix126
4 files changed, 350 insertions, 0 deletions
diff --git a/nixos/modules/misc/ids.nix b/nixos/modules/misc/ids.nix
index c368cd911860..d9ba2efa0c8a 100644
--- a/nixos/modules/misc/ids.nix
+++ b/nixos/modules/misc/ids.nix
@@ -337,6 +337,7 @@
alerta = 310;
minetest = 311;
rss2email = 312;
+ cockroachdb = 313;
# When adding a uid, make sure it doesn't match an existing gid. And don't use uids above 399!
@@ -634,6 +635,7 @@
alerta = 310;
minetest = 311;
rss2email = 312;
+ cockroachdb = 313;
# When adding a gid, make sure it doesn't match an existing
# uid. Users and groups with the same name should have equal
diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix
index 75e8446523f9..7a6fbab7c36e 100644
--- a/nixos/modules/module-list.nix
+++ b/nixos/modules/module-list.nix
@@ -212,6 +212,7 @@
./services/databases/aerospike.nix
./services/databases/cassandra.nix
./services/databases/clickhouse.nix
+ ./services/databases/cockroachdb.nix
./services/databases/couchdb.nix
./services/databases/firebird.nix
./services/databases/foundationdb.nix
diff --git a/nixos/modules/services/databases/cockroachdb.nix b/nixos/modules/services/databases/cockroachdb.nix
new file mode 100644
index 000000000000..1bc20a258045
--- /dev/null
+++ b/nixos/modules/services/databases/cockroachdb.nix
@@ -0,0 +1,221 @@
+{ config, lib, pkgs, ... }:
+
+with lib;
+
+let
+ cfg = config.services.cockroachdb;
+ crdb = cfg.package;
+
+ escape = builtins.replaceStrings ["%"] ["%%"];
+ ifNotNull = v: s: optionalString (!isNull v) s;
+
+ startupCommand = lib.concatStringsSep " "
+ [ # Basic startup
+ "${crdb}/bin/cockroach start"
+ "--logtostderr"
+ "--store=${cfg.dataDir}"
+ (ifNotNull cfg.locality "--locality='${cfg.locality}'")
+
+ # WebUI settings
+ "--http-addr='${cfg.http.address}:${toString cfg.http.port}'"
+
+ # Cluster listen address
+ "--listen-addr='${cfg.listen.address}:${toString cfg.listen.port}'"
+
+ # Cluster configuration
+ (ifNotNull cfg.join "--join=${cfg.join}")
+
+ # Cache and memory settings. Must be escaped.
+ "--cache='${escape cfg.cache}'"
+ "--max-sql-memory='${escape cfg.maxSqlMemory}'"
+
+ # Certificate/security settings.
+ (if cfg.insecure then "--insecure" else "--certs-dir=${cfg.certsDir}")
+ ];
+
+  addressOption = descr: defaultPort: { # address/port option pair for one listener; descr feeds the help text
+    address = mkOption {
+      type = types.str;
+      default = "localhost";
+      description = "Address to bind to for ${descr}";
+    };
+
+    port = mkOption {
+      type = types.port; # range-checked port type: values outside 0-65535 fail at evaluation time
+      default = defaultPort;
+      description = "Port to bind to for ${descr}";
+    };
+  };
+in
+
+{
+ options = {
+ services.cockroachdb = {
+ enable = mkEnableOption "CockroachDB Server";
+
+ listen = addressOption "intra-cluster communication" 26257;
+
+ http = addressOption "http-based Admin UI" 8080;
+
+ locality = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ description = ''
+ An ordered, comma-separated list of key-value pairs that describe the
+ topography of the machine. Topography might include country,
+ datacenter or rack designations. Data is automatically replicated to
+ maximize diversities of each tier. The order of tiers is used to
+ determine the priority of the diversity, so the more inclusive
+ localities like country should come before less inclusive localities
+ like datacenter. The tiers and order must be the same on all nodes.
+ Including more tiers is better than including fewer. For example:
+
+ country=us,region=us-west,datacenter=us-west-1b,rack=12
+ country=ca,region=ca-east,datacenter=ca-east-2,rack=4
+
+ planet=earth,province=manitoba,colo=secondary,power=3
+ '';
+ };
+
+ join = mkOption {
+ type = types.nullOr types.str;
+ default = null;
+ description = "The addresses for connecting the node to a cluster.";
+ };
+
+ dataDir = mkOption {
+ type = types.path;
+ default = "/var/lib/cockroachdb";
+ description = "Location where CockroachDB stores its table files";
+ };
+
+ insecure = mkOption {
+ type = types.bool;
+ default = false;
+ description = "Run in insecure mode.";
+ };
+
+ certsDir = mkOption {
+ type = types.nullOr types.path;
+ default = null;
+ description = "The path to the certificate directory.";
+ };
+
+ user = mkOption {
+ type = types.str;
+ default = "cockroachdb";
+ description = "User account under which CockroachDB runs";
+ };
+
+      group = mkOption {
+        type = types.str;
+        default = "cockroachdb";
+        description = "Group account under which CockroachDB runs"; # set as the primary group of the module-created user
+      };
+
+ openPorts = mkOption {
+ type = types.bool;
+ default = false;
+ description = "Open firewall ports for cluster communication by default";
+ };
+
+      cache = mkOption {
+        type = types.str;
+        default = "25%"; # the '%' is doubled by `escape` when the ExecStart line is assembled
+        description = ''
+          The total size for caches.
+
+          This can be a percentage, expressed with a percent sign or as a
+          decimal-point number, or any bytes-based unit. For example, "25%"
+          and "0.25" both represent 25% of the available system memory. The
+          values "1000000000" and "1GB" both represent 1 gigabyte of memory.
+        '';
+      };
+
+      maxSqlMemory = mkOption {
+        type = types.str;
+        default = "25%"; # the '%' is doubled by `escape` when the ExecStart line is assembled
+        description = ''
+          The maximum in-memory storage capacity available to store temporary
+          data for SQL queries.
+
+          This can be a percentage, expressed with a percent sign or as a
+          decimal-point number, or any bytes-based unit. For example, "25%"
+          and "0.25" both represent 25% of the available system memory. The
+          values "1000000000" and "1GB" both represent 1 gigabyte of memory.
+        '';
+      };
+
+ package = mkOption {
+ type = types.package;
+ default = pkgs.cockroachdb;
+ description = ''
+ The CockroachDB derivation to use for running the service.
+
+ This would primarily be useful to enable Enterprise Edition features
+ in your own custom CockroachDB build (Nixpkgs CockroachDB binaries
+ only contain open source features and open source code).
+ '';
+ };
+ };
+ };
+
+ config = mkIf config.services.cockroachdb.enable {
+ assertions = [
+ { assertion = !cfg.insecure -> !(isNull cfg.certsDir);
+ message = "CockroachDB must have a set of SSL certificates (.certsDir), or run in Insecure Mode (.insecure = true)";
+ }
+ ];
+
+ environment.systemPackages = [ crdb ];
+
+    users.users = optionalAttrs (cfg.user == "cockroachdb") { # only manage the account when the default name is kept
+      cockroachdb = { # keyed by account name, so the true branch is an attrset just like the `{}` fallback
+        description = "CockroachDB Server User";
+        uid = config.ids.uids.cockroachdb;
+        group = cfg.group;
+      };
+    };
+
+    users.groups = optionalAttrs (cfg.group == "cockroachdb") { # same rule for the group
+      cockroachdb.gid = config.ids.gids.cockroachdb;
+    };
+
+ networking.firewall.allowedTCPPorts = lib.optionals cfg.openPorts
+ [ cfg.http.port cfg.listen.port ];
+
+ systemd.services.cockroachdb =
+ { description = "CockroachDB Server";
+ documentation = [ "man:cockroach(1)" "https://www.cockroachlabs.com" ];
+
+ after = [ "network.target" "time-sync.target" ];
+ requires = [ "time-sync.target" ];
+ wantedBy = [ "multi-user.target" ];
+
+ unitConfig.RequiresMountsFor = "${cfg.dataDir}";
+
+ preStart = ''
+ if ! test -e ${cfg.dataDir}; then
+ mkdir -m 0700 -p ${cfg.dataDir}
+ chown -R ${cfg.user} ${cfg.dataDir}
+ fi
+ '';
+
+        serviceConfig = {
+          ExecStart = startupCommand; # single command line assembled in the let-block above
+          Type = "notify";
+          User = cfg.user;
+          PermissionsStartOnly = true; # presumably so preStart's mkdir/chown is not run as cfg.user - confirm
+
+          Restart = "always";
+          TimeoutStopSec = "60";
+          RestartSec = "10";
+          StandardOutput = "syslog";
+          StandardError = "syslog";
+          SyslogIdentifier = "cockroach";
+        };
+ };
+ };
+
+ meta.maintainers = with lib.maintainers; [ thoughtpolice ];
+}
diff --git a/nixos/tests/cockroachdb.nix b/nixos/tests/cockroachdb.nix
new file mode 100644
index 000000000000..56c624d8cf2f
--- /dev/null
+++ b/nixos/tests/cockroachdb.nix
@@ -0,0 +1,126 @@
+# This performs a full 'end-to-end' test of a multi-node CockroachDB cluster
+# using the built-in 'cockroach workload' command, to simulate a semi-realistic
+# test load. It generally takes anywhere from 3-5 minutes to run and 1-2GB of
+# RAM (though each of 3 workers gets 1GB allocated)
+#
+# CockroachDB requires synchronized system clocks within a small error window
+# (~500ms by default) on each node in order to maintain a multi-node cluster.
+# Cluster joins that are outside this window will fail, and nodes that skew
+# outside the window after joining will promptly get kicked out.
+#
+# To accommodate this, we use QEMU/virtio infrastructure and load the 'ptp_kvm'
+# driver inside a guest. This driver allows the host machine to pass its clock
+# through to the guest as a hardware clock that appears as a Precision Time
+# Protocol (PTP) Clock device, generally /dev/ptp0. PTP devices can be measured
+# and used as hardware reference clocks (similar to an on-board GPS clock) by
+# NTP software. In our case, we use Chrony to synchronize to the reference
+# clock.
+#
+# This test is currently NOT enabled as a continuously-checked NixOS test.
+# Ideally, this test would be run by Hydra and Borg on all relevant changes,
+# except:
+#
+# - Not every build machine is compatible with the ptp_kvm driver.
+# Virtualized EC2 instances, for example, do not support loading the ptp_kvm
+# driver into guests. However, bare metal builders (e.g. Packet) do seem to
+# work just fine. In practice, this means x86_64-linux builds would fail
+# randomly, depending on which build machine got the job. (This is probably
+# worth some investigation; I imagine it's based on ptp_kvm's usage of paravirt
+# support which may not be available in 'nested' environments.)
+#
+# - ptp_kvm is not supported on aarch64, otherwise it seems likely Cockroach
+# could be tested there, as well. This seems to be due to the usage of
+# the TSC in ptp_kvm, which isn't supported (easily) on AArch64. (And:
+# testing stuff, not just making sure it builds, is important to ensure
+# aarch64 support remains viable.)
+#
+# For future developers who are reading this message, are daring and would want
+# to fix this, some options are:
+#
+# - Just test a single node cluster instead (boring and less thorough).
+# - Move all CI to bare metal packet builders, and we can at least do x86_64-linux.
+# - Get virtualized clocking working in aarch64, somehow.
+# - Add a 4th node that acts as an NTP service and uses no PTP clocks for
+# references, at the client level. This bloats the node and memory
+# requirements, but would probably allow both aarch64/x86_64 to work.
+#
+
+let
+
+ # Creates a node. If the 'joinNode' parameter, a string containing an IP address,
+ # is non-null, then the CockroachDB server will attempt to join/connect to
+ # the cluster node specified at that address.
+ makeNode = locality: myAddr: joinNode:
+ { nodes, pkgs, lib, config, ... }:
+
+ {
+ # Bank/TPC-C benchmarks take some memory to complete
+ virtualisation.memorySize = 1024;
+
+ # Install the KVM PTP "Virtualized Clock" driver. This allows a /dev/ptp0
+ # device to appear as a reference clock, synchronized to the host clock.
+ # Because CockroachDB *requires* a time-synchronization mechanism for
+ # the system time in a cluster scenario, this is necessary to work.
+ boot.kernelModules = [ "ptp_kvm" ];
+
+ # Enable and configure Chrony, using the given virtualized clock passed
+ # through by KVM.
+ services.chrony.enable = true;
+ services.chrony.servers = lib.mkForce [ ];
+ services.chrony.extraConfig = ''
+ refclock PHC /dev/ptp0 poll 2 prefer require refid KVM
+ makestep 0.1 3
+ '';
+
+ # Enable CockroachDB. In order to ensure that Chrony has performed its
+ # first synchronization at boot-time (which may take ~10 seconds) before
+ # starting CockroachDB, we block the ExecStartPre directive using the
+ # 'waitsync' command. This ensures Cockroach doesn't have its system time
+ # leap forward out of nowhere during startup/execution.
+ #
+ # Note that CockroachDB's default threshold for NTP-based clock skew is
+ # ~500ms, so making sure it's started *after* accurate time
+ # synchronization is extremely important.
+ services.cockroachdb.enable = true;
+ services.cockroachdb.insecure = true;
+ services.cockroachdb.openPorts = true;
+ services.cockroachdb.locality = locality;
+ services.cockroachdb.listen.address = myAddr;
+ services.cockroachdb.join = lib.mkIf (joinNode != null) joinNode;
+
+ # Hold startup until Chrony has performed its first measurement (which
+ # will probably result in a full timeskip, thanks to makestep)
+ systemd.services.cockroachdb.preStart = ''
+ ${pkgs.chrony}/bin/chronyc waitsync
+ '';
+ };
+
+in import ./make-test.nix ({ pkgs, ...} : {
+ name = "cockroachdb";
+ meta.maintainers = with pkgs.stdenv.lib.maintainers;
+ [ thoughtpolice ];
+
+ nodes = rec {
+ node1 = makeNode "country=us,region=east,dc=1" "192.168.1.1" null;
+ node2 = makeNode "country=us,region=west,dc=2b" "192.168.1.2" "192.168.1.1";
+ node3 = makeNode "country=eu,region=west,dc=2" "192.168.1.3" "192.168.1.1";
+ };
+
+ # NOTE: All the nodes must start in order and you must NOT use startAll, because
+ # there's otherwise no way to guarantee that node1 will start before the others try
+ # to join it.
+ testScript = ''
+ $node1->start;
+ $node1->waitForUnit("cockroachdb");
+
+ $node2->start;
+ $node2->waitForUnit("cockroachdb");
+
+ $node3->start;
+ $node3->waitForUnit("cockroachdb");
+
+ $node1->mustSucceed("cockroach sql --host=192.168.1.1 --insecure -e 'SHOW ALL CLUSTER SETTINGS' 2>&1");
+ $node1->mustSucceed("cockroach workload init bank 'postgresql://root\@192.168.1.1:26257?sslmode=disable'");
+ $node1->mustSucceed("cockroach workload run bank --duration=1m 'postgresql://root\@192.168.1.1:26257?sslmode=disable'");
+ '';
+})