From 9ee30cd9b51c46cea7193993d006bb4301588001 Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 27 Nov 2013 16:54:20 +0100 Subject: [PATCH] Add support for lightweight NixOS containers You can now say: systemd.containers.foo.config = { services.openssh.enable = true; services.openssh.ports = [ 2022 ]; users.extraUsers.root.openssh.authorizedKeys.keys = [ "ssh-dss ..." ]; }; which defines a NixOS instance with the given configuration running inside a lightweight container. You can also manage the configuration of the container independently from the host: systemd.containers.foo.path = "/nix/var/nix/profiles/containers/foo"; where "path" is a NixOS system profile. It can be created/updated by doing: $ nix-env --set -p /nix/var/nix/profiles/containers/foo \ -f '' -A system -I nixos-config=foo.nix The container configuration (foo.nix) should define boot.isContainer = true; to optimise away the building of a kernel and initrd. This is done automatically when using the "config" route. On the host, a lightweight container appears as the service "container-.service". The container is like a regular NixOS (virtual) machine, except that it doesn't have its own kernel. It has its own root file system (by default /var/lib/containers/), but shares the Nix store of the host (as a read-only bind mount). It also has access to the network devices of the host. Currently, if the configuration of the container changes, running "nixos-rebuild switch" on the host will cause the container to be rebooted. In the future we may want to send some message to the container so that it can activate the new container configuration without rebooting. Containers are not perfectly isolated yet. In particular, the host's /sys/fs/cgroup is mounted (writable!) in the guest. --- nixos/lib/eval-config.nix | 4 +- nixos/modules/module-list.nix | 5 +- nixos/modules/services/hardware/udev.nix | 11 +- nixos/modules/system/activation/top-level.nix | 27 ++-- nixos/modules/system/boot/kernel.nix | 2 +- .../modules/system/boot/loader/grub/grub.nix | 2 +- nixos/modules/system/boot/modprobe.nix | 2 +- nixos/modules/system/boot/stage-1.nix | 2 +- nixos/modules/system/boot/systemd.nix | 7 - nixos/modules/virtualisation/containers.nix | 137 ++++++++++++++++++ 10 files changed, 172 insertions(+), 27 deletions(-) create mode 100644 nixos/modules/virtualisation/containers.nix diff --git a/nixos/lib/eval-config.nix b/nixos/lib/eval-config.nix index 5e1ce69158f..4b8c7354a7e 100644 --- a/nixos/lib/eval-config.nix +++ b/nixos/lib/eval-config.nix @@ -8,6 +8,7 @@ , extraArgs ? {} , modules , check ? true +, prefix ? [] }: let extraArgs_ = extraArgs; pkgs_ = pkgs; system_ = system; in @@ -17,6 +18,7 @@ rec { # Merge the option definitions in all modules, forming the full # system configuration. inherit (pkgs.lib.evalModules { + inherit prefix; modules = modules ++ baseModules; args = extraArgs; check = check && options.environment.checkConfigurationOptions.value; @@ -48,7 +50,7 @@ rec { let system = if nixpkgsOptions.system != "" then nixpkgsOptions.system else system_; nixpkgsOptions = (import ./eval-config.nix { - inherit system extraArgs modules; + inherit system extraArgs modules prefix; # For efficiency, leave out most NixOS modules; they don't # define nixpkgs.config, so it's pointless to evaluate them. baseModules = [ ../modules/misc/nixpkgs.nix ]; diff --git a/nixos/modules/module-list.nix b/nixos/modules/module-list.nix index 2189d0358da..078ea225e16 100644 --- a/nixos/modules/module-list.nix +++ b/nixos/modules/module-list.nix @@ -247,11 +247,11 @@ ./system/boot/kexec.nix ./system/boot/loader/efi.nix ./system/boot/loader/generations-dir/generations-dir.nix - ./system/boot/loader/gummiboot/gummiboot.nix - ./system/boot/loader/raspberrypi/raspberrypi.nix ./system/boot/loader/grub/grub.nix ./system/boot/loader/grub/memtest.nix + ./system/boot/loader/gummiboot/gummiboot.nix ./system/boot/loader/init-script/init-script.nix + ./system/boot/loader/raspberrypi/raspberrypi.nix ./system/boot/luksroot.nix ./system/boot/modprobe.nix ./system/boot/shutdown.nix @@ -276,6 +276,7 @@ ./tasks/scsi-link-power-management.nix ./tasks/swraid.nix ./testing/service-runner.nix + ./virtualisation/containers.nix ./virtualisation/libvirtd.nix #./virtualisation/nova.nix ./virtualisation/virtualbox-guest.nix diff --git a/nixos/modules/services/hardware/udev.nix b/nixos/modules/services/hardware/udev.nix index 516569c0280..52b3ad43579 100644 --- a/nixos/modules/services/hardware/udev.nix +++ b/nixos/modules/services/hardware/udev.nix @@ -209,7 +209,7 @@ in ###### implementation - config = { + config = mkIf (!config.boot.isContainer) { services.udev.extraRules = nixosRules; @@ -231,9 +231,16 @@ in boot.extraModprobeConfig = "options firmware_class path=${config.hardware.firmware}"; - system.activationScripts.clearHotplug = + system.activationScripts.udevd = '' echo "" > /proc/sys/kernel/hotplug + + # Regenerate the hardware database /var/lib/udev/hwdb.bin + # whenever systemd changes. + if [ ! -e /var/lib/udev/prev-systemd -o "$(readlink /var/lib/udev/prev-systemd)" != ${config.systemd.package} ]; then + echo "regenerating udev hardware database..." + ${config.systemd.package}/bin/udevadm hwdb --update && ln -sfn ${config.systemd.package} /var/lib/udev/prev-systemd + fi ''; }; diff --git a/nixos/modules/system/activation/top-level.nix b/nixos/modules/system/activation/top-level.nix index ada96131675..4146cd8394a 100644 --- a/nixos/modules/system/activation/top-level.nix +++ b/nixos/modules/system/activation/top-level.nix @@ -34,16 +34,24 @@ let in '' mkdir $out - if [ ! -f ${kernelPath} ]; then - echo "The bootloader cannot find the proper kernel image." - echo "(Expecting ${kernelPath})" - false - fi + # Containers don't have their own kernel or initrd. They boot + # directly into stage 2. + ${optionalString (!config.boot.isContainer) '' + if [ ! -f ${kernelPath} ]; then + echo "The bootloader cannot find the proper kernel image." + echo "(Expecting ${kernelPath})" + false + fi - ln -s ${kernelPath} $out/kernel - ln -s ${config.system.modulesTree} $out/kernel-modules + ln -s ${kernelPath} $out/kernel + ln -s ${config.system.modulesTree} $out/kernel-modules - ln -s ${config.system.build.initialRamdisk}/initrd $out/initrd + echo -n "$kernelParams" > $out/kernel-params + + ln -s ${config.system.build.initialRamdisk}/initrd $out/initrd + + ln -s ${config.hardware.firmware} $out/firmware + ''} echo "$activationScript" > $out/activate substituteInPlace $out/activate --subst-var out @@ -56,9 +64,7 @@ let ln -s ${config.system.build.etc}/etc $out/etc ln -s ${config.system.path} $out/sw ln -s "$systemd" $out/systemd - ln -s ${config.hardware.firmware} $out/firmware - echo -n "$kernelParams" > $out/kernel-params echo -n "$configurationName" > $out/configuration-name echo -n "systemd ${toString config.systemd.package.interfaceVersion}" > $out/init-interface-version echo -n "$nixosVersion" > $out/nixos-version @@ -92,7 +98,6 @@ let systemd = config.systemd.package; inherit children; - kernelParams = config.boot.kernelParams; installBootLoader = config.system.build.installBootLoader or "echo 'Warning: do not know how to make this configuration bootable; please enable a boot loader.' 1>&2; true"; diff --git a/nixos/modules/system/boot/kernel.nix b/nixos/modules/system/boot/kernel.nix index c3c38b186bd..ee2f5e9b4f6 100644 --- a/nixos/modules/system/boot/kernel.nix +++ b/nixos/modules/system/boot/kernel.nix @@ -145,7 +145,7 @@ in ###### implementation - config = { + config = mkIf (!config.boot.isContainer) { system.build = { inherit kernel; }; diff --git a/nixos/modules/system/boot/loader/grub/grub.nix b/nixos/modules/system/boot/loader/grub/grub.nix index 8b3923e30a0..ef6ff71ed77 100644 --- a/nixos/modules/system/boot/loader/grub/grub.nix +++ b/nixos/modules/system/boot/loader/grub/grub.nix @@ -44,7 +44,7 @@ in boot.loader.grub = { enable = mkOption { - default = true; + default = !config.boot.isContainer; type = types.bool; description = '' Whether to enable the GNU GRUB boot loader. diff --git a/nixos/modules/system/boot/modprobe.nix b/nixos/modules/system/boot/modprobe.nix index 39928da8d19..027a7ac99d5 100644 --- a/nixos/modules/system/boot/modprobe.nix +++ b/nixos/modules/system/boot/modprobe.nix @@ -66,7 +66,7 @@ with pkgs.lib; ###### implementation - config = { + config = mkIf (!config.boot.isContainer) { environment.etc = singleton { source = pkgs.writeText "modprobe.conf" diff --git a/nixos/modules/system/boot/stage-1.nix b/nixos/modules/system/boot/stage-1.nix index 7f7184b1e45..8ed3aecb691 100644 --- a/nixos/modules/system/boot/stage-1.nix +++ b/nixos/modules/system/boot/stage-1.nix @@ -328,7 +328,7 @@ in }; - config = { + config = mkIf (!config.boot.isContainer) { assertions = singleton { assertion = any (fs: fs.mountPoint == "/") (attrValues config.fileSystems); diff --git a/nixos/modules/system/boot/systemd.nix b/nixos/modules/system/boot/systemd.nix index 143f923813d..9f5a7678c85 100644 --- a/nixos/modules/system/boot/systemd.nix +++ b/nixos/modules/system/boot/systemd.nix @@ -604,13 +604,6 @@ in mkdir -p /var/log/journal chmod 0755 /var/log/journal - # Regenerate the hardware database /var/lib/udev/hwdb.bin - # whenever systemd changes. - if [ ! -e /var/lib/udev/prev-systemd -o "$(readlink /var/lib/udev/prev-systemd)" != ${systemd} ]; then - echo "regenerating udev hardware database..." - ${systemd}/bin/udevadm hwdb --update && ln -sfn ${systemd} /var/lib/udev/prev-systemd - fi - # Make all journals readable to users in the wheel and adm # groups, in addition to those in the systemd-journal group. # Users can always read their own journals. diff --git a/nixos/modules/virtualisation/containers.nix b/nixos/modules/virtualisation/containers.nix new file mode 100644 index 00000000000..bcbfaacd703 --- /dev/null +++ b/nixos/modules/virtualisation/containers.nix @@ -0,0 +1,137 @@ +{ config, pkgs, ... }: + +with pkgs.lib; + +{ + options = { + + boot.isContainer = mkOption { + type = types.bool; + default = false; + description = '' + Whether this NixOS machine is a lightweight container running + in another NixOS system. + ''; + }; + + systemd.containers = mkOption { + type = types.attrsOf (types.submodule ( + { config, options, name, ... }: + { + options = { + + root = mkOption { + type = types.path; + description = '' + The root directory of the container. + ''; + }; + + config = mkOption { + description = '' + A specification of the desired configuration of this + container, as a NixOS module. + ''; + }; + + path = mkOption { + type = types.path; + example = "/nix/var/nix/profiles/containers/webserver"; + description = '' + As an alternative to specifying + , you can specify the path to + the evaluated NixOS system configuration, typically a + symlink to a system profile. + ''; + }; + + }; + + config = mkMerge + [ { root = mkDefault "/var/lib/containers/${name}"; + } + (mkIf options.config.isDefined { + path = (import ../../lib/eval-config.nix { + modules = + let extraConfig = + { boot.isContainer = true; + security.initialRootPassword = "!"; + networking.hostName = mkDefault name; + }; + in [ extraConfig config.config ]; + prefix = [ "systemd" "containers" name ]; + }).config.system.build.toplevel; + }) + ]; + })); + + default = {}; + example = literalExample + '' + { webserver = + { root = "/containers/webserver"; + path = "/nix/var/nix/profiles/webserver"; + }; + database = + { root = "/containers/database"; + config = + { config, pkgs, ... }: + { services.postgresql.enable = true; + services.postgresql.package = pkgs.postgresql92; + }; + }; + } + ''; + description = '' + A set of NixOS system configurations to be run as lightweight + containers. Each container appears as a service + container-name + on the host system, allowing it to be started and stopped via + systemctl . + ''; + }; + + }; + + + config = { + + systemd.services = mapAttrs' (name: container: nameValuePair "container-${name}" + { description = "Container '${name}'"; + + wantedBy = [ "multi-user.target" ]; + + unitConfig.RequiresMountsFor = [ container.root ]; + + preStart = + '' + mkdir -p -m 0755 ${container.root}/etc + if ! [ -e ${container.root}/etc/os-release ]; then + touch ${container.root}/etc/os-release + fi + ''; + + serviceConfig.ExecStart = + "${config.systemd.package}/bin/systemd-nspawn -M ${name} -D ${container.root} --bind-ro=/nix ${container.path}/init"; + + preStop = + '' + pid="$(cat /sys/fs/cgroup/systemd/machine/${name}.nspawn/system/tasks 2> /dev/null)" + if [ -n "$pid" ]; then + # Send the RTMIN+3 signal, which causes the container + # systemd to start halt.target. + echo "killing container systemd, PID = $pid" + kill -RTMIN+3 $pid + # Wait for the container to exit. We can't let systemd + # do this because it will send a signal to the entire + # cgroup. + for ((n = 0; n < 180; n++)); do + if ! kill -0 $pid 2> /dev/null; then break; fi + sleep 1 + done + fi + ''; + }) config.systemd.containers; + + }; +} \ No newline at end of file