Rebuilding the world (with nix)

I ran into an interesting problem when adding support for Modified Condition/Decision Coverage (MC/DC) in gcc (a topic for a later post): how do you test a new compiler feature at large scale? In this case, I wanted to figure out what kind of control flow graphs gcc could create, how particular graph shapes would interact with my changes, and if my assumptions actually held. Hand-rolled examples took me quite far, but not to the point where I was confident I hadn’t missed anything.

Trying to answer some of these questions, I started pulling random new and old free software packages and building them with my snapshot. It paid off right away - already in the first few projects I came across a few graph shapes that were surprising, and found defects. All good, except now I had to rebuild the same projects with the new snapshot. Every new project also meant figuring out exactly how to ./configure && make, which tends to be similar, but with enough exceptions for it to be troublesome.

I got to about five projects before I gave up - there must be a better way. This is where the big software distributions come in. I would imagine you can achieve something similar with Debian, portage, or ports, but it would probably involve setting up some infrastructure so that packages pull my snapshot compiler. The simple apt source <pkg> && dpkg-buildpackage is not really sufficient.

So I gave it a go in nix, and ended up with this default.nix:

{ pkgs ? import <nixpkgs> {
    overlays = [
        # Overlay 1: build gcc13 from a local snapshot tarball and derive a
        # gcc13Stdenv from it that compiles everything with the coverage flags.
        (self: super:
            let
                # Use a different source tarball for gcc13. To test a new
                # snapshot, replace the sha256 below.
                gcc13cc = super.gcc13.cc.overrideAttrs (old: {
                    version = "13.1.0-mcdc";
                    src = super.fetchurl {
                        url = "file:///home/src/gcc/gcc-snapshot.tar.gz";
                        sha256 = "sha256:0000000000000000000000000000000000000000000000000000";
                    };
                    # flex is added on top of the stock build tools
                    # (presumably the snapshot needs it to build -- confirm).
                    nativeBuildInputs = old.nativeBuildInputs ++ [super.flex];
                    # keep every stock patch except no-sys-dirs-riscv.patch
                    patches = builtins.filter (p: builtins.baseNameOf p != "no-sys-dirs-riscv.patch") old.patches;
                });

                # stdenv adapters; each one maps a stdenv to a new stdenv
                cflags  = super.withCFlags ["--coverage" "-fcondition-coverage"];
                # NOTE: ldflags is not in the adapter list below, so it
                # currently has no effect. TODO: -lgcov
                ldflags = super.addAttrsToDerivation { NIX_CFLAGS_LINK = "--coverage"; };
                # link perl modules through the compiler driver ($CC) rather
                # than ld, so that the gcov runtime gets linked properly
                plflags = super.addAttrsToDerivation { perlPreHook = "export LD=$CC"; };
                withcc  = cc: stdenv: super.overrideCC stdenv cc;
                gcc13   = super.gcc13.override { cc = gcc13cc; };
            in {
                # lib.pipe applies the adapters left to right:
                #   pipe env [f g h] = h(g(f(env)))
                gcc13Stdenv = super.lib.pipe super.gcc13Stdenv [(withcc gcc13) cflags plflags];
            }
        )

        # Overlay 2: per-package tweaks needed to get individual packages
        # through the build with the instrumented compiler. List attributes
        # are extended with `old.<attr> or []` throughout so that a package
        # that does not define the attribute does not abort evaluation.
        (self: super: {
            # skip check phase on libxcrypt as it uses --wrap, which seems to not
            # resolve -lgcov, probably linking order
            libxcrypt = super.libxcrypt.overrideAttrs (_: { doCheck = false; });

            # these packages are necessary to run the tests, so skipping the
            # tests entirely would be an alternative
            gnutls = super.gnutls.overrideAttrs (old: {
                buildInputs = (old.buildInputs or []) ++ [
                    super.openssl
                    super.datefudge
                    super.socat
                ];
            });

            libdaemon = super.libdaemon.overrideAttrs (old: {
                patches = (old.patches or []) ++ [./libdaemon-typedef-right-order.patch];
            });

            # libhwy's logical_tests never finished linking (?? no idea what this is)
            libhwy = super.libhwy.overrideAttrs (old: {
                doCheck = false;
            });

            zeromq = super.zeromq.overrideAttrs (old: {
                # this is applied in upstream nixpkgs, so for future proofing
                # this must be checked to not be applied twice.
                patches = (old.patches or []) ++ [
                    # Backport gcc-13 fix:
                    #   https://github.com/zeromq/libzmq/pull/4480
                    (self.fetchpatch {
                        name = "gcc-13.patch";
                        url  = "https://github.com/zeromq/libzmq/commit/438d5d88392baffa6c2c5e0737d9de19d6686f0d.patch";
                        hash = "sha256-tSTYSrQzgnfbY/70QhPdOnpEXX05VAYwVYuW8P1LWf0=";
                    })
                ];
            });

            openexr = super.openexr.overrideAttrs (old: {
                patches = (old.patches or []) ++ [./0001-openexr-include-cstdint.patch];
            });

            # the elfutils tests cannot find rpm2cpio. rpm depends on rpm2cpio
            # so either the rpm source must be impure, or we must skip the
            # test. The latter is easier for this purpose.
            elfutils = super.elfutils.overrideAttrs (_: {
                doCheck = false;
                doInstallCheck = false;
            });
        })

        # Overlay 3: make llvmPackages_11 build with gcc13.
        #
        # The llvm tree does not build on gcc13; this is a known problem that
        # is fixed with a small patch:
        # https://github.com/JuliaLang/llvm-project/commit/ff1681ddb303223973653f7f5f3f3435b48a1983
        #
        # compiler-rt needs separate treatment: -ffreestanding removes
        # declarations like free, malloc, strtoull from stdlib.h. The flag
        # exists to disable a particular optimization in clang, but only gcc
        # is used here, and the sanitizers (where this happens) are not
        # interesting.
        #
        # The tests take forever to run and are pretty uninteresting for this
        # experiment, so they are disabled as well.
        (self: super:
            let
                llvm11 = super.llvmPackages_11;

                cstdintFix        = [./0001-include-cstdint-llvm11.patch];
                noFreestandingFix = [./0001-compiler-rt-no-ffreestanding.patch];

                # Append extra patches to a package and switch off its check
                # phase.
                withPatches = extra: drv: drv.overrideAttrs (prev: {
                    doCheck = false;
                    patches = (prev.patches or []) ++ extra;
                });
                fixCstdint    = withPatches cstdintFix;
                fixCompilerRt = withPatches noFreestandingFix;

                # Register ./no-fprofile.sh as an additional setup hook.
                addNoFprofileHook = drv: drv.overrideAttrs (prev: {
                    setupHooks = prev.setupHooks ++ [./no-fprofile.sh];
                });

                patchedLibraries = llvm11.libraries.extend (_: libs: {
                    libcxx              = fixCstdint    libs.libcxx;
                    libcxxabi           = fixCstdint    libs.libcxxabi;
                    compiler-rt-libc    = fixCompilerRt libs.compiler-rt-libc;
                    compiler-rt-no-libc = fixCompilerRt libs.compiler-rt-no-libc;
                });

                patchedTools = llvm11.tools.extend (_: tools: {
                    libllvm        = fixCstdint        tools.libllvm;
                    libcxxClang    = addNoFprofileHook tools.libcxxClang;
                    libstdcxxClang = addNoFprofileHook tools.libstdcxxClang;
                });
            in {
                llvmPackages_11 = llvm11 // {
                    libraries = patchedLibraries;
                    tools     = patchedTools;

                    # The top-level attributes still capture the old packages,
                    # so re-export the overridden ones. Listing them by hand is
                    # not great; a programmatic solution would be better.
                    inherit (self.llvmPackages_11.tools)     libllvm clang;
                    inherit (self.llvmPackages_11.libraries) compiler-rt;
                };
            }
        )

        # Overlay 4: disable the cffi tests that cannot pass under gcov.
        #
        # python's cffi tests build modules and check that only the expected
        # artifacts are produced, which breaks with gcov because the .gcno
        # files are generated too. gcov also adds extra symbols cffi does not
        # expect, so the no_unknown_exported_symbols test fails.
        (self: super:
            let
                gcovIncompatibleTests = [
                    "test_no_unknown_exported_symbols"
                    "test_api_compile_1"
                    "test_api_compile_2"
                    "test_api_compile_3"
                    "test_api_compile_explicit_target_1"
                    "test_api_compile_explicit_target_2"
                    "test_api_compile_explicit_target_3"
                ];
            in {
                python310 = super.python310.override {
                    packageOverrides = pyself: pysuper: {
                        cffi = pysuper.cffi.overrideAttrs (old: {
                            # `or []` keeps evaluation working even if the
                            # package defines no disabledTests of its own
                            disabledTests = (old.disabledTests or []) ++ gcovIncompatibleTests;
                        });
                    };
                };
            }
        )

        # Overlay 5: build valgrind, systemd and gnu-efi without coverage.
        #
        # valgrind and systemd compile fine with the coverage flags, but the
        # link fails because they do not link to libc (-nostdlib), which
        # means the fwrite etc. calls in libgcov do not resolve. Since the
        # information we need is available at this point, these packages are
        # simply built without mcdc. gnu-efi gets the same treatment.
        (self: super:
            let
                coverageFlags = ["--coverage" "-fcondition-coverage"];
                # one empty replacement per flag above
                blanks = ["" ""];

                # Strip the coverage flags from the NIX_CFLAGS_COMPILE of the
                # given derivation.
                stripCoverage = drv: drv.overrideDerivation (attrs: {
                    NIX_CFLAGS_COMPILE =
                        builtins.replaceStrings coverageFlags blanks attrs.NIX_CFLAGS_COMPILE;
                });
            in {
                gnu-efi  = stripCoverage super.gnu-efi;
                systemd  = stripCoverage super.systemd;
                valgrind = stripCoverage super.valgrind;
            }
        )
    ];
    # Use the gcc13Stdenv produced by the overlays above as the stdenv of this
    # nixpkgs instance, so that the shell's packages and their dependencies
    # are built with the snapshot compiler and the coverage flags.
    config.replaceStdenv = { pkgs, ... }: pkgs.gcc13Stdenv;
}}:

# The shell exists to force a build of the listed packages (and, through
# them, all of their build- and runtime dependencies) with the instrumented
# stdenv.
pkgs.mkShell {
    # `packages` is the mkShell argument for packages to make available in
    # the shell. An attribute named `inputs` is not known to mkShell and is
    # only exported as an environment variable.
    packages = with pkgs; [
        tree
        cpio
        git
        icu
        openjdk
    ];
}

This uses mkShell, but it is quite easy to transform it into mkDerivation. mkShell suited my workflow at the time, so I kept it around. Running nix-shell default.nix would (re)build the gcc snapshot, and then build tree, cpio, git, icu, and openjdk and every build- and runtime dependency of these targets with the -fcondition-coverage flag. That is a huge list of software, which includes perl and llvm. In order to test my latest snapshot, all I had to do was replace the hash (gcc13cc.src.sha256). I found a lot of defects that way (openssl was particularly good at creating surprising control flow graphs), and by the time openjdk built I was quite confident that at least the cfg analysis phase of my program worked well. Every defect I found made its way into the test suite. More targets could be added to the package list.

Some packages needed a few tweaks in order to build. Nothing too bad, and occasionally lazily fixed with patches. Those problems were just worked out one-by-one as they stopped the build.

Patches and script: