diff options
author | Connor Baker <connor.baker@tweag.io> | 2023-11-07 14:35:11 +0000 |
---|---|---|
committer | Connor Baker <connor.baker@tweag.io> | 2023-12-07 16:45:54 +0000 |
commit | 8e800cedaf24f5ad9717463b809b0beef7677000 (patch) | |
tree | 6cd091e8af79bf9d036b76f87105047143b620b9 /pkgs/development/cuda-modules | |
parent | 397d95d07fd095a3fba459a694bd284be3c47899 (diff) |
cudaPackages: move derivations to cuda-modules & support aarch64
cudaPackages.cuda_compat: ignore missing libs provided at runtime
cudaPackages.gpus: Jetson should never build by default
cudaPackages.flags: don't build Jetson capabilities by default
cudaPackages: re-introduce filter for pre-existing CUDA redist packages in overrides
cudaPackages: only recurseIntoAttrs for the latest of each major version
cudaPackages.nvccCompatibilities: use GCC 10 through CUDA 11.5 to avoid a GLIBC incompatibility
cudaPackages.cutensor: acquire libcublas through cudatoolkit prior to 11.4
cudaPackages.cuda_compat: mark as broken on aarch64-linux if not targeting Jetson
cudaPackages.cutensor_1_4: fix build
cudaPackages: adjust use of autoPatchelfIgnoreMissingDeps
cudaPackages.cuda_nvprof: remove unnecessary override to add addOpenGLRunpath
cudaPackages: use getExe' to avoid patchelf warning about missing meta.mainProgram
cudaPackages: fix evaluation with Nix 2.3
cudaPackages: fix platform detection for Jetson/non-Jetson aarch64-linux
python3Packages.tensorrt: mark as broken if required packages are missing
Note: evaluating the name of the derivation will fail if tensorrt is not present,
which is why we wrap the value in `lib.optionalString`.
cudaPackages.flags.getNixSystem: add guard based on jetsonTargets
cudaPackages.cudnn: use explicit path to patchelf
cudaPackages.tensorrt: use explicit path to patchelf
Diffstat (limited to 'pkgs/development/cuda-modules')
40 files changed, 2326 insertions, 29 deletions
diff --git a/pkgs/development/cuda-modules/backend-stdenv.nix b/pkgs/development/cuda-modules/backend-stdenv.nix new file mode 100644 index 000000000000..10fedd1e6f27 --- /dev/null +++ b/pkgs/development/cuda-modules/backend-stdenv.nix @@ -0,0 +1,39 @@ +{ + lib, + nvccCompatibilities, + cudaVersion, + buildPackages, + overrideCC, + stdenv, + wrapCCWith, +}: +let + gccMajorVersion = nvccCompatibilities.${cudaVersion}.gccMaxMajorVersion; + # We use buildPackages (= pkgsBuildHost) because we look for a gcc that + # runs on our build platform, and that produces executables for the host + # platform (= platform on which we deploy and run the downstream packages). + # The target platform of buildPackages.gcc is our host platform, so its + # .lib output should be the libstdc++ we want to be writing in the runpaths + # Cf. https://github.com/NixOS/nixpkgs/pull/225661#discussion_r1164564576 + nixpkgsCompatibleLibstdcxx = buildPackages.gcc.cc.lib; + nvccCompatibleCC = buildPackages."gcc${gccMajorVersion}".cc; + + cc = wrapCCWith { + cc = nvccCompatibleCC; + + # This option is for clang's libcxx, but we (ab)use it for gcc's libstdc++. + # Note that libstdc++ maintains forward-compatibility: if we load a newer + # libstdc++ into the process, we can still use libraries built against an + # older libstdc++. This, in practice, means that we should use libstdc++ from + # the same stdenv that the rest of nixpkgs uses. + # We currently do not try to support anything other than gcc and linux. 
+ libcxx = nixpkgsCompatibleLibstdcxx; + }; + cudaStdenv = overrideCC stdenv cc; + passthruExtra = { + inherit nixpkgsCompatibleLibstdcxx; + # cc already exposed + }; + assertCondition = true; +in +lib.extendDerivation assertCondition passthruExtra cudaStdenv diff --git a/pkgs/development/cuda-modules/cuda/extension.nix b/pkgs/development/cuda-modules/cuda/extension.nix new file mode 100644 index 000000000000..20ec90d05846 --- /dev/null +++ b/pkgs/development/cuda-modules/cuda/extension.nix @@ -0,0 +1,101 @@ +{cudaVersion, lib}: +let + inherit (lib) attrsets modules trivial; + redistName = "cuda"; + + # Manifest files for CUDA redistributables (aka redist). These can be found at + # https://developer.download.nvidia.com/compute/cuda/redist/ + # Maps a cuda version to the specific version of the manifest. + cudaVersionMap = { + "11.4" = "11.4.4"; + "11.5" = "11.5.2"; + "11.6" = "11.6.2"; + "11.7" = "11.7.1"; + "11.8" = "11.8.0"; + "12.0" = "12.0.1"; + "12.1" = "12.1.1"; + "12.2" = "12.2.2"; + }; + + # Check if the current CUDA version is supported. + cudaVersionMappingExists = builtins.hasAttr cudaVersion cudaVersionMap; + + # fullCudaVersion : String + fullCudaVersion = cudaVersionMap.${cudaVersion}; + + evaluatedModules = modules.evalModules { + modules = [ + ../modules + # We need to nest the manifests in a config.cuda.manifests attribute so the + # module system can evaluate them. + { + cuda.manifests = { + redistrib = trivial.importJSON (./manifests + "/redistrib_${fullCudaVersion}.json"); + feature = trivial.importJSON (./manifests + "/feature_${fullCudaVersion}.json"); + }; + } + ]; + }; + + # Generally we prefer to do things involving getting attribute names with feature_manifest instead + # of redistrib_manifest because the feature manifest will have *only* the redist architecture + # names as the keys, whereas the redistrib manifest will also have things like version, name, license, + # and license_path. 
+ featureManifest = evaluatedModules.config.cuda.manifests.feature; + redistribManifest = evaluatedModules.config.cuda.manifests.redistrib; + + # Builder function which builds a single redist package for a given platform. + # buildRedistPackage : callPackage -> PackageName -> Derivation + buildRedistPackage = + callPackage: pname: + let + redistribRelease = redistribManifest.${pname}; + featureRelease = featureManifest.${pname}; + drv = + (callPackage ../generic-builders/manifest.nix { + # We pass the whole release to the builder because it has logic to handle + # the case we're trying to build on an unsupported platform. + inherit + pname + redistName + redistribRelease + featureRelease + ; + }).overrideAttrs + ( + prevAttrs: { + # Add the package-specific license. + meta = prevAttrs.meta // { + license = + let + licensePath = + if redistribRelease.license_path != null then + redistribRelease.license_path + else + "${pname}/LICENSE.txt"; + url = "https://developer.download.nvidia.com/compute/cuda/redist/${licensePath}"; + in + lib.licenses.nvidiaCudaRedist // {inherit url;}; + }; + } + ); + in + drv; + + # Build all the redist packages given final and prev. + redistPackages = + final: _prev: + # Wrap the whole thing in an optionalAttrs so we can return an empty set if the CUDA version + # is not supported. + # NOTE: We cannot include the call to optionalAttrs *in* the pipe as we would strictly evaluate the + # attrNames before we check if the CUDA version is supported. 
+ attrsets.optionalAttrs cudaVersionMappingExists ( + trivial.pipe featureManifest [ + # Get all the package names + builtins.attrNames + # Build the redist packages + (trivial.flip attrsets.genAttrs (buildRedistPackage final.callPackage)) + ] + ); +in +redistPackages diff --git a/pkgs/development/cuda-modules/cuda/overrides.nix b/pkgs/development/cuda-modules/cuda/overrides.nix new file mode 100644 index 000000000000..061d5da16bb5 --- /dev/null +++ b/pkgs/development/cuda-modules/cuda/overrides.nix @@ -0,0 +1,129 @@ +{cudaVersion, lib}: +let + inherit (lib) attrsets lists strings; + # cudaVersionOlder : Version -> Boolean + cudaVersionOlder = strings.versionOlder cudaVersion; + # cudaVersionAtLeast : Version -> Boolean + cudaVersionAtLeast = strings.versionAtLeast cudaVersion; + + addBuildInputs = + drv: buildInputs: + drv.overrideAttrs (prevAttrs: {buildInputs = prevAttrs.buildInputs ++ buildInputs;}); +in +# NOTE: Filter out attributes that are not present in the previous version of +# the package set. This is necessary to prevent the appearance of attributes +# like `cuda_nvcc` in `cudaPackages_10_0, which predates redistributables. +final: prev: +attrsets.filterAttrs (attr: _: (builtins.hasAttr attr prev)) { + libcufile = prev.libcufile.overrideAttrs ( + prevAttrs: { + buildInputs = prevAttrs.buildInputs ++ [ + final.libcublas.lib + final.pkgs.numactl + final.pkgs.rdma-core + ]; + # Before 11.7 libcufile depends on itself for some reason. 
+ env.autoPatchelfIgnoreMissingDeps = + prevAttrs.env.autoPatchelfIgnoreMissingDeps + + strings.optionalString (cudaVersionOlder "11.7") " libcufile.so.0"; + } + ); + + libcusolver = addBuildInputs prev.libcusolver ( + # Always depends on this + [final.libcublas.lib] + # Dependency from 12.0 and on + ++ lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib] + # Dependency from 12.1 and on + ++ lists.optionals (cudaVersionAtLeast "12.1") [final.libcusparse.lib] + ); + + libcusparse = addBuildInputs prev.libcusparse ( + lists.optionals (cudaVersionAtLeast "12.0") [final.libnvjitlink.lib] + ); + + cuda_compat = prev.cuda_compat.overrideAttrs ( + prevAttrs: { + env.autoPatchelfIgnoreMissingDeps = + prevAttrs.env.autoPatchelfIgnoreMissingDeps + " libnvrm_gpu.so libnvrm_mem.so"; + # `cuda_compat` only works on aarch64-linux, and only when building for Jetson devices. + brokenConditions = prevAttrs.brokenConditions // { + "Trying to use cuda_compat on aarch64-linux targeting non-Jetson devices" = + !final.flags.isJetsonBuild; + }; + } + ); + + cuda_gdb = addBuildInputs prev.cuda_gdb ( + # x86_64 only needs gmp from 12.0 and on + lists.optionals (cudaVersionAtLeast "12.0") [final.pkgs.gmp] + ); + + cuda_nvcc = prev.cuda_nvcc.overrideAttrs ( + oldAttrs: { + propagatedBuildInputs = [final.setupCudaHook]; + + meta = (oldAttrs.meta or {}) // { + mainProgram = "nvcc"; + }; + } + ); + + cuda_nvprof = prev.cuda_nvprof.overrideAttrs ( + prevAttrs: {buildInputs = prevAttrs.buildInputs ++ [final.cuda_cupti.lib];} + ); + + cuda_demo_suite = addBuildInputs prev.cuda_demo_suite [ + final.pkgs.freeglut + final.pkgs.libGLU + final.pkgs.libglvnd + final.pkgs.mesa + final.libcufft.lib + final.libcurand.lib + ]; + + nsight_compute = prev.nsight_compute.overrideAttrs ( + prevAttrs: { + nativeBuildInputs = + prevAttrs.nativeBuildInputs + ++ ( + if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then + [final.pkgs.qt5.wrapQtAppsHook] + else + 
[final.pkgs.qt6.wrapQtAppsHook] + ); + buildInputs = + prevAttrs.buildInputs + ++ ( + if (strings.versionOlder prev.nsight_compute.version "2022.2.0") then + [final.pkgs.qt5.qtwebview] + else + [final.pkgs.qt6.qtwebview] + ); + } + ); + + nsight_systems = prev.nsight_systems.overrideAttrs ( + prevAttrs: { + nativeBuildInputs = prevAttrs.nativeBuildInputs ++ [final.pkgs.qt5.wrapQtAppsHook]; + buildInputs = prevAttrs.buildInputs ++ [ + final.pkgs.alsa-lib + final.pkgs.e2fsprogs + final.pkgs.nss + final.pkgs.numactl + final.pkgs.pulseaudio + final.pkgs.wayland + final.pkgs.xorg.libXcursor + final.pkgs.xorg.libXdamage + final.pkgs.xorg.libXrandr + final.pkgs.xorg.libXtst + ]; + } + ); + + nvidia_driver = prev.nvidia_driver.overrideAttrs { + # No need to support this package as we have drivers already + # in linuxPackages. + meta.broken = true; + }; +} diff --git a/pkgs/development/cuda-modules/cudatoolkit/default.nix b/pkgs/development/cuda-modules/cudatoolkit/default.nix new file mode 100644 index 000000000000..e0868e2b1c2c --- /dev/null +++ b/pkgs/development/cuda-modules/cudatoolkit/default.nix @@ -0,0 +1,374 @@ +{ + cudaVersion, + runPatches ? [], + autoPatchelfHook, + autoAddOpenGLRunpathHook, + addOpenGLRunpath, + alsa-lib, + curlMinimal, + expat, + fetchurl, + fontconfig, + freetype, + gdk-pixbuf, + glib, + glibc, + gst_all_1, + gtk2, + lib, + libxkbcommon, + libkrb5, + krb5, + makeWrapper, + markForCudatoolkitRootHook, + ncurses5, + numactl, + nss, + patchelf, + perl, + python3, # FIXME: CUDAToolkit 10 may still need python27 + pulseaudio, + setupCudaHook, + stdenv, + backendStdenv, # E.g. gcc11Stdenv, set in extension.nix + unixODBC, + wayland, + xorg, + zlib, + freeglut, + libGLU, + libsForQt5, + libtiff, + qt6Packages, + qt6, + rdma-core, + ucx, + rsync, +}: + +let + # Version info for the classic cudatoolkit packages that contain everything that is in redist. 
+ releases = builtins.import ./releases.nix; + release = releases.${cudaVersion}; +in + +backendStdenv.mkDerivation rec { + pname = "cudatoolkit"; + inherit (release) version; + inherit runPatches; + + dontPatchELF = true; + dontStrip = true; + + src = fetchurl {inherit (release) url sha256;}; + + outputs = [ + "out" + "lib" + "doc" + ]; + + nativeBuildInputs = + [ + perl + makeWrapper + rsync + addOpenGLRunpath + autoPatchelfHook + autoAddOpenGLRunpathHook + markForCudatoolkitRootHook + ] + ++ lib.optionals (lib.versionOlder version "11") [libsForQt5.wrapQtAppsHook] + ++ lib.optionals (lib.versionAtLeast version "11.8") [qt6Packages.wrapQtAppsHook]; + depsTargetTargetPropagated = [setupCudaHook]; + buildInputs = + lib.optionals (lib.versionOlder version "11") [ + libsForQt5.qt5.qtwebengine + freeglut + libGLU + ] + ++ [ + # To get $GDK_PIXBUF_MODULE_FILE via setup-hook + gdk-pixbuf + + # For autoPatchelf + ncurses5 + expat + python3 + zlib + glibc + xorg.libX11 + xorg.libXext + xorg.libXrender + xorg.libXt + xorg.libXtst + xorg.libXi + xorg.libXext + xorg.libXdamage + xorg.libxcb + xorg.xcbutilimage + xorg.xcbutilrenderutil + xorg.xcbutilwm + xorg.xcbutilkeysyms + pulseaudio + libxkbcommon + libkrb5 + krb5 + gtk2 + glib + fontconfig + freetype + numactl + nss + unixODBC + alsa-lib + wayland + ] + ++ lib.optionals (lib.versionAtLeast version "11.8") [ + (lib.getLib libtiff) + qt6Packages.qtwayland + rdma-core + (ucx.override {enableCuda = false;}) # Avoid infinite recursion + xorg.libxshmfence + xorg.libxkbfile + ] + ++ (lib.optionals (lib.versionAtLeast version "12") ( + map lib.getLib ([ + # Used by `/target-linux-x64/CollectX/clx` and `/target-linux-x64/CollectX/libclx_api.so` for: + # - `libcurl.so.4` + curlMinimal + + # Used by `/host-linux-x64/Scripts/WebRTCContainer/setup/neko/server/bin/neko` + gst_all_1.gstreamer + gst_all_1.gst-plugins-base + ]) + ++ ( + with qt6; [ + qtmultimedia + qttools + qtpositioning + qtscxml + qtsvg + qtwebchannel + qtwebengine + 
] + ) + )); + + # Prepended to runpaths by autoPatchelf. + # The order inherited from older rpath preFixup code + runtimeDependencies = [ + (placeholder "lib") + (placeholder "out") + "${placeholder "out"}/nvvm" + # NOTE: use the same libstdc++ as the rest of nixpkgs, not from backendStdenv + "${lib.getLib stdenv.cc.cc}/lib64" + "${placeholder "out"}/jre/lib/amd64/jli" + "${placeholder "out"}/lib64" + "${placeholder "out"}/nvvm/lib64" + ]; + + autoPatchelfIgnoreMissingDeps = [ + # This is the hardware-dependent userspace driver that comes from + # nvidia_x11 package. It must be deployed at runtime in + # /run/opengl-driver/lib or pointed at by LD_LIBRARY_PATH variable, rather + # than pinned in runpath + "libcuda.so.1" + + # The krb5 expression ships libcom_err.so.3 but cudatoolkit asks for the + # older + # This dependency is asked for by target-linux-x64/CollectX/RedHat/x86_64/libssl.so.10 + # - do we even want to use nvidia-shipped libssl? + "libcom_err.so.2" + ]; + + preFixup = + if lib.versionOlder version "11" then + '' + ${lib.getExe' patchelf "patchelf"} $out/targets/*/lib/libnvrtc.so --add-needed libnvrtc-builtins.so + '' + else + '' + ${lib.getExe' patchelf "patchelf"} $out/lib64/libnvrtc.so --add-needed libnvrtc-builtins.so + ''; + + unpackPhase = '' + sh $src --keep --noexec + + ${lib.optionalString (lib.versionOlder version "10.1") '' + cd pkg/run_files + sh cuda-linux*.run --keep --noexec + sh cuda-samples*.run --keep --noexec + mv pkg ../../$(basename $src) + cd ../.. + rm -rf pkg + + for patch in $runPatches; do + sh $patch --keep --noexec + mv pkg $(basename $patch) + done + ''} + ''; + + installPhase = + '' + runHook preInstall + mkdir $out + ${lib.optionalString (lib.versionOlder version "10.1") '' + cd $(basename $src) + export PERL5LIB=. + perl ./install-linux.pl --prefix="$out" + cd .. + for patch in $runPatches; do + cd $(basename $patch) + perl ./install_patch.pl --silent --accept-eula --installdir="$out" + cd .. 
+ done + ''} + ${lib.optionalString (lib.versionAtLeast version "10.1" && lib.versionOlder version "11") '' + cd pkg/builds/cuda-toolkit + mv * $out/ + ''} + ${lib.optionalString (lib.versionAtLeast version "11") '' + mkdir -p $out/bin $out/lib64 $out/include $doc + for dir in pkg/builds/* pkg/builds/cuda_nvcc/nvvm pkg/builds/cuda_cupti/extras/CUPTI; do + if [ -d $dir/bin ]; then + mv $dir/bin/* $out/bin + fi + if [ -d $dir/doc ]; then + (cd $dir/doc && find . -type d -exec mkdir -p $doc/\{} \;) + (cd $dir/doc && find . \( -type f -o -type l \) -exec mv \{} $doc/\{} \;) + fi + if [ -L $dir/include ] || [ -d $dir/include ]; then + (cd $dir/include && find . -type d -exec mkdir -p $out/include/\{} \;) + (cd $dir/include && find . \( -type f -o -type l \) -exec mv \{} $out/include/\{} \;) + fi + if [ -L $dir/lib64 ] || [ -d $dir/lib64 ]; then + (cd $dir/lib64 && find . -type d -exec mkdir -p $out/lib64/\{} \;) + (cd $dir/lib64 && find . \( -type f -o -type l \) -exec mv \{} $out/lib64/\{} \;) + fi + done + mv pkg/builds/cuda_nvcc/nvvm $out/nvvm + + mv pkg/builds/cuda_sanitizer_api $out/cuda_sanitizer_api + ln -s $out/cuda_sanitizer_api/compute-sanitizer/compute-sanitizer $out/bin/compute-sanitizer + + mv pkg/builds/nsight_systems/target-linux-x64 $out/target-linux-x64 + mv pkg/builds/nsight_systems/host-linux-x64 $out/host-linux-x64 + rm $out/host-linux-x64/libstdc++.so* + ''} + ${ + lib.optionalString (lib.versionAtLeast version "11.8") + # error: auto-patchelf could not satisfy dependency libtiff.so.5 wanted by /nix/store/.......-cudatoolkit-12.0.1/host-linux-x64/Plugins/imageformats/libqtiff.so + # we only ship libtiff.so.6, so let's use qt plugins built by Nix. + # TODO: don't copy, come up with a symlink-based "merge" + '' + rsync ${lib.getLib qt6Packages.qtimageformats}/lib/qt-6/plugins/ $out/host-linux-x64/Plugins/ -aP + '' + } + + rm -f $out/tools/CUDA_Occupancy_Calculator.xls # FIXME: why? 
+ + ${lib.optionalString (lib.versionOlder version "10.1") '' + # let's remove the 32-bit libraries, they confuse the lib64->lib mover + rm -rf $out/lib + ''} + + ${lib.optionalString (lib.versionAtLeast version "12.0") '' + rm $out/host-linux-x64/libQt6* + ''} + + # Remove some cruft. + ${lib.optionalString ((lib.versionAtLeast version "7.0") && (lib.versionOlder version "10.1")) + "rm $out/bin/uninstall*"} + + # Fixup path to samples (needed for cuda 6.5 or else nsight will not find them) + if [ -d "$out"/cuda-samples ]; then + mv "$out"/cuda-samples "$out"/samples + fi + + # Change the #error on GCC > 4.9 to a #warning. + sed -i $out/include/host_config.h -e 's/#error\(.*unsupported GNU version\)/#warning\1/' + + # Fix builds with newer glibc version + sed -i "1 i#define _BITS_FLOATN_H" "$out/include/host_defines.h" + '' + + + # Point NVCC at a compatible compiler + # CUDA_TOOLKIT_ROOT_DIR is legacy, + # Cf. https://cmake.org/cmake/help/latest/module/FindCUDA.html#input-variables + '' + mkdir -p $out/nix-support + cat <<EOF >> $out/nix-support/setup-hook + cmakeFlags+=' -DCUDA_TOOLKIT_ROOT_DIR=$out' + EOF + + # Move some libraries to the lib output so that programs that + # depend on them don't pull in this entire monstrosity. + mkdir -p $lib/lib + mv -v $out/lib64/libcudart* $lib/lib/ + + # Remove OpenCL libraries as they are provided by ocl-icd and driver. + rm -f $out/lib64/libOpenCL* + ${lib.optionalString (lib.versionAtLeast version "10.1" && (lib.versionOlder version "11")) '' + mv $out/lib64 $out/lib + mv $out/extras/CUPTI/lib64/libcupti* $out/lib + ''} + + # nvprof do not find any program to profile if LD_LIBRARY_PATH is not set + wrapProgram $out/bin/nvprof \ + --prefix LD_LIBRARY_PATH : $out/lib + '' + + lib.optionalString (lib.versionOlder version "8.0") '' + # Hack to fix building against recent Glibc/GCC. 
+ echo "NIX_CFLAGS_COMPILE+=' -D_FORCE_INLINES'" >> $out/nix-support/setup-hook + '' + # 11.8 includes a broken symlink, include/include, pointing to targets/x86_64-linux/include + + lib.optionalString (lib.versions.majorMinor version == "11.8") '' + rm $out/include/include + '' + + '' + runHook postInstall + ''; + + postInstall = '' + for b in nvvp ${lib.optionalString (lib.versionOlder version "11") "nsight"}; do + wrapProgram "$out/bin/$b" \ + --set GDK_PIXBUF_MODULE_FILE "$GDK_PIXBUF_MODULE_FILE" + done + ''; + + # cuda-gdb doesn't run correctly when not using sandboxing, so + # temporarily disabling the install check. This should be set to true + # when we figure out how to get `cuda-gdb --version` to run correctly + # when not using sandboxing. + doInstallCheck = false; + postInstallCheck = '' + # Smoke test binaries + pushd $out/bin + for f in *; do + case $f in + crt) continue;; + nvcc.profile) continue;; + nsight_ee_plugins_manage.sh) continue;; + uninstall_cuda_toolkit_6.5.pl) continue;; + |