ik_llama.cpp/.devops/nix/package.nix

{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useRocm
    useVulkan
  ] && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useRocm ? config.rocmSupport,
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  useVulkan ? false,
  useRpc ? false,
  enableCurl ? true,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false
}@inputs:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionalAttrs
    optionals
    strings
    ;

  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  xcrunHost = runCommand "xcrunHost" {} ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cudart
    cuda_cccl # <nv/target>
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in

effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml/src/ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';

    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
    # `default.metallib` may be compiled with Metal compiler from XCode
    # and we need to escape sandbox on MacOS to access Metal compiler.
    # `xcrun` is used find the path of the Metal compiler, which is varible
    # and not on $PATH
    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc
        autoAddDriverRunpath
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
        glibc.static
      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
        xcrunHost
      ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useBlas [ blas ]
      ++ optionals useVulkan vulkanBuildInputs
      ++ optionals enableCurl [ curl ];

    cmakeFlags =
      [
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_CURL" enableCurl)
        (cmakeBool "GGML_NATIVE" false)
        (cmakeBool "GGML_BLAS" useBlas)
        (cmakeBool "GGML_CUDA" useCuda)
        (cmakeBool "GGML_HIPBLAS" useRocm)
        (cmakeBool "GGML_METAL" useMetalKit)
        (cmakeBool "GGML_VULKAN" useVulkan)
        (cmakeBool "GGML_STATIC" enableStatic)
        (cmakeBool "GGML_RPC" useRpc)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
      ]
      ++ optionals useMetalKit [
        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
        (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
      ];

    # Environment variables needed for ROCm
    env = optionalAttrs useRocm {
      ROCM_PATH = "${rocmPackages.clr}";
      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
    };

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mkdir -p $out/include
      cp $src/include/llama.h $out/include/
    '';

    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useRocm
        useVulkan
        ;
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals useCuda lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "ik_llama.cpp: llama.cpp fork with better CPU performance${descriptionSuffix}";
      homepage = "https://github.com/ikawrakow/ik_llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama-cli";

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)