Update nix flake: sync with upstream, fix for newer nixpkgs (#1371)

* Update nix flake: sync with upstream, fix for newer nixpkgs

  - Sync package.nix with upstream: add rocmGpuTargets/useRpc params, fix env optionals→optionalAttrs, remove unused inputs
  - Rewrite devshells.nix to use mkShell directly instead of passthru
  - Fix CUDA license check in nixpkgs-instances.nix for list-type licenses
  - Update flake inputs (nixpkgs, flake-parts) to latest
  - Update references from ggerganov/llama.cpp to ikawrakow/ik_llama.cpp

  Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

* Apply nix review suggestions from baileylu121 (#1371)

  - Use legacyPackages instead of import to avoid extra nixpkgs instances
  - Use inherit (pkgs) stdenv idiom
  - Remove unnecessary lib.pipe wrapper in devshells.nix
  - Simplify license normalization with lib.toList in nixpkgs-instances.nix

  Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
2026-06-28 04:30:15 -05:00 · 2026-03-24 16:17:52 +09:00 · 2026-03-24 16:17:52 +09:00 · f6c8b5f2cb
commit f6c8b5f2cb
parent f4125e8b1f
5 changed files with 75 additions and 151 deletions
--- a/.devops/nix/devshells.nix
+++ b/.devops/nix/devshells.nix
@ -1,13 +1,44 @@
+{ inputs, ... }:
+
 {
  perSystem =
-    { config, lib, ... }:
+    {
+      config,
+      lib,
+      system,
+      ...
+    }:
    {
      devShells =
-        lib.concatMapAttrs
-          (name: package: {
-            ${name} = package.passthru.shell;
-            ${name + "-extra"} = package.passthru.shell-extra;
-          })
-          config.packages;
+        let
+          pkgs = inputs.nixpkgs.legacyPackages.${system};
+          inherit (pkgs) stdenv;
+        in
+        lib.concatMapAttrs (
+          name: package: {
+            ${name} = pkgs.mkShell {
+              name = "${name}";
+              inputsFrom = [ package ];
+              shellHook = ''
+                echo "Entering ${name} devShell"
+              '';
+            };
+            "${name}-extra" = pkgs.mkShell {
+              name = "${name}-extra";
+              inputsFrom = [ package ];
+              packages = with pkgs.python3Packages; [
+                numpy
+                sentencepiece
+                tiktoken
+                torchWithoutCuda
+                transformers
+              ];
+              shellHook = ''
+                echo "Entering ${name}-extra devShell"
+                addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
+              '';
+            };
+          }
+        ) config.packages;
    };
 }
--- a/.devops/nix/nixpkgs-instances.nix
+++ b/.devops/nix/nixpkgs-instances.nix
@ -4,7 +4,7 @@
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
-    { system, ... }:
+    { lib, system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
@ -26,6 +26,9 @@
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
+            let
+              licenses = lib.toList (p.meta.license or []);
+            in
            builtins.all
              (
                license:
@ -35,7 +38,7 @@
                  "cuDNN EULA"
                ]
              )
-              (p.meta.licenses or [ p.meta.license ]);
+              licenses;
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
--- a/.devops/nix/package.nix
+++ b/.devops/nix/package.nix
@ -3,13 +3,11 @@
  glibc,
  config,
  stdenv,
-  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
-  python3,
  mpi,
  blas,
  cudaPackages,
@ -30,8 +28,10 @@
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useRocm ? config.rocmSupport,
-  enableCurl ? true,
+  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  useVulkan ? false,
+  useRpc ? false,
+  enableCurl ? true,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
@ -45,9 +45,9 @@ let
  inherit (lib)
    cmakeBool
    cmakeFeature
+    optionalAttrs
    optionals
    strings
-    versionOlder
    ;

  stdenv = throw "Use effectiveStdenv instead";
@ -67,49 +67,6 @@ let
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

-  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
-
-  # TODO: package the Python in this repository in a Nix-like way.
-  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
-  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
-  # https://peps.python.org/pep-0517/
-  #
-  # TODO: Package up each Python script or service appropriately, by making
-  # them into "entrypoints"
-  llama-python = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-    ]
-  );
-
-  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
-  llama-python-extra = python3.withPackages (
-    ps: [
-      ps.numpy
-      ps.sentencepiece
-      ps.tiktoken
-      ps.torchWithoutCuda
-      ps.transformers
-
-      # server bench
-      ps.matplotlib
-
-      # server tests
-      ps.openai
-      ps.behave
-      ps.prometheus-client
-
-      # for examples/pydantic-models-to-grammar-examples.py
-      ps.docstring-parser
-      ps.pydantic
-
-      # for scripts/compare-llama-bench.py
-      ps.gitpython
-      ps.tabulate
-    ]
-  );
-
  xcrunHost = runCommand "xcrunHost" {} ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
@ -223,6 +180,7 @@ effectiveStdenv.mkDerivation (
        (cmakeBool "GGML_METAL" useMetalKit)
        (cmakeBool "GGML_VULKAN" useVulkan)
        (cmakeBool "GGML_STATIC" enableStatic)
+        (cmakeBool "GGML_RPC" useRpc)
      ]
      ++ optionals useCuda [
        (
@ -234,7 +192,7 @@ effectiveStdenv.mkDerivation (
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
-        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
+        (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
      ]
      ++ optionals useMetalKit [
        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
@ -242,7 +200,7 @@ effectiveStdenv.mkDerivation (
      ];

    # Environment variables needed for ROCm
-    env = optionals useRocm {
+    env = optionalAttrs useRocm {
      ROCM_PATH = "${rocmPackages.clr}";
      HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
    };
@ -254,7 +212,6 @@ effectiveStdenv.mkDerivation (
      cp $src/include/llama.h $out/include/
    '';

-    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
@ -264,23 +221,6 @@ effectiveStdenv.mkDerivation (
        useRocm
        useVulkan
        ;
-
-      shell = mkShell {
-        name = "shell-${finalAttrs.finalPackage.name}";
-        description = "contains numpy and sentencepiece";
-        buildInputs = [ llama-python ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-        shellHook = ''
-          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
-        '';
-      };
-
-      shell-extra = mkShell {
-        name = "shell-extra-${finalAttrs.finalPackage.name}";
-        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
-        buildInputs = [ llama-python-extra ];
-        inputsFrom = [ finalAttrs.finalPackage ];
-      };
    };

    meta = {
@ -293,28 +233,13 @@ effectiveStdenv.mkDerivation (
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

-      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
-      homepage = "https://github.com/ggerganov/llama.cpp/";
+      description = "ik_llama.cpp: llama.cpp fork with better CPU performance${descriptionSuffix}";
+      homepage = "https://github.com/ikawrakow/ik_llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama-cli";

-      # These people might respond, on the best effort basis, if you ping them
-      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
-      # Consider adding yourself to this list if you want to ensure this flake
-      # stays maintained and you're willing to invest your time. Do not add
-      # other people without their consent. Consider removing people after
-      # they've been unreachable for long periods of time.
-
-      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
-      # an attrset following the same format as in
-      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
-      maintainers = with lib.maintainers; [
-        philiptaron
-        SomeoneSerge
-      ];
-
      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
--- a/flake.lock
+++ b/flake.lock
@ -5,11 +5,11 @@
        "nixpkgs-lib": "nixpkgs-lib"
      },
      "locked": {
-        "lastModified": 1722555600,
-        "narHash": "sha256-XOQkdLafnb/p9ij77byFQjDf5m5QYl9b2REiVClC+x4=",
+        "lastModified": 1772408722,
+        "narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=",
        "owner": "hercules-ci",
        "repo": "flake-parts",
-        "rev": "8471fe90ad337a8074e957b69ca4d0089218391d",
+        "rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3",
        "type": "github"
      },
      "original": {
@ -20,11 +20,11 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1723175592,
-        "narHash": "sha256-M0xJ3FbDUc4fRZ84dPGx5VvgFsOzds77KiBMW/mMTnI=",
+        "lastModified": 1772624091,
+        "narHash": "sha256-QKyJ0QGWBn6r0invrMAK8dmJoBYWoOWy7lN+UHzW1jc=",
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "5e0ca22929f3342b19569b21b2f3462f053e497b",
+        "rev": "80bdc1e5ce51f56b19791b52b2901187931f5353",
        "type": "github"
      },
      "original": {
@ -36,14 +36,17 @@
    },
    "nixpkgs-lib": {
      "locked": {
-        "lastModified": 1722555339,
-        "narHash": "sha256-uFf2QeW7eAHlYXuDktm9c25OxOyCoUOQmh5SZ9amE5Q=",
-        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/a5d394176e64ab29c852d03346c1fc9b0b7d33eb.tar.gz"
+        "lastModified": 1772328832,
+        "narHash": "sha256-e+/T/pmEkLP6BHhYjx6GmwP5ivonQQn0bJdH9YrRB+Q=",
+        "owner": "nix-community",
+        "repo": "nixpkgs.lib",
+        "rev": "c185c7a5e5dd8f9add5b2f8ebeff00888b070742",
+        "type": "github"
      },
      "original": {
-        "type": "tarball",
-        "url": "https://github.com/NixOS/nixpkgs/archive/a5d394176e64ab29c852d03346c1fc9b0b7d33eb.tar.gz"
+        "owner": "nix-community",
+        "repo": "nixpkgs.lib",
+        "type": "github"
      }
    },
    "root": {
--- a/flake.nix
+++ b/flake.nix
@ -1,4 +1,4 @@
-# The flake interface to llama.cpp's Nix expressions. The flake is used as a
+# The flake interface to ik_llama.cpp's Nix expressions. The flake is used as a
 # more discoverable entry-point, as well as a way to pin the dependencies and
 # expose default outputs, including the outputs built by the CI.

@ -7,60 +7,24 @@
 # directly:
 #
 # ```nix
-# pkgs.callPackage ${llama-cpp-root}/.devops/nix/scope.nix { }`
+# pkgs.callPackage ${ik-llama-cpp-root}/.devops/nix/scope.nix { }
 # ```

 # Cf. https://jade.fyi/blog/flakes-arent-real/ for a more detailed exposition
 # of the relation between Nix and the Nix Flakes.
 {
-  description = "Port of Facebook's LLaMA model in C/C++";
+  description = "ik_llama.cpp: llama.cpp fork with better CPU performance";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-parts.url = "github:hercules-ci/flake-parts";
  };

-  # There's an optional binary cache available. The details are below, but they're commented out.
-  #
-  # Why? The terrible experience of being prompted to accept them on every single Nix command run.
-  # Plus, there are warnings shown about not being a trusted user on a default Nix install
-  # if you *do* say yes to the prompts.
-  #
-  # This experience makes having `nixConfig` in a flake a persistent UX problem.
-  #
-  # To make use of the binary cache, please add the relevant settings to your `nix.conf`.
-  # It's located at `/etc/nix/nix.conf` on non-NixOS systems. On NixOS, adjust the `nix.settings`
-  # option in your NixOS configuration to add `extra-substituters` and `extra-trusted-public-keys`,
-  # as shown below.
-  #
-  # ```
-  # nixConfig = {
-  #   extra-substituters = [
-  #     # Populated by the CI in ggerganov/llama.cpp
-  #     "https://llama-cpp.cachix.org"
-  #
-  #     # A development cache for nixpkgs imported with `config.cudaSupport = true`.
-  #     # Populated by https://hercules-ci.com/github/SomeoneSerge/nixpkgs-cuda-ci.
-  #     # This lets one skip building e.g. the CUDA-enabled openmpi.
-  #     # TODO: Replace once nix-community obtains an official one.
-  #     "https://cuda-maintainers.cachix.org"
-  #   ];
-  #
-  #   # Verify these are the same keys as published on
-  #   # - https://app.cachix.org/cache/llama-cpp
-  #   # - https://app.cachix.org/cache/cuda-maintainers
-  #   extra-trusted-public-keys = [
-  #     "llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc="
-  #     "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
-  #   ];
-  # };
-  # ```
-
-  # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl:
+  # For inspection, use `nix flake show github:ikawrakow/ik_llama.cpp` or the nix repl:
  #
  # ```bash
  # ❯ nix repl
-  # nix-repl> :lf github:ggerganov/llama.cpp
+  # nix-repl> :lf github:ikawrakow/ik_llama.cpp
  # Added 13 variables.
  # nix-repl> outputs.apps.x86_64-linux.quantize
  # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/llama-quantize"; type = "app"; }
@ -145,7 +109,9 @@
            # the same path you would with an overlay.
            legacyPackages = {
              llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
-              llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
+              llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix {
+                inherit llamaVersion;
+              };
              llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
              llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
            };
@ -170,10 +136,6 @@

            # Packages exposed in `.#checks` will be built by the CI and by
            # `nix flake check`.
-            #
-            # We could test all outputs e.g. as `checks = confg.packages`.
-            #
-            # TODO: Build more once https://github.com/ggerganov/llama.cpp/issues/6346 has been addressed
            checks = {
              inherit (config.packages) default vulkan;
            };