From f6c8b5f2cb46db1f97c584a5a3edbc25fd9e5092 Mon Sep 17 00:00:00 2001 From: Aliez Ren Date: Tue, 24 Mar 2026 16:17:52 +0900 Subject: [PATCH] Update nix flake: sync with upstream, fix for newer nixpkgs (#1371) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update nix flake: sync with upstream, fix for newer nixpkgs - Sync package.nix with upstream: add rocmGpuTargets/useRpc params, fix env optionals→optionalAttrs, remove unused inputs - Rewrite devshells.nix to use mkShell directly instead of passthru - Fix CUDA license check in nixpkgs-instances.nix for list-type licenses - Update flake inputs (nixpkgs, flake-parts) to latest - Update references from ggerganov/llama.cpp to ikawrakow/ik_llama.cpp Co-Authored-By: Claude Opus 4.6 * Apply nix review suggestions from baileylu121 (#1371) - Use legacyPackages instead of import to avoid extra nixpkgs instances - Use inherit (pkgs) stdenv idiom - Remove unnecessary lib.pipe wrapper in devshells.nix - Simplify license normalization with lib.toList in nixpkgs-instances.nix Co-Authored-By: Claude Sonnet 4.6 --------- Co-authored-by: Claude Opus 4.6 --- .devops/nix/devshells.nix | 45 ++++++++++++--- .devops/nix/nixpkgs-instances.nix | 7 ++- .devops/nix/package.nix | 93 +++---------------------------- flake.lock | 27 +++++---- flake.nix | 54 +++--------------- 5 files changed, 75 insertions(+), 151 deletions(-) diff --git a/.devops/nix/devshells.nix b/.devops/nix/devshells.nix index 1862f0f0..da227453 100644 --- a/.devops/nix/devshells.nix +++ b/.devops/nix/devshells.nix @@ -1,13 +1,44 @@ +{ inputs, ... }: + { perSystem = - { config, lib, ... }: + { + config, + lib, + system, + ... + }: { devShells = - lib.concatMapAttrs - (name: package: { - ${name} = package.passthru.shell; - ${name + "-extra"} = package.passthru.shell-extra; - }) - config.packages; + let + pkgs = inputs.nixpkgs.legacyPackages.${system}; + inherit (pkgs) stdenv; + in + lib.concatMapAttrs ( + name: package: { + ${name} = pkgs.mkShell { + name = "${name}"; + inputsFrom = [ package ]; + shellHook = '' + echo "Entering ${name} devShell" + ''; + }; + "${name}-extra" = pkgs.mkShell { + name = "${name}-extra"; + inputsFrom = [ package ]; + packages = with pkgs.python3Packages; [ + numpy + sentencepiece + tiktoken + torchWithoutCuda + transformers + ]; + shellHook = '' + echo "Entering ${name}-extra devShell" + addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib" + ''; + }; + } + ) config.packages; }; } diff --git a/.devops/nix/nixpkgs-instances.nix b/.devops/nix/nixpkgs-instances.nix index 4a2f81c4..c1f1c219 100644 --- a/.devops/nix/nixpkgs-instances.nix +++ b/.devops/nix/nixpkgs-instances.nix @@ -4,7 +4,7 @@ # the module `{ pkgs ... }: { /* config */ }` implicitly uses # `_module.args.pkgs` (defined in this case by flake-parts). perSystem = - { system, ... }: + { lib, system, ... }: { _module.args = { # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs @@ -26,6 +26,9 @@ config.cudaSupport = true; config.allowUnfreePredicate = p: + let + licenses = lib.toList (p.meta.license or []); + in builtins.all ( license: @@ -35,7 +38,7 @@ "cuDNN EULA" ] ) - (p.meta.licenses or [ p.meta.license ]); + licenses; }; # Ensure dependencies use ROCm consistently pkgsRocm = import inputs.nixpkgs { diff --git a/.devops/nix/package.nix b/.devops/nix/package.nix index cfffac25..2ea0776f 100644 --- a/.devops/nix/package.nix +++ b/.devops/nix/package.nix @@ -3,13 +3,11 @@ glibc, config, stdenv, - mkShell, runCommand, cmake, ninja, pkg-config, git, - python3, mpi, blas, cudaPackages, @@ -30,8 +28,10 @@ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin, useMpi ? false, # Increases the runtime closure size by ~700M useRocm ? config.rocmSupport, - enableCurl ? true, + rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets, useVulkan ? false, + useRpc ? false, + enableCurl ? true, llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake # It's necessary to consistently use backendStdenv when building with CUDA support, @@ -45,9 +45,9 @@ let inherit (lib) cmakeBool cmakeFeature + optionalAttrs optionals strings - versionOlder ; stdenv = throw "Use effectiveStdenv instead"; @@ -67,49 +67,6 @@ let strings.optionalString (suffices != [ ]) ", accelerated with ${strings.concatStringsSep ", " suffices}"; - executableSuffix = effectiveStdenv.hostPlatform.extensions.executable; - - # TODO: package the Python in this repository in a Nix-like way. - # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo - # is PEP 517-compatible, and ensure the correct .dist-info is generated. - # https://peps.python.org/pep-0517/ - # - # TODO: Package up each Python script or service appropriately, by making - # them into "entrypoints" - llama-python = python3.withPackages ( - ps: [ - ps.numpy - ps.sentencepiece - ] - ); - - # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime - llama-python-extra = python3.withPackages ( - ps: [ - ps.numpy - ps.sentencepiece - ps.tiktoken - ps.torchWithoutCuda - ps.transformers - - # server bench - ps.matplotlib - - # server tests - ps.openai - ps.behave - ps.prometheus-client - - # for examples/pydantic-models-to-grammar-examples.py - ps.docstring-parser - ps.pydantic - - # for scripts/compare-llama-bench.py - ps.gitpython - ps.tabulate - ] - ); - xcrunHost = runCommand "xcrunHost" {} '' mkdir -p $out/bin ln -s /usr/bin/xcrun $out/bin @@ -223,6 +180,7 @@ effectiveStdenv.mkDerivation ( (cmakeBool "GGML_METAL" useMetalKit) (cmakeBool "GGML_VULKAN" useVulkan) (cmakeBool "GGML_STATIC" enableStatic) + (cmakeBool "GGML_RPC" useRpc) ] ++ optionals useCuda [ ( @@ -234,7 +192,7 @@ effectiveStdenv.mkDerivation ( ] ++ optionals useRocm [ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang") - (cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets)) + (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets) ] ++ optionals useMetalKit [ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") @@ -242,7 +200,7 @@ effectiveStdenv.mkDerivation ( ]; # Environment variables needed for ROCm - env = optionals useRocm { + env = optionalAttrs useRocm { ROCM_PATH = "${rocmPackages.clr}"; HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode"; }; @@ -254,7 +212,6 @@ effectiveStdenv.mkDerivation ( cp $src/include/llama.h $out/include/ ''; - # Define the shells here, but don't add in the inputsFrom to avoid recursion. passthru = { inherit useBlas @@ -264,23 +221,6 @@ effectiveStdenv.mkDerivation ( useRocm useVulkan ; - - shell = mkShell { - name = "shell-${finalAttrs.finalPackage.name}"; - description = "contains numpy and sentencepiece"; - buildInputs = [ llama-python ]; - inputsFrom = [ finalAttrs.finalPackage ]; - shellHook = '' - addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib" - ''; - }; - - shell-extra = mkShell { - name = "shell-extra-${finalAttrs.finalPackage.name}"; - description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers"; - buildInputs = [ llama-python-extra ]; - inputsFrom = [ finalAttrs.finalPackage ]; - }; }; meta = { @@ -293,28 +233,13 @@ effectiveStdenv.mkDerivation ( # overridden by importing Nixpkgs with `allowBroken = true`. broken = (useMetalKit && !effectiveStdenv.isDarwin); - description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}"; - homepage = "https://github.com/ggerganov/llama.cpp/"; + description = "ik_llama.cpp: llama.cpp fork with better CPU performance${descriptionSuffix}"; + homepage = "https://github.com/ikawrakow/ik_llama.cpp/"; license = lib.licenses.mit; # Accommodates `nix run` and `lib.getExe` mainProgram = "llama-cli"; - # These people might respond, on the best effort basis, if you ping them - # in case of Nix-specific regressions or for reviewing Nix-specific PRs. - # Consider adding yourself to this list if you want to ensure this flake - # stays maintained and you're willing to invest your time. Do not add - # other people without their consent. Consider removing people after - # they've been unreachable for long periods of time. - - # Note that lib.maintainers is defined in Nixpkgs, but you may just add - # an attrset following the same format as in - # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix - maintainers = with lib.maintainers; [ - philiptaron - SomeoneSerge - ]; - # Extend `badPlatforms` instead platforms = lib.platforms.all; }; diff --git a/flake.lock b/flake.lock index f9e1548a..cf1abde6 100644 --- a/flake.lock +++ b/flake.lock @@ -5,11 +5,11 @@ "nixpkgs-lib": "nixpkgs-lib" }, "locked": { - "lastModified": 1722555600, - "narHash": "sha256-XOQkdLafnb/p9ij77byFQjDf5m5QYl9b2REiVClC+x4=", + "lastModified": 1772408722, + "narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=", "owner": "hercules-ci", "repo": "flake-parts", - "rev": "8471fe90ad337a8074e957b69ca4d0089218391d", + "rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3", "type": "github" }, "original": { @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1723175592, - "narHash": "sha256-M0xJ3FbDUc4fRZ84dPGx5VvgFsOzds77KiBMW/mMTnI=", + "lastModified": 1772624091, + "narHash": "sha256-QKyJ0QGWBn6r0invrMAK8dmJoBYWoOWy7lN+UHzW1jc=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "5e0ca22929f3342b19569b21b2f3462f053e497b", + "rev": "80bdc1e5ce51f56b19791b52b2901187931f5353", "type": "github" }, "original": { @@ -36,14 +36,17 @@ }, "nixpkgs-lib": { "locked": { - "lastModified": 1722555339, - "narHash": "sha256-uFf2QeW7eAHlYXuDktm9c25OxOyCoUOQmh5SZ9amE5Q=", - "type": "tarball", - "url": "https://github.com/NixOS/nixpkgs/archive/a5d394176e64ab29c852d03346c1fc9b0b7d33eb.tar.gz" + "lastModified": 1772328832, + "narHash": "sha256-e+/T/pmEkLP6BHhYjx6GmwP5ivonQQn0bJdH9YrRB+Q=", + "owner": "nix-community", + "repo": "nixpkgs.lib", + "rev": "c185c7a5e5dd8f9add5b2f8ebeff00888b070742", + "type": "github" }, "original": { - "type": "tarball", - "url": "https://github.com/NixOS/nixpkgs/archive/a5d394176e64ab29c852d03346c1fc9b0b7d33eb.tar.gz" + "owner": "nix-community", + "repo": "nixpkgs.lib", + "type": "github" } }, "root": { diff --git a/flake.nix b/flake.nix index c69637d1..49bd636e 100644 --- a/flake.nix +++ b/flake.nix @@ -1,4 +1,4 @@ -# The flake interface to llama.cpp's Nix expressions. The flake is used as a +# The flake interface to ik_llama.cpp's Nix expressions. The flake is used as a # more discoverable entry-point, as well as a way to pin the dependencies and # expose default outputs, including the outputs built by the CI. @@ -7,60 +7,24 @@ # directly: # # ```nix -# pkgs.callPackage ${llama-cpp-root}/.devops/nix/scope.nix { }` +# pkgs.callPackage ${ik-llama-cpp-root}/.devops/nix/scope.nix { }` # ``` # Cf. https://jade.fyi/blog/flakes-arent-real/ for a more detailed exposition # of the relation between Nix and the Nix Flakes. { - description = "Port of Facebook's LLaMA model in C/C++"; + description = "ik_llama.cpp: llama.cpp fork with better CPU performance"; inputs = { nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable"; flake-parts.url = "github:hercules-ci/flake-parts"; }; - # There's an optional binary cache available. The details are below, but they're commented out. - # - # Why? The terrible experience of being prompted to accept them on every single Nix command run. - # Plus, there are warnings shown about not being a trusted user on a default Nix install - # if you *do* say yes to the prompts. - # - # This experience makes having `nixConfig` in a flake a persistent UX problem. - # - # To make use of the binary cache, please add the relevant settings to your `nix.conf`. - # It's located at `/etc/nix/nix.conf` on non-NixOS systems. On NixOS, adjust the `nix.settings` - # option in your NixOS configuration to add `extra-substituters` and `extra-trusted-public-keys`, - # as shown below. - # - # ``` - # nixConfig = { - # extra-substituters = [ - # # Populated by the CI in ggerganov/llama.cpp - # "https://llama-cpp.cachix.org" - # - # # A development cache for nixpkgs imported with `config.cudaSupport = true`. - # # Populated by https://hercules-ci.com/github/SomeoneSerge/nixpkgs-cuda-ci. - # # This lets one skip building e.g. the CUDA-enabled openmpi. - # # TODO: Replace once nix-community obtains an official one. - # "https://cuda-maintainers.cachix.org" - # ]; - # - # # Verify these are the same keys as published on - # # - https://app.cachix.org/cache/llama-cpp - # # - https://app.cachix.org/cache/cuda-maintainers - # extra-trusted-public-keys = [ - # "llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc=" - # "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=" - # ]; - # }; - # ``` - - # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl: + # For inspection, use `nix flake show github:ikawrakow/ik_llama.cpp` or the nix repl: # # ```bash # ❯ nix repl - # nix-repl> :lf github:ggerganov/llama.cpp + # nix-repl> :lf github:ikawrakow/ik_llama.cpp # Added 13 variables. # nix-repl> outputs.apps.x86_64-linux.quantize # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/llama-quantize"; type = "app"; } @@ -145,7 +109,9 @@ # the same path you would with an overlay. legacyPackages = { llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; - llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; + llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { + inherit llamaVersion; + }; llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; }; }; @@ -170,10 +136,6 @@ # Packages exposed in `.#checks` will be built by the CI and by # `nix flake check`. - # - # We could test all outputs e.g. as `checks = confg.packages`. - # - # TODO: Build more once https://github.com/ggerganov/llama.cpp/issues/6346 has been addressed checks = { inherit (config.packages) default vulkan; };