mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Update nix flake: sync with upstream, fix for newer nixpkgs (#1371)
* Update nix flake: sync with upstream, fix for newer nixpkgs - Sync package.nix with upstream: add rocmGpuTargets/useRpc params, fix env optionals→optionalAttrs, remove unused inputs - Rewrite devshells.nix to use mkShell directly instead of passthru - Fix CUDA license check in nixpkgs-instances.nix for list-type licenses - Update flake inputs (nixpkgs, flake-parts) to latest - Update references from ggerganov/llama.cpp to ikawrakow/ik_llama.cpp Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com> * Apply nix review suggestions from baileylu121 (#1371) - Use legacyPackages instead of import to avoid extra nixpkgs instances - Use inherit (pkgs) stdenv idiom - Remove unnecessary lib.pipe wrapper in devshells.nix - Simplify license normalization with lib.toList in nixpkgs-instances.nix Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f4125e8b1f
commit
f6c8b5f2cb
@ -1,13 +1,44 @@
|
||||
{ inputs, ... }:
|
||||
|
||||
{
|
||||
perSystem =
|
||||
{ config, lib, ... }:
|
||||
{
|
||||
config,
|
||||
lib,
|
||||
system,
|
||||
...
|
||||
}:
|
||||
{
|
||||
devShells =
|
||||
lib.concatMapAttrs
|
||||
(name: package: {
|
||||
${name} = package.passthru.shell;
|
||||
${name + "-extra"} = package.passthru.shell-extra;
|
||||
})
|
||||
config.packages;
|
||||
let
|
||||
pkgs = inputs.nixpkgs.legacyPackages.${system};
|
||||
inherit (pkgs) stdenv;
|
||||
in
|
||||
lib.concatMapAttrs (
|
||||
name: package: {
|
||||
${name} = pkgs.mkShell {
|
||||
name = "${name}";
|
||||
inputsFrom = [ package ];
|
||||
shellHook = ''
|
||||
echo "Entering ${name} devShell"
|
||||
'';
|
||||
};
|
||||
"${name}-extra" = pkgs.mkShell {
|
||||
name = "${name}-extra";
|
||||
inputsFrom = [ package ];
|
||||
packages = with pkgs.python3Packages; [
|
||||
numpy
|
||||
sentencepiece
|
||||
tiktoken
|
||||
torchWithoutCuda
|
||||
transformers
|
||||
];
|
||||
shellHook = ''
|
||||
echo "Entering ${name}-extra devShell"
|
||||
addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
|
||||
'';
|
||||
};
|
||||
}
|
||||
) config.packages;
|
||||
};
|
||||
}
|
||||
|
||||
@ -4,7 +4,7 @@
|
||||
# the module `{ pkgs ... }: { /* config */ }` implicitly uses
|
||||
# `_module.args.pkgs` (defined in this case by flake-parts).
|
||||
perSystem =
|
||||
{ system, ... }:
|
||||
{ lib, system, ... }:
|
||||
{
|
||||
_module.args = {
|
||||
# Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
|
||||
@ -26,6 +26,9 @@
|
||||
config.cudaSupport = true;
|
||||
config.allowUnfreePredicate =
|
||||
p:
|
||||
let
|
||||
licenses = lib.toList (p.meta.license or []);
|
||||
in
|
||||
builtins.all
|
||||
(
|
||||
license:
|
||||
@ -35,7 +38,7 @@
|
||||
"cuDNN EULA"
|
||||
]
|
||||
)
|
||||
(p.meta.licenses or [ p.meta.license ]);
|
||||
licenses;
|
||||
};
|
||||
# Ensure dependencies use ROCm consistently
|
||||
pkgsRocm = import inputs.nixpkgs {
|
||||
|
||||
@ -3,13 +3,11 @@
|
||||
glibc,
|
||||
config,
|
||||
stdenv,
|
||||
mkShell,
|
||||
runCommand,
|
||||
cmake,
|
||||
ninja,
|
||||
pkg-config,
|
||||
git,
|
||||
python3,
|
||||
mpi,
|
||||
blas,
|
||||
cudaPackages,
|
||||
@ -30,8 +28,10 @@
|
||||
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
|
||||
useMpi ? false, # Increases the runtime closure size by ~700M
|
||||
useRocm ? config.rocmSupport,
|
||||
enableCurl ? true,
|
||||
rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
|
||||
useVulkan ? false,
|
||||
useRpc ? false,
|
||||
enableCurl ? true,
|
||||
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
|
||||
|
||||
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
||||
@ -45,9 +45,9 @@ let
|
||||
inherit (lib)
|
||||
cmakeBool
|
||||
cmakeFeature
|
||||
optionalAttrs
|
||||
optionals
|
||||
strings
|
||||
versionOlder
|
||||
;
|
||||
|
||||
stdenv = throw "Use effectiveStdenv instead";
|
||||
@ -67,49 +67,6 @@ let
|
||||
strings.optionalString (suffices != [ ])
|
||||
", accelerated with ${strings.concatStringsSep ", " suffices}";
|
||||
|
||||
executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;
|
||||
|
||||
# TODO: package the Python in this repository in a Nix-like way.
|
||||
# It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
|
||||
# is PEP 517-compatible, and ensure the correct .dist-info is generated.
|
||||
# https://peps.python.org/pep-0517/
|
||||
#
|
||||
# TODO: Package up each Python script or service appropriately, by making
|
||||
# them into "entrypoints"
|
||||
llama-python = python3.withPackages (
|
||||
ps: [
|
||||
ps.numpy
|
||||
ps.sentencepiece
|
||||
]
|
||||
);
|
||||
|
||||
# TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
|
||||
llama-python-extra = python3.withPackages (
|
||||
ps: [
|
||||
ps.numpy
|
||||
ps.sentencepiece
|
||||
ps.tiktoken
|
||||
ps.torchWithoutCuda
|
||||
ps.transformers
|
||||
|
||||
# server bench
|
||||
ps.matplotlib
|
||||
|
||||
# server tests
|
||||
ps.openai
|
||||
ps.behave
|
||||
ps.prometheus-client
|
||||
|
||||
# for examples/pydantic-models-to-grammar-examples.py
|
||||
ps.docstring-parser
|
||||
ps.pydantic
|
||||
|
||||
# for scripts/compare-llama-bench.py
|
||||
ps.gitpython
|
||||
ps.tabulate
|
||||
]
|
||||
);
|
||||
|
||||
xcrunHost = runCommand "xcrunHost" {} ''
|
||||
mkdir -p $out/bin
|
||||
ln -s /usr/bin/xcrun $out/bin
|
||||
@ -223,6 +180,7 @@ effectiveStdenv.mkDerivation (
|
||||
(cmakeBool "GGML_METAL" useMetalKit)
|
||||
(cmakeBool "GGML_VULKAN" useVulkan)
|
||||
(cmakeBool "GGML_STATIC" enableStatic)
|
||||
(cmakeBool "GGML_RPC" useRpc)
|
||||
]
|
||||
++ optionals useCuda [
|
||||
(
|
||||
@ -234,7 +192,7 @@ effectiveStdenv.mkDerivation (
|
||||
]
|
||||
++ optionals useRocm [
|
||||
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
|
||||
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
|
||||
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
|
||||
]
|
||||
++ optionals useMetalKit [
|
||||
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
|
||||
@ -242,7 +200,7 @@ effectiveStdenv.mkDerivation (
|
||||
];
|
||||
|
||||
# Environment variables needed for ROCm
|
||||
env = optionals useRocm {
|
||||
env = optionalAttrs useRocm {
|
||||
ROCM_PATH = "${rocmPackages.clr}";
|
||||
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
|
||||
};
|
||||
@ -254,7 +212,6 @@ effectiveStdenv.mkDerivation (
|
||||
cp $src/include/llama.h $out/include/
|
||||
'';
|
||||
|
||||
# Define the shells here, but don't add in the inputsFrom to avoid recursion.
|
||||
passthru = {
|
||||
inherit
|
||||
useBlas
|
||||
@ -264,23 +221,6 @@ effectiveStdenv.mkDerivation (
|
||||
useRocm
|
||||
useVulkan
|
||||
;
|
||||
|
||||
shell = mkShell {
|
||||
name = "shell-${finalAttrs.finalPackage.name}";
|
||||
description = "contains numpy and sentencepiece";
|
||||
buildInputs = [ llama-python ];
|
||||
inputsFrom = [ finalAttrs.finalPackage ];
|
||||
shellHook = ''
|
||||
addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
|
||||
'';
|
||||
};
|
||||
|
||||
shell-extra = mkShell {
|
||||
name = "shell-extra-${finalAttrs.finalPackage.name}";
|
||||
description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
|
||||
buildInputs = [ llama-python-extra ];
|
||||
inputsFrom = [ finalAttrs.finalPackage ];
|
||||
};
|
||||
};
|
||||
|
||||
meta = {
|
||||
@ -293,28 +233,13 @@ effectiveStdenv.mkDerivation (
|
||||
# overridden by importing Nixpkgs with `allowBroken = true`.
|
||||
broken = (useMetalKit && !effectiveStdenv.isDarwin);
|
||||
|
||||
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
|
||||
homepage = "https://github.com/ggerganov/llama.cpp/";
|
||||
description = "ik_llama.cpp: llama.cpp fork with better CPU performance${descriptionSuffix}";
|
||||
homepage = "https://github.com/ikawrakow/ik_llama.cpp/";
|
||||
license = lib.licenses.mit;
|
||||
|
||||
# Accommodates `nix run` and `lib.getExe`
|
||||
mainProgram = "llama-cli";
|
||||
|
||||
# These people might respond, on the best effort basis, if you ping them
|
||||
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
||||
# Consider adding yourself to this list if you want to ensure this flake
|
||||
# stays maintained and you're willing to invest your time. Do not add
|
||||
# other people without their consent. Consider removing people after
|
||||
# they've been unreachable for long periods of time.
|
||||
|
||||
# Note that lib.maintainers is defined in Nixpkgs, but you may just add
|
||||
# an attrset following the same format as in
|
||||
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
|
||||
maintainers = with lib.maintainers; [
|
||||
philiptaron
|
||||
SomeoneSerge
|
||||
];
|
||||
|
||||
# Extend `badPlatforms` instead
|
||||
platforms = lib.platforms.all;
|
||||
};
|
||||
|
||||
27
flake.lock
generated
27
flake.lock
generated
@ -5,11 +5,11 @@
|
||||
"nixpkgs-lib": "nixpkgs-lib"
|
||||
},
|
||||
"locked": {
|
||||
"lastModified": 1722555600,
|
||||
"narHash": "sha256-XOQkdLafnb/p9ij77byFQjDf5m5QYl9b2REiVClC+x4=",
|
||||
"lastModified": 1772408722,
|
||||
"narHash": "sha256-rHuJtdcOjK7rAHpHphUb1iCvgkU3GpfvicLMwwnfMT0=",
|
||||
"owner": "hercules-ci",
|
||||
"repo": "flake-parts",
|
||||
"rev": "8471fe90ad337a8074e957b69ca4d0089218391d",
|
||||
"rev": "f20dc5d9b8027381c474144ecabc9034d6a839a3",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -20,11 +20,11 @@
|
||||
},
|
||||
"nixpkgs": {
|
||||
"locked": {
|
||||
"lastModified": 1723175592,
|
||||
"narHash": "sha256-M0xJ3FbDUc4fRZ84dPGx5VvgFsOzds77KiBMW/mMTnI=",
|
||||
"lastModified": 1772624091,
|
||||
"narHash": "sha256-QKyJ0QGWBn6r0invrMAK8dmJoBYWoOWy7lN+UHzW1jc=",
|
||||
"owner": "NixOS",
|
||||
"repo": "nixpkgs",
|
||||
"rev": "5e0ca22929f3342b19569b21b2f3462f053e497b",
|
||||
"rev": "80bdc1e5ce51f56b19791b52b2901187931f5353",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
@ -36,14 +36,17 @@
|
||||
},
|
||||
"nixpkgs-lib": {
|
||||
"locked": {
|
||||
"lastModified": 1722555339,
|
||||
"narHash": "sha256-uFf2QeW7eAHlYXuDktm9c25OxOyCoUOQmh5SZ9amE5Q=",
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/a5d394176e64ab29c852d03346c1fc9b0b7d33eb.tar.gz"
|
||||
"lastModified": 1772328832,
|
||||
"narHash": "sha256-e+/T/pmEkLP6BHhYjx6GmwP5ivonQQn0bJdH9YrRB+Q=",
|
||||
"owner": "nix-community",
|
||||
"repo": "nixpkgs.lib",
|
||||
"rev": "c185c7a5e5dd8f9add5b2f8ebeff00888b070742",
|
||||
"type": "github"
|
||||
},
|
||||
"original": {
|
||||
"type": "tarball",
|
||||
"url": "https://github.com/NixOS/nixpkgs/archive/a5d394176e64ab29c852d03346c1fc9b0b7d33eb.tar.gz"
|
||||
"owner": "nix-community",
|
||||
"repo": "nixpkgs.lib",
|
||||
"type": "github"
|
||||
}
|
||||
},
|
||||
"root": {
|
||||
|
||||
54
flake.nix
54
flake.nix
@ -1,4 +1,4 @@
|
||||
# The flake interface to llama.cpp's Nix expressions. The flake is used as a
|
||||
# The flake interface to ik_llama.cpp's Nix expressions. The flake is used as a
|
||||
# more discoverable entry-point, as well as a way to pin the dependencies and
|
||||
# expose default outputs, including the outputs built by the CI.
|
||||
|
||||
@ -7,60 +7,24 @@
|
||||
# directly:
|
||||
#
|
||||
# ```nix
|
||||
# pkgs.callPackage ${llama-cpp-root}/.devops/nix/scope.nix { }`
|
||||
# pkgs.callPackage ${ik-llama-cpp-root}/.devops/nix/scope.nix { }`
|
||||
# ```
|
||||
|
||||
# Cf. https://jade.fyi/blog/flakes-arent-real/ for a more detailed exposition
|
||||
# of the relation between Nix and the Nix Flakes.
|
||||
{
|
||||
description = "Port of Facebook's LLaMA model in C/C++";
|
||||
description = "ik_llama.cpp: llama.cpp fork with better CPU performance";
|
||||
|
||||
inputs = {
|
||||
nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
|
||||
flake-parts.url = "github:hercules-ci/flake-parts";
|
||||
};
|
||||
|
||||
# There's an optional binary cache available. The details are below, but they're commented out.
|
||||
#
|
||||
# Why? The terrible experience of being prompted to accept them on every single Nix command run.
|
||||
# Plus, there are warnings shown about not being a trusted user on a default Nix install
|
||||
# if you *do* say yes to the prompts.
|
||||
#
|
||||
# This experience makes having `nixConfig` in a flake a persistent UX problem.
|
||||
#
|
||||
# To make use of the binary cache, please add the relevant settings to your `nix.conf`.
|
||||
# It's located at `/etc/nix/nix.conf` on non-NixOS systems. On NixOS, adjust the `nix.settings`
|
||||
# option in your NixOS configuration to add `extra-substituters` and `extra-trusted-public-keys`,
|
||||
# as shown below.
|
||||
#
|
||||
# ```
|
||||
# nixConfig = {
|
||||
# extra-substituters = [
|
||||
# # Populated by the CI in ggerganov/llama.cpp
|
||||
# "https://llama-cpp.cachix.org"
|
||||
#
|
||||
# # A development cache for nixpkgs imported with `config.cudaSupport = true`.
|
||||
# # Populated by https://hercules-ci.com/github/SomeoneSerge/nixpkgs-cuda-ci.
|
||||
# # This lets one skip building e.g. the CUDA-enabled openmpi.
|
||||
# # TODO: Replace once nix-community obtains an official one.
|
||||
# "https://cuda-maintainers.cachix.org"
|
||||
# ];
|
||||
#
|
||||
# # Verify these are the same keys as published on
|
||||
# # - https://app.cachix.org/cache/llama-cpp
|
||||
# # - https://app.cachix.org/cache/cuda-maintainers
|
||||
# extra-trusted-public-keys = [
|
||||
# "llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc="
|
||||
# "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
|
||||
# ];
|
||||
# };
|
||||
# ```
|
||||
|
||||
# For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl:
|
||||
# For inspection, use `nix flake show github:ikawrakow/ik_llama.cpp` or the nix repl:
|
||||
#
|
||||
# ```bash
|
||||
# ❯ nix repl
|
||||
# nix-repl> :lf github:ggerganov/llama.cpp
|
||||
# nix-repl> :lf github:ikawrakow/ik_llama.cpp
|
||||
# Added 13 variables.
|
||||
# nix-repl> outputs.apps.x86_64-linux.quantize
|
||||
# { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/llama-quantize"; type = "app"; }
|
||||
@ -145,7 +109,9 @@
|
||||
# the same path you would with an overlay.
|
||||
legacyPackages = {
|
||||
llamaPackages = pkgs.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
|
||||
llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
|
||||
llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage .devops/nix/scope.nix {
|
||||
inherit llamaVersion;
|
||||
};
|
||||
llamaPackagesCuda = pkgsCuda.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
|
||||
llamaPackagesRocm = pkgsRocm.callPackage .devops/nix/scope.nix { inherit llamaVersion; };
|
||||
};
|
||||
@ -170,10 +136,6 @@
|
||||
|
||||
# Packages exposed in `.#checks` will be built by the CI and by
|
||||
# `nix flake check`.
|
||||
#
|
||||
# We could test all outputs e.g. as `checks = confg.packages`.
|
||||
#
|
||||
# TODO: Build more once https://github.com/ggerganov/llama.cpp/issues/6346 has been addressed
|
||||
checks = {
|
||||
inherit (config.packages) default vulkan;
|
||||
};
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user