# CI (self-hosted)
#
# Runs ./ci/run.sh on self-hosted runners, one job per backend / machine type.
# Every job checks out the repository and then invokes ci/run.sh with a set of
# GG_BUILD_* variables plus an output directory and a mount directory.
name: CI (self-hosted)

on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    # Keep this list in sync with the pull_request filter below.
    paths:
      - '.github/workflows/build-self-hosted.yml'
      - '**/CMakeLists.txt'
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
      - '**/*.comp'
      - '**/*.glsl'
      - '**/*.wgsl'

  pull_request:
    types: [opened, synchronize, reopened]
    # Keep this list in sync with the push filter above.
    paths:
      - '.github/workflows/build-self-hosted.yml'
      - '**/CMakeLists.txt'
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
      - '**/*.comp'
      - '**/*.glsl'
      - '**/*.wgsl'

# github.head_ref is only set for pull_request events, so pull-request runs
# share a group per ref (a newer run cancels the older one), while every other
# run falls back to its unique run_id and is never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

# Exported to every step of every job.
env:
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_ARG_LOG_COLORS: 1
  LLAMA_ARG_LOG_PREFIX: 1
  LLAMA_ARG_LOG_TIMESTAMPS: 1

jobs:
  gpu-cuda:
    runs-on: [self-hosted, Linux, NVIDIA]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          nvidia-smi
          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Vulkan on NVIDIA with GGML_VK_DISABLE_COOPMAT2=1 set.
  gpu-vulkan-nvidia-cm:
    runs-on: [self-hosted, Linux, NVIDIA]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Vulkan on NVIDIA runners carrying the COOPMAT2 label.
  gpu-vulkan-nvidia-cm2:
    runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  gpu-webgpu-nvidia:
    runs-on: [self-hosted, Linux, NVIDIA, X64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      # Downloads a pinned Dawn release archive and unpacks it into ./dawn.
      - name: Dawn Dependency
        id: dawn-depends
        run: |
          DAWN_VERSION="v20260317.182325"
          DAWN_OWNER="google"
          DAWN_REPO="dawn"
          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
          DAWN_URL="https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
          echo "Fetching release asset from ${DAWN_URL}"
          # -f: fail on HTTP errors instead of saving the error page as the archive
          curl -fL -o artifact.tar.gz "${DAWN_URL}"
          mkdir dawn
          tar -xvf artifact.tar.gz -C dawn --strip-components=1

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_WEBGPU=1 \
          GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
          GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
          bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMX-compatible machine
  #cpu-amx:
  #  runs-on: [self-hosted, Linux, CPU, AMX]

  #  steps:
  #    - name: Clone
  #      id: checkout
  #      uses: actions/checkout@v6

  #    - name: Test
  #      id: ggml-ci
  #      run: |
  #        bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # amd-vulkan:
  #   runs-on: [self-hosted, Linux, AMD]

  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v6

  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         vulkaninfo --summary
  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # amd-rocm:
  #   runs-on: [self-hosted, Linux, AMD]

  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v6

  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         amd-smi static
  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  gpu-metal:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  gpu-webgpu-apple:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      # Downloads a pinned Dawn release archive and unpacks it into ./dawn.
      - name: Dawn Dependency
        id: dawn-depends
        run: |
          DAWN_VERSION="v20260317.182325"
          DAWN_OWNER="google"
          DAWN_REPO="dawn"
          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release"
          DAWN_URL="https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
          echo "Fetching release asset from ${DAWN_URL}"
          # -f: fail on HTTP errors instead of saving the error page as the archive
          curl -fL -o artifact.tar.gz "${DAWN_URL}"
          mkdir dawn
          tar -xvf artifact.tar.gz -C dawn --strip-components=1

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
          bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  gpu-vulkan-apple:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  gpu-vulkan-intel-linux:
    runs-on: [self-hosted, Linux, Intel]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  gpu-vulkan-intel-windows:
    runs-on: [self-hosted, Windows, X64, Intel]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      # Runs the CI script through the MSYS2 bash instead of the default shell.
      - name: Test
        id: ggml-ci
        shell: C:\msys64\usr\bin\bash.exe --noprofile --norc -eo pipefail "{0}"
        env:
          MSYSTEM: UCRT64
          CHERE_INVOKING: 1
          # NOTE(review): `env.PATH` reads the workflow `env` context, not the
          # runner's process PATH. No PATH is defined in the env blocks visible
          # in this file, so the trailing expression likely expands to an empty
          # string - confirm that dropping the inherited PATH is intended.
          PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
        run: |
          vulkaninfo --summary
          # Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
          # a valid python environment for testing
          LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

  gpu-openvino-low-perf:
    runs-on: [self-hosted, Linux, Intel, OpenVINO]

    env:
      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
      OPENVINO_VERSION_MAJOR: "2026.2.1"
      OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      # Repository-local composite action; receives the versions pinned above.
      - name: Setup OpenVINO Toolkit
        uses: ./.github/actions/linux-setup-openvino
        with:
          path: ./openvino_toolkit
          version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
          version_full: ${{ env.OPENVINO_VERSION_FULL }}

      - name: Install OpenVINO dependencies
        run: |
          cd ./openvino_toolkit
          chmod +x ./install_dependencies/install_openvino_dependencies.sh
          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh

      - name: Test
        id: ggml-ci
        run: |
          source ./openvino_toolkit/setupvars.sh
          GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  cpu-x64-high-perf:
    runs-on: [self-hosted, Linux, X64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the self-hosted machine.
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  cpu-arm64-high-perf-graviton4:
    runs-on: ah-ubuntu_22_04-c8g_8x

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the runner.
          persist-credentials: false

      # Installs the build toolchain and git-lfs, then cmake from the Kitware
      # apt repository (jammy suite).
      - name: Dependencies
        id: depends
        run: |
          set -euxo pipefail
          sudo apt-get update
          sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
            apt-get install -y \
              build-essential \
              python3-venv \
              gpg \
              wget \
              time \
              git-lfs

          git lfs install

          # install the latest cmake
          sudo install -d /usr/share/keyrings
          wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \
            | gpg --dearmor \
            | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
          echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
            | sudo tee /etc/apt/sources.list.d/kitware.list
          sudo apt-get update
          sudo apt-get install -y cmake

      - name: Test
        id: ggml-ci
        run: |
          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  cpu-arm64-graviton4-kleidiai:
    runs-on: ah-ubuntu_22_04-c8g_8x

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # Do not leave the job token behind on the runner.
          persist-credentials: false

      # Installs the build toolchain and git-lfs, then cmake from the Kitware
      # apt repository (jammy suite).
      - name: Dependencies
        id: depends
        run: |
          set -euxo pipefail
          sudo apt-get update
          sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
            apt-get install -y \
              build-essential \
              python3-venv \
              gpg \
              wget \
              time \
              git-lfs

          git lfs install

          # install the latest cmake
          sudo install -d /usr/share/keyrings
          wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \
            | gpg --dearmor \
            | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
          echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
            | sudo tee /etc/apt/sources.list.d/kitware.list
          sudo apt-get update
          sudo apt-get install -y cmake

      # Unlike the other jobs, this one writes results under ./tmp in the workspace.
      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_KLEIDIAI=1 \
          GG_BUILD_EXTRA_TESTS_0=1 \
          bash ./ci/run.sh ./tmp/results ./tmp/mnt