mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
* Update to OV 2026.2.1, Make OV release packages self-contained * Update to OV 2026.2.1, Make OV release packages self-contained * OpenVINO Backend: Remove compute_op_type hardcoded sets (#222) * OpenVINO Backend: Remove compute_op_type hardcoded sets * revert get_op_type removal * OpenVINO backend: enable softmax with sink input * OpenVINO backend: opt mul_mat_id convert process for large size * OpenVINO backend: Modify add_id to support 2D/4D * OpenVINO Backend: Add glu_swiglu_oai * PR review: fix paths * PR review: fix path consistency --------- Co-authored-by: Mostafa <mostafas.main.email@gmail.com> Co-authored-by: Xuejun <Xuejun.Zhai@intel.com>
388 lines
11 KiB
YAML
388 lines
11 KiB
YAML
name: CI (self-hosted)

# Triggers:
#   - workflow_dispatch: manual runs
#   - push:              commits to master that touch one of the listed paths
#   - pull_request:      opened / synchronize / reopened events touching the listed paths
#
# Both path filters cover CMake build files and C/C++/CUDA/Swift/Objective-C/Metal/shader sources.
on:
  workflow_dispatch: # allows manual triggering
  push:
    branches:
      - master
    paths:
      - '.github/workflows/build.yml'
      # Added so that edits to this workflow also trigger on push, matching the
      # pull_request filter below (presumably this file's own name - confirm).
      - '.github/workflows/build-self-hosted.yml'
      - '**/CMakeLists.txt'
      # was '**/.cmake', which only matches a file literally named ".cmake"
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
      - '**/*.comp'
      - '**/*.glsl'
      - '**/*.wgsl'

  pull_request:
    types: [opened, synchronize, reopened]
    paths:
      - '.github/workflows/build-self-hosted.yml'
      - '**/CMakeLists.txt'
      # was '**/.cmake', which only matches a file literally named ".cmake"
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
      - '**/*.comp'
      - '**/*.glsl'
      - '**/*.wgsl'
# Concurrency group key: "<workflow name>-<suffix>".
# The suffix is github.ref when github.head_ref is non-empty, otherwise github.run_id.
# A newer run in the same group cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: '${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}'
# Workflow-level environment variables; they are exported to the steps of every job below.
env:
  # ggml settings
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  # llama logging settings
  LLAMA_ARG_LOG_COLORS: 1
  LLAMA_ARG_LOG_PREFIX: 1
  LLAMA_ARG_LOG_TIMESTAMPS: 1
# Every job checks out the repository and then runs ./ci/run.sh with backend-specific
# GG_BUILD_* variables and two directory arguments.
#
# All checkouts set `persist-credentials: false` (previously only gpu-vulkan-intel-linux did)
# so the checkout token is not kept in the workspace while the CI script runs.
jobs:
  # CUDA build/test on the NVIDIA runner; prints nvidia-smi output first.
  gpu-cuda:
    runs-on: [self-hosted, Linux, NVIDIA]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          nvidia-smi
          GG_BUILD_CUDA=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Vulkan on NVIDIA with GGML_VK_DISABLE_COOPMAT2=1 set.
  gpu-vulkan-nvidia-cm:
    runs-on: [self-hosted, Linux, NVIDIA]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 GGML_VK_DISABLE_COOPMAT2=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Vulkan on an NVIDIA runner carrying the COOPMAT2 label (no disable flag).
  gpu-vulkan-nvidia-cm2:
    runs-on: [self-hosted, Linux, NVIDIA, COOPMAT2]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # WebGPU on Linux/NVIDIA: downloads a pinned Dawn release into ./dawn, then runs the CI script.
  gpu-webgpu-nvidia:
    runs-on: [self-hosted, Linux, NVIDIA, X64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Dawn Dependency
        id: dawn-depends
        run: |
          DAWN_VERSION="v20260317.182325"
          DAWN_OWNER="google"
          DAWN_REPO="dawn"
          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-ubuntu-latest-Release"
          # Build the URL once from the variables above (previously owner/repo were hard-coded).
          DAWN_URL="https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
          echo "Fetching release asset from ${DAWN_URL}"
          # --fail: exit non-zero on an HTTP error instead of saving the error page as the archive
          curl --fail -L -o artifact.tar.gz "${DAWN_URL}"
          mkdir dawn
          tar -xvf artifact.tar.gz -C dawn --strip-components=1

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_WEBGPU=1 \
          GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
          GG_BUILD_WEBGPU_DAWN_DIR="$GITHUB_WORKSPACE/dawn/lib64/cmake/Dawn" \
          bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMX-compatible machine
  #cpu-amx:
  #  runs-on: [self-hosted, Linux, CPU, AMX]

  #  steps:
  #    - name: Clone
  #      id: checkout
  #      uses: actions/checkout@v6

  #    - name: Test
  #      id: ggml-ci
  #      run: |
  #        bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # amd-vulkan:
  #   runs-on: [self-hosted, Linux, AMD]

  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v6

  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         vulkaninfo --summary
  #         GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # TODO: provision AMD GPU machine
  # amd-rocm:
  #   runs-on: [self-hosted, Linux, AMD]

  #   steps:
  #     - name: Clone
  #       id: checkout
  #       uses: actions/checkout@v6

  #     - name: Test
  #       id: ggml-ci
  #       run: |
  #         amd-smi static
  #         GG_BUILD_ROCM=1 GG_BUILD_AMDGPU_TARGETS="gfx1101" bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Metal on the macOS ARM64 runner.
  gpu-metal:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_METAL=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # WebGPU on macOS: same pinned Dawn release as the Linux job, macOS asset.
  gpu-webgpu-apple:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Dawn Dependency
        id: dawn-depends
        run: |
          DAWN_VERSION="v20260317.182325"
          DAWN_OWNER="google"
          DAWN_REPO="dawn"
          DAWN_ASSET_NAME="Dawn-18eb229ef5f707c1464cc581252e7603c73a3ef0-macos-latest-Release"
          # Build the URL once from the variables above (previously owner/repo were hard-coded).
          DAWN_URL="https://github.com/${DAWN_OWNER}/${DAWN_REPO}/releases/download/${DAWN_VERSION}/${DAWN_ASSET_NAME}.tar.gz"
          echo "Fetching release asset from ${DAWN_URL}"
          # --fail: exit non-zero on an HTTP error instead of saving the error page as the archive
          curl --fail -L -o artifact.tar.gz "${DAWN_URL}"
          mkdir dawn
          tar -xvf artifact.tar.gz -C dawn --strip-components=1

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_WEBGPU=1 GG_BUILD_WEBGPU_DAWN_PREFIX="$GITHUB_WORKSPACE/dawn" \
          bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Vulkan on the macOS ARM64 runner.
  gpu-vulkan-apple:
    runs-on: [self-hosted, macOS, ARM64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Vulkan on the Intel Linux runner.
  gpu-vulkan-intel-linux:
    runs-on: [self-hosted, Linux, Intel]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          vulkaninfo --summary
          GG_BUILD_VULKAN=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # Vulkan on the Intel Windows runner; the test step runs under the MSYS2 bash at C:\msys64.
  gpu-vulkan-intel-windows:
    runs-on: [self-hosted, Windows, X64, Intel]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        shell: C:\msys64\usr\bin\bash.exe --noprofile --norc -eo pipefail "{0}"
        env:
          MSYSTEM: UCRT64
          CHERE_INVOKING: 1
          # NOTE(review): `env.PATH` is looked up in the workflow `env` context, and no PATH is
          # defined in this workflow's env blocks, so the trailing entry may expand to an empty
          # string rather than the runner's PATH - confirm this is intended.
          PATH: C:\msys64\ucrt64\bin;C:\msys64\usr\bin;C:\Windows\System32;${{ env.PATH }}
        run: |
          vulkaninfo --summary
          # Skip python related tests with GG_BUILD_LOW_PERF=1 since Windows MSYS2 UCRT64 currently fails to create
          # a valid python environment for testing
          LLAMA_FATAL_WARNINGS=OFF GG_BUILD_NINJA=1 GG_BUILD_VULKAN=1 GG_BUILD_LOW_PERF=1 ./ci/run.sh ./results/llama.cpp ./mnt/llama.cpp

  # OpenVINO (GPU device, low-perf mode): installs the pinned toolkit via the repository's
  # local action, installs its system dependencies, then sources setupvars.sh before testing.
  gpu-openvino-low-perf:
    runs-on: [self-hosted, Linux, Intel, OpenVINO]

    env:
      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
      OPENVINO_VERSION_MAJOR: "2026.2.1"
      OPENVINO_VERSION_FULL: "2026.2.1.21919.ede283a88e3"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Setup OpenVINO Toolkit
        uses: ./.github/actions/linux-setup-openvino
        with:
          path: ./openvino_toolkit
          version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
          version_full: ${{ env.OPENVINO_VERSION_FULL }}

      - name: Install OpenVINO dependencies
        run: |
          cd ./openvino_toolkit
          chmod +x ./install_dependencies/install_openvino_dependencies.sh
          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh

      - name: Test
        id: ggml-ci
        run: |
          source ./openvino_toolkit/setupvars.sh
          GG_BUILD_OPENVINO=1 GGML_OPENVINO_DEVICE=GPU GG_BUILD_LOW_PERF=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # CPU-only x64 run in high-perf mode with the extra test set, using all cores reported by nproc.
  cpu-x64-high-perf:
    runs-on: [self-hosted, Linux, X64]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Test
        id: ggml-ci
        run: |
          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # ARM64 high-perf run; installs build dependencies and cmake from the Kitware apt repository first.
  cpu-arm64-high-perf-graviton4:
    runs-on: ah-ubuntu_22_04-c8g_8x

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Dependencies
        id: depends
        run: |
          set -euxo pipefail
          sudo apt-get update
          sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
            apt-get install -y \
              build-essential \
              python3-venv \
              gpg \
              wget \
              time \
              git-lfs

          git lfs install

          # install the latest cmake
          sudo install -d /usr/share/keyrings
          wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \
            | gpg --dearmor \
            | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
          echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
            | sudo tee /etc/apt/sources.list.d/kitware.list
          sudo apt-get update
          sudo apt-get install -y cmake

      - name: Test
        id: ggml-ci
        run: |
          LLAMA_ARG_THREADS=$(nproc) GG_BUILD_HIGH_PERF=1 GG_BUILD_NO_BF16=1 GG_BUILD_EXTRA_TESTS_0=1 bash ./ci/run.sh ~/results/llama.cpp ~/mnt/llama.cpp

  # ARM64 KleidiAI run on the same runner type; same dependency setup, workspace-relative output dirs.
  cpu-arm64-graviton4-kleidiai:
    runs-on: ah-ubuntu_22_04-c8g_8x

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          persist-credentials: false

      - name: Dependencies
        id: depends
        run: |
          set -euxo pipefail
          sudo apt-get update
          sudo DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a \
            apt-get install -y \
              build-essential \
              python3-venv \
              gpg \
              wget \
              time \
              git-lfs

          git lfs install

          # install the latest cmake
          sudo install -d /usr/share/keyrings
          wget -O - https://apt.kitware.com/keys/kitware-archive-latest.asc \
            | gpg --dearmor \
            | sudo tee /usr/share/keyrings/kitware-archive-keyring.gpg >/dev/null
          echo 'deb [signed-by=/usr/share/keyrings/kitware-archive-keyring.gpg] https://apt.kitware.com/ubuntu/ jammy main' \
            | sudo tee /etc/apt/sources.list.d/kitware.list
          sudo apt-get update
          sudo apt-get install -y cmake

      - name: Test
        id: ggml-ci
        run: |
          GG_BUILD_KLEIDIAI=1 \
          GG_BUILD_EXTRA_TESTS_0=1 \
          bash ./ci/run.sh ./tmp/results ./tmp/mnt