# llama.cpp/.github/workflows/release.yml

name: Release

# Triggers:
#   - manual dispatch, with a required boolean `create_release` input
#   - pushes to `master` that touch this workflow, the CMake build scripts
#     or any of the listed source-file extensions
on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths:
      - '.github/workflows/release.yml'
      - '**/CMakeLists.txt'
      # `*.cmake` (not `.cmake`): the latter only matches files that are
      # literally named ".cmake", so CMake module changes were never picked up
      - '**/*.cmake'
      - '**/*.h'
      - '**/*.hpp'
      - '**/*.c'
      - '**/*.cpp'
      - '**/*.cu'
      - '**/*.cuh'
      - '**/*.swift'
      - '**/*.m'
      - '**/*.metal'
      - '**/*.comp'
      - '**/*.glsl'

# Environment variables visible to every job and step of this workflow.
env:
  # the workflow token, exported under the name GH_TOKEN
  GH_TOKEN: ${{ github.token }}
  # `head_ref` when it is set, otherwise the name of the ref that triggered the run
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  # CMake flags appended to most `cmake -B build` invocations below
  # (folded scalar: the lines are joined with single spaces)
  CMAKE_ARGS: >-
    -DLLAMA_BUILD_EXAMPLES=OFF
    -DLLAMA_BUILD_TESTS=OFF
    -DLLAMA_BUILD_TOOLS=ON
    -DLLAMA_BUILD_SERVER=ON
    -DGGML_RPC=ON

# note: all runs share one concurrency group so that the workflow runs one at
#       a time, which gives better cache reuse
concurrency:
  group: release
  queue: max

jobs:
  check-release:
    # Decides whether the build jobs should run. Publishes `should_release`:
    #   - manual dispatch                      -> true
    #   - push to master                       -> true, unless the head commit
    #                                             message contains "[no release]"
    #   - anything else                        -> false
    runs-on: ubuntu-slim

    outputs:
      should_release: ${{ steps.check.outputs.should_release }}

    steps:
      - id: check
        # The event data is handed to the script through environment variables
        # instead of being interpolated into the script text: a commit message
        # is attacker-controlled and may contain quotes, `$(...)` or backticks
        # that would otherwise be executed by the shell.
        env:
          EVENT_NAME: ${{ github.event_name }}
          EVENT_REF: ${{ github.ref }}
          COMMIT_MESSAGE: ${{ github.event.head_commit.message }}
        run: |
          if [[ "$EVENT_NAME" == "workflow_dispatch" ]]; then
            echo "should_release=true" >> "$GITHUB_OUTPUT"
          elif [[ "$EVENT_NAME" == "push" && "$EVENT_REF" == "refs/heads/master" ]]; then
            if grep -q '\[no release\]' <<< "$COMMIT_MESSAGE"; then
              echo "should_release=false" >> "$GITHUB_OUTPUT"
            else
              echo "should_release=true" >> "$GITHUB_OUTPUT"
            fi
          else
            echo "should_release=false" >> "$GITHUB_OUTPUT"
          fi

  get-version:
    # Computes the `ui_version` string that the build jobs pass to CMake as
    # -DHF_UI_VERSION.
    runs-on: ubuntu-slim
    outputs:
      ui_version: ${{ steps.version.outputs.ui_version }}
    steps:
      - uses: actions/checkout@v6
        with:
          fetch-depth: 0
      - id: version
        run: |
          # Resolve UI version: BUILD_NUMBER from cmake/build-info.cmake > git hash + epoch
          version=""
          if grep -q "BUILD_NUMBER" cmake/build-info.cmake; then
            # `|| true`: the step shell runs with `-e`, so a pipeline that finds
            # no digits would otherwise abort the step instead of falling
            # through to the git-hash fallback below
            build_number=$(grep "set(BUILD_NUMBER" cmake/build-info.cmake | grep -oP '\d+' || true)
            # only a strictly positive build number is used
            if [ -n "$build_number" ] && [ "$build_number" -gt 0 ]; then
              version="b${build_number}"
            fi
          fi
          if [ -z "$version" ]; then
            # fallback: <short commit hash>-<unix epoch seconds>
            version=$(git rev-parse --short HEAD)-$(date +%s)
          fi
          echo "ui_version=${version}" >> "$GITHUB_OUTPUT"

  macos-cpu:
    # macOS release binaries: one tarball per matrix entry (arm64 and x64).
    needs:
      - check-release
      - get-version
    if: needs.check-release.outputs.should_release == 'true'

    runs-on: ${{ matrix.os }}

    permissions:
      actions: write

    strategy:
      matrix:
        include:
          - build: arm64
            arch: arm64
            os: macos-26
            defines: '-DGGML_METAL_USE_BF16=ON -DGGML_METAL_EMBED_LIBRARY=ON'
          # TODO: this build is disabled to save Github Actions resources (https://github.com/ggml-org/llama.cpp/pull/23780)
          #       to enable it again, dedicated runners have to be provisioned for it
          #- build: 'arm64-kleidiai'
          #  arch: 'arm64'
          #  os: macos-14
          #  defines: "-DGGML_METAL_USE_BF16=ON -DGGML_METAL_EMBED_LIBRARY=ON -DGGML_CPU_KLEIDIAI=ON"
          - build: x64
            arch: x64
            os: macos-15-intel
            # Metal is turned off on x64 because the Github runners have no GPU,
            # which caused intermittent failures:
            # https://github.com/ggml-org/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
            defines: '-DGGML_METAL=OFF -DCMAKE_OSX_DEPLOYMENT_TARGET=13.3'

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          # full history (fetch-depth 0)
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: '24'
          cache: npm
          cache-dependency-path: tools/ui/package-lock.json

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-${{ matrix.os }}-${{ matrix.arch }}

      # Dumps the sysctl table, then configures with the per-entry defines
      # followed by the workflow-wide CMAKE_ARGS, and builds on all logical CPUs.
      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build \
            ${{ matrix.defines }} \
            -DCMAKE_INSTALL_RPATH='@loader_path' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DLLAMA_BUILD_BORINGSSL=ON \
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-${{ matrix.os }}-${{ matrix.arch }}

      # The `name` output of this step is used in the archive file name below.
      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # `-s` rewrites the leading "." of every member path to "llama-<tag>".
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-macos-${{ matrix.build }}.tar.gz -s ",^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          name: llama-bin-macos-${{ matrix.build }}.tar.gz
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-${{ matrix.build }}.tar.gz

  ubuntu-cpu:
    # Linux CPU release binaries, one tarball per matrix entry
    # (x64, arm64, s390x).
    needs: [check-release, get-version]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}
    strategy:
      matrix:
        include:
          - build: 'x64'
            os: ubuntu-22.04
          - build: 'arm64'
            os: ubuntu-24.04-arm
          - build: 's390x'
            os: ubuntu-24.04-s390x

    runs-on: ${{ matrix.os }}

    permissions:
      actions: write

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          # -y: never wait for an interactive confirmation on a CI runner
          sudo apt-get install -y build-essential libssl-dev

      # Only on the ubuntu-24.04 based runners: install GCC 14 and make it the
      # compiler for all following steps through CC / CXX.
      - name: Toolchain workaround (GCC 14)
        if: ${{ contains(matrix.os, 'ubuntu-24.04') }}
        run: |
          sudo apt-get install -y gcc-14 g++-14
          echo "CC=gcc-14" >> "$GITHUB_ENV"
          echo "CXX=g++-14" >> "$GITHUB_ENV"

      # ccache (and the matching ccache-clear below) is skipped for s390x
      - name: ccache
        if: ${{ matrix.build != 's390x' }}
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-${{ matrix.os }}-cpu

      - name: Build
        id: cmake_build
        run: |
          # NOTE: every line of the configure command except the last must end
          # with a backslash; otherwise the remaining flags are executed as a
          # separate (failing) shell command and never reach cmake
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_BACKEND_DL=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      - name: ccache-clear
        if: ${{ matrix.build != 's390x' }}
        uses: ./.github/actions/ccache-clear
        with:
          key: release-${{ matrix.os }}-cpu

      # The `name` output of this step is used in the archive file name below.
      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # --transform rewrites the leading "." of every member path to "llama-<tag>"
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-${{ matrix.build }}.tar.gz
          name: llama-bin-ubuntu-${{ matrix.build }}.tar.gz

  ubuntu-vulkan:
    # Linux Vulkan release binaries, one tarball per matrix entry (x64, arm64).
    needs: [check-release, get-version]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}

    strategy:
      matrix:
        include:
          - build: 'x64'
            os: ubuntu-22.04
          - build: 'arm64'
            os: ubuntu-24.04-arm

    runs-on: ${{ matrix.os }}

    permissions:
      actions: write

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      # ubuntu-22.04: Vulkan SDK from the LunarG apt repository.
      # otherwise:    distro Vulkan packages plus GCC 14, exported via CC / CXX.
      - name: Dependencies
        id: depends
        run: |
          if [[ "${{ matrix.os }}" =~ "ubuntu-22.04" ]]; then
            wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
            sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
            sudo apt-get update -y
            sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk libssl-dev
          else
            sudo apt-get update -y
            sudo apt-get install -y gcc-14 g++-14 build-essential glslc libvulkan-dev spirv-headers libssl-dev ninja-build
            echo "CC=gcc-14" >> "$GITHUB_ENV"
            echo "CXX=g++-14" >> "$GITHUB_ENV"
          fi

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-${{ matrix.os }}-vulkan

      - name: Build
        id: cmake_build
        run: |
          # NOTE: every line of the configure command except the last must end
          # with a backslash; otherwise the remaining flags are executed as a
          # separate (failing) shell command and never reach cmake
          cmake -B build \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_BACKEND_DL=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DGGML_VULKAN=ON \
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-${{ matrix.os }}-vulkan

      # The `name` output of this step is used in the archive file name below.
      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # --transform rewrites the leading "." of every member path to "llama-<tag>"
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-${{ matrix.build }}.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-${{ matrix.build }}.tar.gz
          name: llama-bin-ubuntu-vulkan-${{ matrix.build }}.tar.gz

  android-arm64:
    # Android arm64-v8a release binaries, cross-compiled with the Android NDK.
    needs: [check-release, get-version]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}

    runs-on: ubuntu-latest

    #permissions:
    #  actions: write

    env:
      NDK_VERSION: "29.0.14206865"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      - name: Set up JDK
        uses: actions/setup-java@v5
        with:
          java-version: 17
          distribution: temurin

      - name: Setup Android SDK
        uses: android-actions/setup-android@40fd30fb8d7440372e1316f5d1809ec01dcd3699 # v4.0.1
        with:
          log-accepted-android-sdk-licenses: false

      # Installs the pinned NDK and exports its location as ANDROID_NDK for
      # the Build step.
      - name: Install NDK
        run: |
          sdkmanager "ndk;${{ env.NDK_VERSION }}"
          echo "ANDROID_NDK=${ANDROID_SDK_ROOT}/ndk/${{ env.NDK_VERSION }}" >> $GITHUB_ENV

      # note : disabled to spare some cache space (https://github.com/ggml-org/llama.cpp/pull/23789)
      #        for some reason, the ccache does not improve the build time in this case
      # example:
      #   cache off: https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78160400831
      #   cache on:  https://github.com/ggerganov/tmp2/actions/runs/26534713799/job/78224189394
      #
      #- name: ccache
      #  uses: ggml-org/ccache-action@v1.2.21
      #  with:
      #    key: release-android-arm64

      - name: Build
        id: cmake_build
        run: |
          # NOTE: every line of the configure command except the last must end
          # with a backslash; otherwise the remaining flags are executed as a
          # separate (failing) shell command and never reach cmake
          cmake -B build \
            -DCMAKE_TOOLCHAIN_FILE=${ANDROID_NDK}/build/cmake/android.toolchain.cmake \
            -DANDROID_ABI=arm64-v8a \
            -DANDROID_PLATFORM=android-28 \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_BACKEND_DL=ON \
            -DGGML_NATIVE=OFF \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DGGML_OPENMP=OFF \
            -DLLAMA_BUILD_BORINGSSL=ON \
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      #- name: ccache-clear
      #  uses: ./.github/actions/ccache-clear
      #  with:
      #    key: release-android-arm64

      # The `name` output of this step is used in the archive file name below.
      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # --transform rewrites the leading "." of every member path to "llama-<tag>"
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-android-arm64.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-android-arm64.tar.gz
          name: llama-bin-android-arm64.tar.gz

  ubuntu-24-openvino:
    # Linux x64 release binaries built with the OpenVINO backend
    # (-DGGML_OPENVINO=ON). The OpenVINO major version is also published as a
    # job output.
    needs: [check-release, get-version]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}

    runs-on: ubuntu-24.04

    permissions:
      actions: write

    outputs:
      openvino_version: ${{ steps.openvino_version.outputs.value }}

    env:
      # Sync versions in build.yml, build-self-hosted.yml, release.yml, build-cache.yml, .devops/openvino.Dockerfile
      OPENVINO_VERSION_MAJOR: "2026.0"
      OPENVINO_VERSION_FULL: "2026.0.0.20965.c6d6a13a886"

    steps:
      - name: Set OpenVINO version output
        id: openvino_version
        run: echo "value=${{ env.OPENVINO_VERSION_MAJOR }}" >> $GITHUB_OUTPUT

      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-ubuntu-24.04-openvino-release-no-preset-v1

      - name: Dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y build-essential libssl-dev libtbb12 cmake ninja-build python3-pip
          # apt-get with -y (instead of plain `apt install`): stable CLI for
          # scripts and no interactive confirmation on the runner
          sudo apt-get install -y ocl-icd-opencl-dev opencl-headers opencl-clhpp-headers intel-opencl-icd

      # The toolkit directory is cached, keyed on the full OpenVINO version and
      # the runner OS; the setup action below only runs on a cache miss.
      - name: Use OpenVINO Toolkit Cache
        uses: actions/cache@v5
        id: cache-openvino
        with:
          path: ./openvino_toolkit
          key: cache-gha-openvino-toolkit-v${{ env.OPENVINO_VERSION_FULL }}-${{ runner.os }}

      - name: Setup OpenVINO Toolkit
        if: steps.cache-openvino.outputs.cache-hit != 'true'
        uses: ./.github/actions/linux-setup-openvino
        with:
          path: ./openvino_toolkit
          version_major: ${{ env.OPENVINO_VERSION_MAJOR }}
          version_full: ${{ env.OPENVINO_VERSION_FULL }}

      # Runs on every build (cache hit or miss); "Y" is piped in to answer the
      # installer's prompt.
      - name: Install OpenVINO dependencies
        run: |
          cd ./openvino_toolkit
          chmod +x ./install_dependencies/install_openvino_dependencies.sh
          echo "Y" | sudo -E ./install_dependencies/install_openvino_dependencies.sh

      # Note: this job builds into build/ReleaseOV and does not pass the
      # workflow-wide CMAKE_ARGS.
      - name: Build
        id: cmake_build
        run: |
          source ./openvino_toolkit/setupvars.sh
          cmake -B build/ReleaseOV -G Ninja \
            -DCMAKE_BUILD_TYPE=Release \
            -DGGML_OPENVINO=ON \
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }}
          cmake --build build/ReleaseOV --config Release -j $(nproc)

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-ubuntu-24.04-openvino-release-no-preset-v1

      # The `name` output of this step is used in the archive file name below.
      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # --transform rewrites the leading "." of every member path to "llama-<tag>"
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/ReleaseOV/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/ReleaseOV/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz
          name: llama-bin-ubuntu-openvino-${{ env.OPENVINO_VERSION_MAJOR }}-x64.tar.gz

  windows-cpu:
    # Windows CPU release binaries (x64 and arm64), packed as zip archives.
    needs:
      - check-release
    if: needs.check-release.outputs.should_release == 'true'

    runs-on: windows-2025-vs2026

    strategy:
      matrix:
        include:
          - arch: x64
          - arch: arm64

    permissions:
      actions: write

    steps:
      - name: Clone
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: '24'
          cache: npm
          cache-dependency-path: tools/ui/package-lock.json

      - name: Install Ninja
        run: |
          choco install ninja

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-windows-2025-vs2026-${{ matrix.arch }}-cpu

      # vcvarsall is called with `x64` for the x64 entry and `amd64_arm64`
      # otherwise; GGML_CPU_ALL_VARIANTS is ON only for x64.
      - name: Build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\18\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" ${{ matrix.arch == 'x64' && 'x64' || 'amd64_arm64' }}
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -D CMAKE_TOOLCHAIN_FILE=cmake/${{ matrix.arch }}-windows-llvm.cmake ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DGGML_NATIVE=OFF ^
            -DGGML_BACKEND_DL=ON ^
            -DGGML_CPU_ALL_VARIANTS=${{ matrix.arch == 'x64' && 'ON' || 'OFF' }} ^
            -DGGML_OPENMP=ON ^
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-windows-2025-vs2026-${{ matrix.arch }}-cpu

      # Copies the LLVM OpenMP runtime DLL for the target architecture next to
      # the binaries, then zips the Release directory.
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          Copy-Item "C:\Program Files\Microsoft Visual Studio\18\Enterprise\VC\Redist\MSVC\14.51.36231\debug_nonredist\${{ matrix.arch }}\Microsoft.VC145.OpenMP.LLVM\libomp140.${{ matrix.arch == 'x64' && 'x86_64' || 'aarch64' }}.dll" .\build\bin\Release\
          7z a -snl llama-bin-win-cpu-${{ matrix.arch }}.zip .\build\bin\Release\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          name: llama-bin-win-cpu-${{ matrix.arch }}.zip
          path: llama-bin-win-cpu-${{ matrix.arch }}.zip

  windows:
    # Windows GPU backend libraries. Each matrix entry builds a single target
    # (ggml-vulkan or ggml-opencl) and zips the resulting DLL.
    needs:
      - check-release
    if: needs.check-release.outputs.should_release == 'true'

    runs-on: windows-2025

    strategy:
      matrix:
        include:
          - backend: vulkan
            arch: x64
            defines: '-DGGML_VULKAN=ON'
            target: ggml-vulkan
          - backend: opencl-adreno
            arch: arm64
            defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
            target: ggml-opencl

    permissions:
      actions: write

    env:
      OPENBLAS_VERSION: '0.3.23'
      VULKAN_VERSION: '1.4.313.2'

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: '24'
          cache: npm
          cache-dependency-path: tools/ui/package-lock.json

      # vulkan entry only: download and run the LunarG installer, then export
      # VULKAN_SDK and add the SDK `bin` directory to PATH for later steps.
      - name: Install Vulkan SDK
        id: get_vulkan
        if: matrix.backend == 'vulkan'
        run: |
          curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/vulkansdk-windows-X64-${env:VULKAN_VERSION}.exe"
          & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
          Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
          Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"

      - name: Install Ninja
        id: install_ninja
        run: |
          choco install ninja

      # TODO: these jobs need to use llvm toolchain in order to utilize the ccache
      #- name: ccache
      #  uses: ggml-org/ccache-action@v1.2.21
      #  with:
      #    key: release-windows-2025-${{ matrix.arch }}-${{ matrix.backend }}

      # opencl-adreno/arm64 entry only: build and install the Khronos OpenCL
      # headers and ICD loader into $RUNNER_TEMP/opencl-arm64-release, the
      # prefix that the matrix `defines` pass as CMAKE_PREFIX_PATH.
      - name: Install OpenCL Headers and Libs
        id: install_opencl
        if: matrix.backend == 'opencl-adreno' && matrix.arch == 'arm64'
        run: |
          git clone https://github.com/KhronosGroup/OpenCL-Headers
          cd OpenCL-Headers
          cmake -B build `
            -DBUILD_TESTING=OFF `
            -DOPENCL_HEADERS_BUILD_TESTING=OFF `
            -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
          cmake --build build --target install
          git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
          cd OpenCL-ICD-Loader
          cmake -B build-arm64-release `
            -A arm64 `
            -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
            -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
          cmake --build build-arm64-release --target install --config release

      # Only the backend target of the current matrix entry is built.
      - name: Build
        id: cmake_build
        run: |
          cmake -S . -B build ${{ matrix.defines }} -DGGML_NATIVE=OFF -DGGML_CPU=OFF -DGGML_BACKEND_DL=ON -DLLAMA_BUILD_BORINGSSL=ON
          cmake --build build --config Release --target ${{ matrix.target }}

      #- name: ccache-clear
      #  uses: ./.github/actions/ccache-clear
      #  with:
      #    key: release-windows-2025-${{ matrix.arch }}-${{ matrix.backend }}

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a -snl llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip .\build\bin\Release\${{ matrix.target }}.dll

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          name: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip
          path: llama-bin-win-${{ matrix.backend }}-${{ matrix.arch }}.zip

  windows-cuda:
    # Windows CUDA backend library, built once per CUDA toolkit version in the
    # matrix. Uploads two archives per version: ggml-cuda.dll and the CUDA
    # runtime DLLs.
    needs:
      - check-release
    if: needs.check-release.outputs.should_release == 'true'

    runs-on: windows-2022

    strategy:
      matrix:
        cuda:
          - '12.4'
          - '13.3'

    permissions:
      actions: write

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: '24'
          cache: npm
          cache-dependency-path: tools/ui/package-lock.json

      - name: Install Cuda Toolkit
        uses: ./.github/actions/windows-setup-cuda
        with:
          cuda_version: ${{ matrix.cuda }}

      - name: Install Ninja
        id: install_ninja
        run: |
          choco install ninja

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-windows-2022-x64-cuda-${{ matrix.cuda }}

      # Only the ggml-cuda target is built, with one job fewer than the number
      # of processors.
      # TODO: Remove GGML_CUDA_CUB_3DOT2 flag once CCCL 3.2 is bundled within CTK and that CTK version is used in this project
      - name: Build
        id: cmake_build
        shell: cmd
        run: |
          call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DGGML_BACKEND_DL=ON ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CPU=OFF ^
            -DGGML_CUDA=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON ^
            -DGGML_CUDA_CUB_3DOT2=ON
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS% --target ggml-cuda

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-windows-2022-x64-cuda-${{ matrix.cuda }}

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a -snl llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip .\build\bin\Release\ggml-cuda.dll

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          name: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          path: llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

      # The cudart / cublas / cublasLt DLLs are looked up in three candidate
      # directories under CUDA_PATH and collected into one folder for zipping.
      - name: Copy and pack Cuda runtime
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\bin\x64" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        uses: actions/upload-artifact@v6
        with:
          name: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip
          path: cudart-llama-bin-win-cuda-${{ matrix.cuda }}-x64.zip

  windows-sycl:
    # Windows SYCL backend package: builds the ggml-sycl target with the Intel
    # oneAPI compiler and bundles the oneAPI runtime files it is shipped with.
    #
    # Gated on check-release like the other build jobs, so that a
    # "[no release]" commit does not start this build.
    needs: [check-release]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}

    runs-on: windows-2022

    # same permission that the other jobs running ccache-clear declare
    permissions:
      actions: write

    defaults:
      run:
        shell: bash

    env:
      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b60765d1-2b85-4e85-86b6-cb0e9563a699/intel-deep-learning-essentials-2025.3.3.18_offline.exe
      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
      LEVEL_ZERO_SDK_URL: https://github.com/oneapi-src/level-zero/releases/download/v1.28.2/level-zero-win-sdk-1.28.2.zip
      ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
      ONEAPI_INSTALLER_VERSION: "2025.3.3"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Download & Install oneAPI
        shell: bash
        run: |
          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL

      # Unpacks the SDK to C:/level-zero-sdk and exports that location as
      # LEVEL_ZERO_V1_SDK_PATH for the following steps.
      - name: Install Level Zero SDK
        shell: pwsh
        run: |
          Invoke-WebRequest -Uri "${{ env.LEVEL_ZERO_SDK_URL }}" -OutFile "level-zero-win-sdk.zip"
          Expand-Archive -Path "level-zero-win-sdk.zip" -DestinationPath "C:/level-zero-sdk" -Force
          "LEVEL_ZERO_V1_SDK_PATH=C:/level-zero-sdk" | Out-File -FilePath $env:GITHUB_ENV -Append

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-windows-2022-x64-sycl

      # Only the ggml-sycl target is built (C compiler `cl`, C++ compiler `icx`).
      - name: Build
        id: cmake_build
        shell: cmd
        run: |
          call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force
          cmake -G "Ninja" -B build ^
            -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx ^
            -DCMAKE_BUILD_TYPE=Release ^
            -DGGML_BACKEND_DL=ON -DBUILD_SHARED_LIBS=ON ^
            -DGGML_CPU=OFF -DGGML_SYCL=ON ^
            -DLLAMA_BUILD_BORINGSSL=ON
          cmake --build build --target ggml-sycl -j %NUMBER_OF_PROCESSORS%

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-windows-2022-x64-sycl

      # Copies the oneAPI runtime files next to the build output and zips
      # ./build/bin. The Level Zero loader is optional: it is copied when found
      # under ONEAPI_ROOT or the Level Zero SDK, otherwise a notice is printed.
      - name: Build the release package
        id: pack_artifacts
        run: |
          echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"

          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin

          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero_v2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
          ZE_LOADER_DLL=$(find "${{ env.ONEAPI_ROOT }}" "$LEVEL_ZERO_V1_SDK_PATH" -iname ze_loader.dll -print -quit 2>/dev/null || true)
          if [ -n "$ZE_LOADER_DLL" ]; then
            echo "Using Level Zero loader: $ZE_LOADER_DLL"
            cp "$ZE_LOADER_DLL" ./build/bin
          else
            echo "Level Zero loader DLL not found in oneAPI or SDK; relying on system driver/runtime"
          fi

          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl-ls.exe" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-fallback-bfloat16.spv" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libsycl-native-bfloat16.spv" ./build/bin

          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin

          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/tcm.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tcm/latest/bin/libhwloc-15.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/umf/latest/bin/umf.dll" ./build/bin

          echo "cp oneAPI running time dll files to ./build/bin done"
          7z a -snl llama-bin-win-sycl-x64.zip ./build/bin/*

      - name: Upload the release package
        uses: actions/upload-artifact@v6
        with:
          path: llama-bin-win-sycl-x64.zip
          name: llama-bin-win-sycl-x64.zip

  ubuntu-24-sycl:
    # Linux x64 SYCL release binaries, built twice: with GGML_SYCL_F16 off
    # (fp32) and on (fp16).
    #
    # Gated on check-release like the other build jobs, so that a
    # "[no release]" commit does not start this build.
    needs: [check-release]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}

    strategy:
      matrix:
        build: [fp32, fp16]
        include:
          # quoted so the values stay the strings "OFF" / "ON" and can never be
          # read as YAML booleans
          - build: fp32
            fp16: 'OFF'
          - build: fp16
            fp16: 'ON'

    runs-on: ubuntu-24.04

    # same permission that the other jobs running ccache-clear declare
    permissions:
      actions: write

    env:
      ONEAPI_ROOT: /opt/intel/oneapi/
      ONEAPI_INSTALLER_VERSION: "2025.3.3"
      LEVEL_ZERO_VERSION: "1.28.2"
      LEVEL_ZERO_UBUNTU_VERSION: "u24.04"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Download & Install oneAPI
        shell: bash
        run: |
          cd /tmp
          wget https://registrationcenter-download.intel.com/akdlm/IRC_NAS/56f7923a-adb8-43f3-8b02-2b60fcac8cab/intel-deep-learning-essentials-2025.3.3.16_offline.sh -O intel-deep-learning-essentials_offline.sh
          sudo bash intel-deep-learning-essentials_offline.sh -s -a --silent --eula accept

      # Downloads the level-zero runtime and development .deb packages for the
      # pinned version / Ubuntu release and installs both.
      - name: Install Level Zero SDK
        shell: bash
        run: |
          cd /tmp
          wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero.deb
          wget -q "https://github.com/oneapi-src/level-zero/releases/download/v${LEVEL_ZERO_VERSION}/level-zero-devel_${LEVEL_ZERO_VERSION}%2B${LEVEL_ZERO_UBUNTU_VERSION}_amd64.deb" -O level-zero-devel.deb
          sudo apt-get install -y ./level-zero.deb ./level-zero-devel.deb

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-ubuntu-24.04-sycl-${{ matrix.build }}

      # Configures with the oneAPI compilers (icx / icpx); the fp16 matrix
      # value selects GGML_SYCL_F16.
      - name: Build
        id: cmake_build
        run: |
          source /opt/intel/oneapi/setvars.sh
          cmake -B build \
            -G "Ninja" \
            -DCMAKE_BUILD_TYPE=Release \
            -DGGML_SYCL=ON \
            -DCMAKE_C_COMPILER=icx \
            -DCMAKE_CXX_COMPILER=icpx \
            -DLLAMA_OPENSSL=OFF \
            -DGGML_NATIVE=OFF \
            -DGGML_SYCL_F16=${{ matrix.fp16 }}
          time cmake --build build --config Release -j $(nproc)

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-ubuntu-24.04-sycl-${{ matrix.build }}

      # The `name` output of this step is used in the archive file name below.
      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # --transform rewrites the leading "." of every member path to "llama-<tag>"
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-sycl-${{ matrix.build }}-x64.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-sycl-${{ matrix.build }}-x64.tar.gz
          name: llama-bin-ubuntu-sycl-${{ matrix.build }}-x64.tar.gz

  # Builds the Ubuntu 22.04 ROCm/HIP package.
  # Runs only when check-release reports should_release == 'true'.
  ubuntu-22-rocm:
    needs: [check-release, get-version]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}

    runs-on: ubuntu-22.04

    # NOTE(review): presumably required by the ccache-clear action to delete caches - confirm.
    permissions:
      actions: write

    # Single matrix entry; ROCM_VERSION selects which of the two ROCm setup steps below runs.
    strategy:
      matrix:
        include:
          - ROCM_VERSION: "7.2.1"
            gpu_targets: "gfx908;gfx90a;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1151;gfx1150;gfx1200;gfx1201"
            build: 'x64'

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      - name: Free up disk space
        uses: ggml-org/free-disk-space@v1.3.1
        with:
          tool-cache: true

      # Compiler cache keyed by ROCm version; the same key is passed to ccache-clear after the build.
      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }}

      - name: Dependencies
        id: depends
        run: |
          sudo apt install -y build-essential git cmake wget

      # apt-based install: registers the repo.radeon.com signing key and package source
      # for the matrix ROCm version, pins that origin at priority 600, then installs
      # rocm-hip-sdk. Runs only for ROCM_VERSION 7.2.1 (the only matrix entry today).
      - name: Setup Legacy ROCm
        if: matrix.ROCM_VERSION == '7.2.1'
        id: legacy_env
        run: |
          sudo mkdir --parents --mode=0755 /etc/apt/keyrings
          wget https://repo.radeon.com/rocm/rocm.gpg.key -O - | \
            gpg --dearmor | sudo tee /etc/apt/keyrings/rocm.gpg > /dev/null

          sudo tee /etc/apt/sources.list.d/rocm.list << EOF
          deb [arch=amd64 signed-by=/etc/apt/keyrings/rocm.gpg] https://repo.radeon.com/rocm/apt/${{ matrix.ROCM_VERSION }} jammy main
          EOF

          sudo tee /etc/apt/preferences.d/rocm-pin-600 << EOF
          Package: *
          Pin: release o=repo.radeon.com
          Pin-Priority: 600
          EOF

          sudo apt update
          sudo apt-get install -y libssl-dev rocm-hip-sdk

      # Tarball-based install for any ROCM_VERSION other than 7.2.1; with the current
      # matrix this step never runs. It unpacks the distribution into ./install and
      # exports ROCM_PATH, PATH and LD_LIBRARY_PATH to later steps through GITHUB_ENV.
      - name: Setup TheRock
        if: matrix.ROCM_VERSION != '7.2.1'
        id: therock_env
        run: |
          wget https://repo.amd.com/rocm/tarball/therock-dist-linux-gfx1151-${{ matrix.ROCM_VERSION }}.tar.gz
          mkdir install
          tar -xf *.tar.gz -C install
          export ROCM_PATH=$(pwd)/install
          echo ROCM_PATH=$ROCM_PATH >> $GITHUB_ENV
          echo PATH=$PATH:$ROCM_PATH/bin >> $GITHUB_ENV
          echo LD_LIBRARY_PATH=$ROCM_PATH/lib:$ROCM_PATH/llvm/lib:$ROCM_PATH/lib/rocprofiler-systems >> $GITHUB_ENV

      # Configures with the clang found via `hipconfig -l`, the GPU targets from the
      # matrix, the UI version from get-version, and the workflow-wide CMAKE_ARGS.
      - name: Build with native CMake HIP support
        id: cmake_build
        run: |
          cmake -B build -S . \
            -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
            -DCMAKE_BUILD_TYPE=Release \
            -DGGML_BACKEND_DL=ON \
            -DGGML_NATIVE=OFF \
            -DCMAKE_INSTALL_RPATH='$ORIGIN' \
            -DCMAKE_BUILD_WITH_INSTALL_RPATH=ON \
            -DGGML_CPU_ALL_VARIANTS=ON \
            -DGPU_TARGETS="${{ matrix.gpu_targets }}" \
            -DGGML_HIP=ON \
            -DHIP_PLATFORM=amd \
            -DGGML_HIP_ROCWMMA_FATTN=ON \
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} \
            ${{ env.CMAKE_ARGS }}
          cmake --build build --config Release -j $(nproc)

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-ubuntu-22.04-rocm-${{ matrix.ROCM_VERSION }}

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # Keeps the first two dot-separated fields of the version (7.2.1 -> 7.2);
      # the short form is used in the artifact file names below.
      - name: Get ROCm short version
        run: echo "ROCM_VERSION_SHORT=$(echo '${{ matrix.ROCM_VERSION }}' | cut -d '.' -f 1,2)" >> $GITHUB_ENV

      # --transform renames the leading "." so the archive unpacks into llama-<tag>/.
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          cp LICENSE ./build/bin/
          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/bin .

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz
          name: llama-bin-ubuntu-rocm-${{ env.ROCM_VERSION_SHORT }}-${{ matrix.build }}.tar.gz

  # Builds the Windows HIP (AMD Radeon) backend package.
  # Runs only when check-release reports should_release == 'true'.
  windows-hip:
    needs: [check-release, get-version]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}

    runs-on: windows-2022

    # NOTE(review): presumably required by the ccache-clear action to delete caches - confirm.
    permissions:
      actions: write

    # Version string used in the HIP SDK installer URL and in the cache keys below.
    env:
      HIPSDK_INSTALLER_VERSION: "26.Q1"

    strategy:
      matrix:
        include:
          - name: "radeon"
            gpu_targets: "gfx1150;gfx1151;gfx1200;gfx1201;gfx1100;gfx1101;gfx1102;gfx1030;gfx1031;gfx1032"

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6

      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
          node-version: "24"
          cache: "npm"
          cache-dependency-path: "tools/ui/package-lock.json"

      # Downloads the Ubuntu rocwmma-dev .deb and unpacks it with 7z into the workspace.
      # The Build step adds <workspace>/opt/rocm-7.2.1/include/ to the include path,
      # presumably where these headers end up - TODO confirm.
      - name: Grab rocWMMA package
        id: grab_rocwmma
        run: |
          curl -o rocwmma.deb "https://repo.radeon.com/rocm/apt/7.2.1/pool/main/r/rocwmma-dev/rocwmma-dev_2.2.0.70201-81~24.04_amd64.deb"
          7z x rocwmma.deb
          7z x data.tar

      # Caches the installed SDK directory; on a cache hit the Install ROCm step is skipped.
      - name: Cache ROCm Installation
        id: cache-rocm
        uses: actions/cache@v5
        with:
          path: C:\Program Files\AMD\ROCm
          key: cache-gha-rocm-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ runner.os }}

      - name: ccache
        uses: ggml-org/ccache-action@v1.2.21
        with:
          key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}

      # Downloads and runs the AMD HIP SDK installer. The installer is killed and the
      # step fails if it has not exited within 600000 ms (10 minutes); a non-zero
      # installer exit code also fails the step.
      - name: Install ROCm
        if: steps.cache-rocm.outputs.cache-hit != 'true'
        id: depends
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-${{ env.HIPSDK_INSTALLER_VERSION }}-Win11-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          $proc = Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -PassThru
          $completed = $proc.WaitForExit(600000)
          if (-not $completed) {
              Write-Error "ROCm installation timed out after 10 minutes. Killing the process"
              $proc.Kill()
              exit 1
          }
          if ($proc.ExitCode -ne 0) {
              Write-Error "ROCm installation failed with exit code $($proc.ExitCode)"
              exit 1
          }
          write-host "Completed AMD HIP SDK installation"

      # Runs whether the SDK came from the cache or from a fresh install:
      # fails if no clang.exe is found under the ROCm directory.
      - name: Verify ROCm
        id: verify
        run: |
          # Find and test ROCm installation
          $clangPath = Get-ChildItem 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | Select-Object -First 1
          if (-not $clangPath) {
            Write-Error "ROCm installation not found"
            exit 1
          }
          & $clangPath.FullName --version

      # HIP_PATH is derived from the location of clang.exe (two directories up).
      # Only the ggml-hip target is built (GGML_CPU=OFF); afterwards the hipBLAS,
      # hipBLASLt and rocBLAS DLLs and their library directories are copied next to
      # the build output so they are included in the package.
      - name: Build
        id: cmake_build
        run: |
          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DCMAKE_CXX_FLAGS="-I$($PWD.Path.Replace('\', '/'))/opt/rocm-7.2.1/include/ -Wno-ignored-attributes -Wno-nested-anon-types" `
            -DCMAKE_BUILD_TYPE=Release `
            -DGGML_BACKEND_DL=ON `
            -DGGML_NATIVE=OFF `
            -DGGML_CPU=OFF `
            -DGPU_TARGETS="${{ matrix.gpu_targets }}" `
            -DGGML_HIP_ROCWMMA_FATTN=ON `
            -DGGML_HIP=ON `
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }} `
            -DLLAMA_BUILD_BORINGSSL=ON
          cmake --build build --target ggml-hip -j ${env:NUMBER_OF_PROCESSORS}
          md "build\bin\rocblas\library\"
          md "build\bin\hipblaslt\library"
          cp "${env:HIP_PATH}\bin\libhipblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\libhipblaslt.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
          cp "${env:HIP_PATH}\bin\hipblaslt\library\*" "build\bin\hipblaslt\library\"

      - name: ccache-clear
        uses: ./.github/actions/ccache-clear
        with:
          key: release-windows-2022-x64-hip-${{ env.HIPSDK_INSTALLER_VERSION }}-${{ matrix.name }}

      # The zip name carries no tag; the release job adds the tag when it renames
      # the llama-bin-win-*.zip artifacts.
      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a -snl llama-bin-win-hip-${{ matrix.name }}-x64.zip .\build\bin\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-bin-win-hip-${{ matrix.name }}-x64.zip
          name: llama-bin-win-hip-${{ matrix.name }}-x64.zip

  # Builds llama.cpp for iOS with the Xcode generator, produces the XCFramework,
  # builds the SwiftUI example against it, and uploads the zipped XCFramework.
  # Runs only when check-release reports should_release == 'true'.
  ios-xcode:
    needs: [check-release, get-version]
    if: ${{ needs.check-release.outputs.should_release == 'true' }}
    runs-on: macos-26

    steps:
      - name: Checkout code
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Setup Xcode
        run: |
          sudo xcode-select -s /Applications/Xcode_26.4.app

      # Every option line of the cmake invocation must end with a backslash:
      # without it the shell ends the command there and tries to execute the
      # next "-D..." line as a command of its own, which fails the step.
      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build -G Xcode \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_OPENSSL=OFF \
            -DLLAMA_BUILD_APP=OFF \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            -DLLAMA_BUILD_TOOLS=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_SYSTEM_NAME=iOS \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=16.0 \
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml \
            -DHF_UI_VERSION=${{ needs.get-version.outputs.ui_version }}
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO

      - name: xcodebuild for swift package
        id: xcodebuild
        run: |
          ./build-xcframework.sh

      # Builds the SwiftUI example for a generic iOS device with code signing disabled.
      - name: Build Xcode project
        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' FRAMEWORK_FOLDER_PATH=./build-ios build

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          # Zip file is required for Swift Package Manager, which does not support tar.gz for binary targets.
          # For more details, see https://developer.apple.com/documentation/xcode/distributing-binary-frameworks-as-swift-packages
          zip -r -y llama-${{ steps.tag.outputs.name }}-xcframework.zip build-apple/llama.xcframework

      - name: Upload artifacts
        uses: actions/upload-artifact@v6
        with:
          path: llama-${{ steps.tag.outputs.name }}-xcframework.zip
          name: llama-${{ steps.tag.outputs.name }}-xcframework.zip

# TODO: this build is disabled to save GitHub Actions resources (https://github.com/ggml-org/llama.cpp/pull/23705)
#       in order to enable it again, we have to provision dedicated runners to run it
#  openEuler-cann:
#    strategy:
#      matrix:
#        include:
#          # 910b with aclgraph (both architectures)
#          - arch: x86
#            chip_type: '910b'
#            build: 'Release'
#            use_acl_graph: 'on'
#          - arch: aarch64
#            chip_type: '910b'
#            build: 'Release'
#            use_acl_graph: 'on'
#          # 310p without aclgraph (both architectures)
#          - arch: x86
#            chip_type: '310p'
#            build: 'Release'
#            use_acl_graph: 'off'
#          - arch: aarch64
#            chip_type: '310p'
#            build: 'Release'
#            use_acl_graph: 'off'
#    runs-on: ${{ matrix.arch == 'aarch64' && 'ubuntu-24.04-arm' || 'ubuntu-24.04' }}
#    steps:
#      - name: Checkout
#        uses: actions/checkout@v6
#        with:
#          fetch-depth: 0
#
#      - name: Free up disk space
#        uses: ggml-org/free-disk-space@v1.3.1
#        with:
#          tool-cache: true
#
#      - name: Set container image
#        id: cann-image
#        run: |
#          image="ascendai/cann:${{ matrix.chip_type == '910b' &&  '8.5.0-910b-openeuler24.03-py3.11' || '8.5.0-310p-openeuler24.03-py3.11' }}"
#          echo "image=${image}" >> "${GITHUB_OUTPUT}"
#
#      - name: Pull container image
#        run: docker pull "${{ steps.cann-image.outputs.image }}"
#
#      - name: Build
#        env:
#          BUILD_TYPE: ${{ matrix.build }}
#          SOC_TYPE: ascend${{ matrix.chip_type }}
#          USE_ACL_GRAPH: ${{ matrix.use_acl_graph }}
#        run: |
#          HOST_UID=$(id -u)
#          HOST_GID=$(id -g)
#
#          docker run --rm \
#            -v "${PWD}:/workspace" \
#            -w /workspace \
#            -e SOC_TYPE=${SOC_TYPE} \
#            -e BUILD_TYPE=${BUILD_TYPE} \
#            -e USE_ACL_GRAPH=${USE_ACL_GRAPH} \
#            "${{ steps.cann-image.outputs.image }}" \
#            bash -lc '
#              set -e
#              yum install -y --setopt=install_weak_deps=False --setopt=tsflags=nodocs git gcc gcc-c++ make cmake openssl-devel
#              yum clean all && rm -rf /var/cache/yum
#              git config --global --add safe.directory "/workspace"
#              export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
#              cmake -S . -B build \
#                  -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
#                  -DGGML_CANN=on \
#                  -DSOC_TYPE=${SOC_TYPE} \
#                  -DUSE_ACL_GRAPH=${USE_ACL_GRAPH}
#              cmake --build build -j $(nproc)
#
#              chown -R '"${HOST_UID}"':'"${HOST_GID}"' /workspace/build
#            '
#
#      - name: Determine tag name
#        id: tag
#        uses: ./.github/actions/get-tag-name
#
#      - name: Pack artifacts
#        run: |
#          cp LICENSE ./build/bin/
#          tar -czvf llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./build/bin .
#
#      - name: Upload artifacts
#        uses: actions/upload-artifact@v6
#        with:
#          path: llama-${{ steps.tag.outputs.name }}-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz
#          name: llama-bin-${{ matrix.chip_type }}-openEuler-${{ matrix.arch }}${{ matrix.use_acl_graph == 'on' && '-aclgraph' || '' }}.tar.gz

  ui-build:
    # Calls the reusable UI build workflow, passing the UI version resolved by
    # get-version; skipped unless check-release asks for a release.
    needs:
      - check-release
      - get-version
    if: needs.check-release.outputs.should_release == 'true'
    uses: ./.github/workflows/ui-build.yml
    with:
      hf_ui_version: ${{ needs.get-version.outputs.ui_version }}

  # Collects the artifacts of all build jobs, creates the GitHub release for the
  # computed tag and uploads every .zip / .tar.gz as a release asset.
  release:
    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

    # Fine-grained permissions
    # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
    permissions:
      contents: write # for creating release

    runs-on: ubuntu-slim

    # Every entry must be the id of a job in this workflow. The UI job is
    # `ui-build`; its `ui-build` artifact is downloaded by a step below.
    needs:
      - get-version
      - windows
      - windows-cpu
      - windows-cuda
      #- windows-sycl
      - windows-hip
      - ubuntu-22-rocm
      - ubuntu-cpu
      - ubuntu-vulkan
      - ubuntu-24-openvino
      #- ubuntu-24-sycl
      - android-arm64
      - macos-cpu
      - ios-xcode
      #- openEuler-cann
      - ui-build

    # Consumed by the ui-publish job as its version tag.
    outputs:
      tag_name: ${{ steps.tag.outputs.name }}

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v6
        with:
          fetch-depth: 0

      - name: Determine tag name
        id: tag
        uses: ./.github/actions/get-tag-name

      # No artifact name is given, so all artifacts of this run are downloaded
      # and merged into ./artifact.
      - name: Download artifacts
        id: download-artifact
        uses: actions/download-artifact@v7
        with:
          path: ./artifact
          merge-multiple: true

      # 1. For each Windows arch, adds the contents of the CPU zip to every other
      #    llama-bin-win-*-<arch>.zip.
      # 2. Renames the Windows zips to llama-<tag>-bin-win-... and moves them to release/.
      # 3. Moves the remaining .zip and .tar.gz artifacts to release/ unchanged.
      - name: Move artifacts
        id: move_artifacts
        run: |
          mkdir -p release

          echo "Adding CPU backend files to existing zips..."
          for arch in x64 arm64; do
            cpu_zip="artifact/llama-bin-win-cpu-${arch}.zip"
            temp_dir=$(mktemp -d)
            echo "Extracting CPU backend for $arch..."
            unzip "$cpu_zip" -d "$temp_dir"

            echo "Adding CPU files to $arch zips..."
            for target_zip in artifact/llama-bin-win-*-${arch}.zip; do
              if [[ "$target_zip" == "$cpu_zip" ]]; then
                continue
              fi
              echo "Adding CPU backend to $(basename "$target_zip")"
              realpath_target_zip=$(realpath "$target_zip")
              (cd "$temp_dir" && zip -r "$realpath_target_zip" .)
            done

            rm -rf "$temp_dir"
          done

          echo "Renaming and moving zips to release..."
          for zip_file in artifact/llama-bin-win-*.zip; do
            base_name=$(basename "$zip_file" .zip)
            zip_name="llama-${{ steps.tag.outputs.name }}-${base_name#llama-}.zip"
            echo "Moving $zip_file to release/$zip_name"
            mv "$zip_file" "release/$zip_name"
          done

          echo "Moving other artifacts..."
          mv -v artifact/*.zip release
          mv -v artifact/*.tar.gz release

      - name: Download UI build
        id: download_ui
        uses: actions/download-artifact@v7
        with:
          name: ui-build
          path: ./ui-dist

      # --transform renames the leading "." so the archive unpacks into llama-<tag>/.
      - name: Package UI
        id: package_ui
        run: |
          tar -czvf release/llama-${{ steps.tag.outputs.name }}-ui.tar.gz --transform "s,^\.,llama-${{ steps.tag.outputs.name }}," -C ./ui-dist .

      # The release body is the head commit message followed by download links
      # that follow the asset naming used by the build jobs.
      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}
          body: |
            <details open>

            ${{ github.event.head_commit.message }}

            </details>

            **macOS/iOS:**
            - [macOS Apple Silicon (arm64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.tar.gz)
            - macOS Apple Silicon (arm64, KleidiAI enabled) [DISABLED](https://github.com/ggml-org/llama.cpp/pull/23780)
            - [macOS Intel (x64)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-macos-x64.tar.gz)
            - [iOS XCFramework](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-xcframework.zip)

            **Linux:**
            - [Ubuntu x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.tar.gz)
            - [Ubuntu arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-arm64.tar.gz)
            - [Ubuntu s390x (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-s390x.tar.gz)
            - [Ubuntu x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-x64.tar.gz)
            - [Ubuntu arm64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-vulkan-arm64.tar.gz)
            - [Ubuntu x64 (ROCm 7.2)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-rocm-7.2-x64.tar.gz)
            - [Ubuntu x64 (OpenVINO)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-ubuntu-openvino-${{ needs.ubuntu-24-openvino.outputs.openvino_version }}-x64.tar.gz)
            - Ubuntu x64 (SYCL FP32) [DISABLED](https://github.com/ggml-org/llama.cpp/pull/23705)

            **Android:**
            - [Android arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-android-arm64.tar.gz)

            **Windows:**
            - [Windows x64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-x64.zip)
            - [Windows arm64 (CPU)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cpu-arm64.zip)
            - [Windows x64 (CUDA 12)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-12.4-x64.zip) - [CUDA 12.4 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-12.4-x64.zip)
            - [Windows x64 (CUDA 13)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-cuda-13.3-x64.zip) - [CUDA 13.3 DLLs](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/cudart-llama-bin-win-cuda-13.3-x64.zip)
            - [Windows x64 (Vulkan)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-vulkan-x64.zip)
            - Windows x64 (SYCL) [DISABLED](https://github.com/ggml-org/llama.cpp/pull/23705)
            - [Windows x64 (HIP)](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-bin-win-hip-radeon-x64.zip)

            **openEuler:**
            - [DISABLED](https://github.com/ggml-org/llama.cpp/pull/23705)
            - openEuler x86 (310p)
            - openEuler x86 (910b, ACL Graph)
            - openEuler aarch64 (310p)
            - openEuler aarch64 (910b, ACL Graph)

            **UI:**
            - [UI](https://github.com/ggml-org/llama.cpp/releases/download/${{ steps.tag.outputs.name }}/llama-${{ steps.tag.outputs.name }}-ui.tar.gz)

      # Uploads every .zip and .tar.gz in ./release to the release created above.
      - name: Upload release
        id: upload_release
        uses: actions/github-script@v8
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const path = require('path');
            const fs = require('fs');
            const release_id = '${{ steps.create_release.outputs.id }}';
            for (let file of await fs.readdirSync('./release')) {
              if (path.extname(file) === '.zip' || file.endsWith('.tar.gz')) {
                console.log('uploadReleaseAsset', file);
                await github.rest.repos.uploadReleaseAsset({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  release_id: release_id,
                  name: file,
                  data: await fs.readFileSync(`./release/${file}`)
                });
              }
            }

  ui-publish:
    # Publishes the UI through the reusable ui-publish workflow once the release
    # job has finished, using the tag that job exposes as its tag_name output.
    needs: [release]
    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
    uses: ./.github/workflows/ui-publish.yml
    with:
      version_tag: ${{ needs.release.outputs.tag_name }}
    secrets:
      hf_token: ${{ secrets.HF_TOKEN_UI_STATIC_OUTPUT }}