sycl : Make GGML_SYCL_F16=ON the default (#23996)

* Add -cl-fp32-correctly-rounded-divide-sqrt to F16=ON builds Signed-off-by: Todd Malsbary <todd.malsbary@intel.com> * Make GGML_SYCL_F16=ON the default Signed-off-by: Todd Malsbary <todd.malsbary@intel.com> * Leave F32 the default. F16 remains explicitly set for example and Dockerfile builds. Signed-off-by: Todd Malsbary <todd.malsbary@intel.com> * Revert changes to examples/sycl/build scripts Signed-off-by: Todd Malsbary <todd.malsbary@intel.com> --------- Signed-off-by: Todd Malsbary <todd.malsbary@intel.com>
2026-06-27 23:50:20 -05:00 · 2026-06-15 22:34:02 -07:00 · 2026-06-15 22:34:02 -07:00 · 4196b477da
commit 4196b477da
parent ad39ccaa19
2 changed files with 15 additions and 12 deletions
--- a/.devops/intel.Dockerfile
+++ b/.devops/intel.Dockerfile
@@ -7,7 +7,7 @@ ARG APP_REVISION=N/A

 FROM docker.io/intel/deep-learning-essentials:$ONEAPI_VERSION AS build

-ARG GGML_SYCL_F16=OFF
+ARG GGML_SYCL_F16=ON
 ARG LEVEL_ZERO_VERSION=1.28.2
 ARG LEVEL_ZERO_UBUNTU_VERSION=u24.04
 RUN apt-get update && \
@@ -24,7 +24,8 @@ COPY . .

 RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
-        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
+        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON" \
+        && export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \
--- a/docs/backend/SYCL.md
+++ b/docs/backend/SYCL.md
@@ -253,6 +253,7 @@ When targeting an intel GPU, the user should expect one or more devices among th
 #### Intel GPU

 ```sh
+# Uses FP32, consider using FP16 for better performance in most cases
 ./examples/sycl/build.sh
 ```

@@ -262,12 +263,12 @@ or
 # Export relevant ENV variables
 source /opt/intel/oneapi/setvars.sh

-# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
-
-# Option 2: Use FP16
+# Option 1: Use FP16 (recommended for better performance in most cases)
 cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON

+# Option 2: Use FP32
+cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
+
 # build all binary
 cmake --build build --config Release -j -v
 ```
@@ -469,6 +470,7 @@ Choose one of following methods to build from source code.
 ##### Option 1: Script

 ```sh
+# Uses FP32, consider using FP16 for better performance in most cases
 .\examples\sycl\win-build-sycl.bat
 ```

@@ -479,11 +481,11 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru
 ```
@call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force

-# Option 1: Use FP32 (recommended for better performance in most cases)
-cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release
+# Option 1: Use FP16 (recommended for better performance in most cases)
+cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON

-# Option 2: Or FP16
-cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx  -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON
+# Option 2: Or FP32
+cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release

 cmake --build build --config Release -j
 ```
@@ -491,10 +493,10 @@ cmake --build build --config Release -j
 Or, use CMake presets to build:

 ```sh
-cmake --preset x64-windows-sycl-release
+cmake -DGGML_SYCL_F16=ON --preset x64-windows-sycl-release
 cmake --build build-x64-windows-sycl-release -j --target llama-completion

-cmake -DGGML_SYCL_F16=ON --preset x64-windows-sycl-release
+cmake --preset x64-windows-sycl-release
 cmake --build build-x64-windows-sycl-release -j --target llama-completion

 cmake --preset x64-windows-sycl-debug