diff --git a/.devops/intel.Dockerfile b/.devops/intel.Dockerfile index d2c03cec5c..4d0c0a8fd8 100644 --- a/.devops/intel.Dockerfile +++ b/.devops/intel.Dockerfile @@ -7,7 +7,7 @@ ARG APP_REVISION=N/A FROM docker.io/intel/deep-learning-essentials:$ONEAPI_VERSION AS build -ARG GGML_SYCL_F16=OFF +ARG GGML_SYCL_F16=ON ARG LEVEL_ZERO_VERSION=1.28.2 ARG LEVEL_ZERO_UBUNTU_VERSION=u24.04 RUN apt-get update && \ @@ -24,7 +24,8 @@ COPY . . RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \ echo "GGML_SYCL_F16 is set" \ - && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \ + && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON" \ + && export SYCL_PROGRAM_COMPILE_OPTIONS="-cl-fp32-correctly-rounded-divide-sqrt"; \ fi && \ echo "Building with dynamic libs" && \ cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON -DLLAMA_BUILD_TESTS=OFF ${OPT_SYCL_F16} && \ diff --git a/docs/backend/SYCL.md b/docs/backend/SYCL.md index 3ea94d9d78..18307d170b 100644 --- a/docs/backend/SYCL.md +++ b/docs/backend/SYCL.md @@ -253,6 +253,7 @@ When targeting an intel GPU, the user should expect one or more devices among th #### Intel GPU ```sh +# Uses FP32, consider using FP16 for better performance in most cases ./examples/sycl/build.sh ``` @@ -262,12 +263,12 @@ or # Export relevant ENV variables source /opt/intel/oneapi/setvars.sh -# Option 1: Use FP32 (recommended for better performance in most cases) -cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx - -# Option 2: Use FP16 +# Option 1: Use FP16 (recommended for better performance in most cases) cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON +# Option 2: Use FP32 +cmake -B build -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx + # build all binary cmake --build build --config Release -j -v ``` @@ -469,6 +470,7 @@ Choose one of following methods to build from source code. 
##### Option 1: Script ```sh +# Uses FP32, consider using FP16 for better performance in most cases .\examples\sycl\win-build-sycl.bat ``` @@ -479,11 +481,11 @@ On the oneAPI command line window, step into the llama.cpp main directory and ru ``` @call "C:\Program Files (x86)\Intel\oneAPI\setvars.bat" intel64 --force -# Option 1: Use FP32 (recommended for better performance in most cases) -cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release +# Option 1: Use FP16 (recommended for better performance in most cases) +cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON -# Option 2: Or FP16 -cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release -DGGML_SYCL_F16=ON +# Option 2: Use FP32 +cmake -B build -G "Ninja" -DGGML_SYCL=ON -DCMAKE_C_COMPILER=cl -DCMAKE_CXX_COMPILER=icx -DCMAKE_BUILD_TYPE=Release cmake --build build --config Release -j ``` @@ -491,10 +493,10 @@ cmake --build build --config Release -j Or, use CMake presets to build: ```sh -cmake --preset x64-windows-sycl-release +cmake -DGGML_SYCL_F16=ON --preset x64-windows-sycl-release cmake --build build-x64-windows-sycl-release -j --target llama-completion -cmake -DGGML_SYCL_F16=ON --preset x64-windows-sycl-release +cmake --preset x64-windows-sycl-release cmake --build build-x64-windows-sycl-release -j --target llama-completion cmake --preset x64-windows-sycl-debug