From 58728bdbf0421f3f6d4407436ccbadf0ee90ab53 Mon Sep 17 00:00:00 2001 From: Neo Zhang Date: Wed, 17 Jun 2026 13:58:03 +0800 Subject: [PATCH] sycl : Enable to support fp16 by OPs: SQR, SQRT, LOG, SIN, COS, CLAMP (#24692) --- docs/ops.md | 12 ++++----- docs/ops/SYCL.csv | 44 ++++++++++++++++---------------- ggml/src/ggml-sycl/ggml-sycl.cpp | 5 ---- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/docs/ops.md b/docs/ops.md index fe74df805e..d46f9b7313 100644 --- a/docs/ops.md +++ b/docs/ops.md @@ -23,7 +23,7 @@ Legend: | ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | | ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ | | CEIL | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | -| CLAMP | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ❌ | ❌ | +| CLAMP | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ❌ | ❌ | | COL2IM_1D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ | | CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ | @@ -32,7 +32,7 @@ Legend: | CONV_3D | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | -| COS | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | +| COS | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ | | COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ | | CROSS_ENTROPY_LOSS | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | @@ -65,7 +65,7 @@ Legend: | IM2COL_3D | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | | L2_NORM | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | | LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ | -| LOG | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ | +| LOG | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | | MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | | MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | | MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | @@ -99,13 +99,13 @@ Legend: | SIGMOID | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ | | SILU | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ | | SILU_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ | -| SIN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | +| SIN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | 🟡 | ✅ | ❌ | ❌ | | SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | | SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | | SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ | ❌ | | SOLVE_TRI | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ | -| SQR | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | ❌ | -| SQRT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | ❌ | +| SQR | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | +| SQRT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | ✅ | ❌ | ❌ | | SSM_CONV | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | | SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ | | STEP | ❌ | ✅ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | diff --git a/docs/ops/SYCL.csv b/docs/ops/SYCL.csv index 9ff664a5bf..19a5346f88 100644 --- a/docs/ops/SYCL.csv +++ b/docs/ops/SYCL.csv @@ -9743,29 +9743,29 @@ "SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","SYCL" "SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","SYCL" "SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","SYCL" -"SYCL0","SQR","type=f16,ne=[10,5,4,3]","support","0","no","SYCL" -"SYCL0","SQRT","type=f16,ne=[10,3,3,2]","support","0","no","SYCL" -"SYCL0","LOG","type=f16,ne=[10,5,4,3]","support","0","no","SYCL" -"SYCL0","SIN","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","COS","type=f16,ne=[10,2,2,2]","support","0","no","SYCL" -"SYCL0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","SYCL" +"SYCL0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","SYCL" +"SYCL0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","SYCL" +"SYCL0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","SYCL" +"SYCL0","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","COS","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" +"SYCL0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","SYCL" "SYCL0","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","SYCL" "SYCL0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" "SYCL0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" "SYCL0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" "SYCL0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL" -"SYCL0","SQR","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","SQR","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL" -"SYCL0","SQRT","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","SQRT","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL" -"SYCL0","LOG","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","LOG","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL" -"SYCL0","SIN","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","SIN","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL" -"SYCL0","COS","type=f16,ne=[7,1,5,3]","support","0","no","SYCL" -"SYCL0","COS","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL" -"SYCL0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","0","no","SYCL" -"SYCL0","CLAMP","type=f16,ne=[1024,1024,1,1],min=-0.500000,max=0.500000","support","0","no","SYCL" +"SYCL0","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","SQR","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","SQRT","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","LOG","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","SIN","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","SIN","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","COS","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" +"SYCL0","COS","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL" +"SYCL0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","SYCL" +"SYCL0","CLAMP","type=f16,ne=[1024,1024,1,1],min=-0.500000,max=0.500000","support","1","yes","SYCL" "SYCL0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","SYCL" "SYCL0","LEAKY_RELU","type=f16,ne_a=[1024,1024,1,1],negative_slope=0.100000","support","1","yes","SYCL" "SYCL0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL" @@ -11046,8 +11046,8 @@ "SYCL0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","SYCL" -"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","0","no","SYCL" -"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","0","no","SYCL" "SYCL0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","0","no","SYCL" "SYCL0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","0","no","SYCL" @@ -11095,8 +11095,8 @@ "SYCL0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","SYCL" -"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","0","no","SYCL" -"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","0","no","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","SYCL" +"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","SYCL" "SYCL0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","0","no","SYCL" "SYCL0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","0","no","SYCL" "SYCL0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","0","no","SYCL" diff --git a/ggml/src/ggml-sycl/ggml-sycl.cpp b/ggml/src/ggml-sycl/ggml-sycl.cpp index 376d4376fc..057e0dccda 100644 --- a/ggml/src/ggml-sycl/ggml-sycl.cpp +++ b/ggml/src/ggml-sycl/ggml-sycl.cpp @@ -5582,11 +5582,6 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g case GGML_OP_COS: case GGML_OP_CLAMP: case GGML_OP_LOG: -#if defined (GGML_SYCL_F16) - return ((op->type == GGML_TYPE_F32 || op->type == GGML_SYCL_F16) && (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_SYCL_F16) && (op->type == op->src[0]->type)); -#else - return (op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32) && (op->type == op->src[0]->type); -#endif case GGML_OP_NORM: case GGML_OP_L2_NORM: case GGML_OP_GROUP_NORM: