sycl : Enable to support fp16 by OPs: SQR, SQRT, LOG, SIN, COS, CLAMP (#24692)

This commit is contained in:
Neo Zhang 2026-06-17 13:58:03 +08:00 committed by GitHub
parent ebbc1e51c1
commit 58728bdbf0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 28 additions and 33 deletions

View File

@ -23,7 +23,7 @@ Legend:
| ARGMAX | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
| ARGSORT | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ✅ | ❌ | ❌ |
| CEIL | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
| CLAMP | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| CLAMP | ❌ | ✅ | ✅ | ✅ | ✅ | 🟡 | | 🟡 | ✅ | ❌ | ❌ |
| COL2IM_1D | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| CONCAT | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ |
| CONT | ❌ | 🟡 | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | 🟡 | ❌ | ❌ |
@ -32,7 +32,7 @@ Legend:
| CONV_3D | ❌ | ❌ | ✅ | ❌ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
| CONV_TRANSPOSE_1D | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
| CONV_TRANSPOSE_2D | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
| COS | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| COS | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | | 🟡 | ✅ | ❌ | ❌ |
| COUNT_EQUAL | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
| CPY | ❌ | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | ❌ | ❌ |
| CROSS_ENTROPY_LOSS | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ | ❌ |
@ -65,7 +65,7 @@ Legend:
| IM2COL_3D | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ |
| L2_NORM | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
| LEAKY_RELU | ❌ | ✅ | ✅ | ✅ | 🟡 | ❌ | ✅ | 🟡 | ❌ | ❌ | ❌ |
| LOG | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ |
| LOG | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | | ✅ | ✅ | ❌ | ❌ |
| MEAN | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ |
| MUL | ❌ | ✅ | ✅ | ✅ | 🟡 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
| MUL_MAT | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 | 🟡 |
@ -99,13 +99,13 @@ Legend:
| SIGMOID | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ |
| SILU | ❌ | ✅ | ✅ | 🟡 | ✅ | 🟡 | ✅ | ✅ | ✅ | ❌ | ❌ |
| SILU_BACK | ❌ | ❌ | ✅ | ✅ | ❌ | ❌ | ❌ | ✅ | ❌ | ❌ | ❌ |
| SIN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| SIN | ❌ | ✅ | ✅ | ✅ | ✅ | ❌ | | 🟡 | ✅ | ❌ | ❌ |
| SOFTPLUS | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |
| SOFT_MAX | ❌ | 🟡 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
| SOFT_MAX_BACK | ❌ | ❌ | 🟡 | 🟡 | ❌ | ❌ | 🟡 | ✅ | ❌ | ❌ | ❌ |
| SOLVE_TRI | ❌ | ❌ | ✅ | 🟡 | ✅ | ❌ | 🟡 | ✅ | ✅ | ❌ | ❌ |
| SQR | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| SQRT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| SQR | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | | 🟡 | ✅ | ❌ | ❌ |
| SQRT | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | | 🟡 | ✅ | ❌ | ❌ |
| SSM_CONV | ❌ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ |
| SSM_SCAN | ❌ | ❌ | ✅ | ✅ | ✅ | ❌ | 🟡 | 🟡 | ✅ | ❌ | ❌ |
| STEP | ❌ | ✅ | ✅ | 🟡 | ✅ | ❌ | ✅ | ✅ | ✅ | ❌ | ❌ |

View File

@ -9743,29 +9743,29 @@
"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=1","support","1","yes","SYCL"
"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=32","support","1","yes","SYCL"
"SYCL0","ADD_ID","type_a=f32,type_b=f32,n_embd=129,n_experts=8,n_experts_used=4,n_token=129","support","1","yes","SYCL"
"SYCL0","SQR","type=f16,ne=[10,5,4,3]","support","0","no","SYCL"
"SYCL0","SQRT","type=f16,ne=[10,3,3,2]","support","0","no","SYCL"
"SYCL0","LOG","type=f16,ne=[10,5,4,3]","support","0","no","SYCL"
"SYCL0","SIN","type=f16,ne=[10,2,2,2]","support","0","no","SYCL"
"SYCL0","COS","type=f16,ne=[10,2,2,2]","support","0","no","SYCL"
"SYCL0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","0","no","SYCL"
"SYCL0","SQR","type=f16,ne=[10,5,4,3]","support","1","yes","SYCL"
"SYCL0","SQRT","type=f16,ne=[10,3,3,2]","support","1","yes","SYCL"
"SYCL0","LOG","type=f16,ne=[10,5,4,3]","support","1","yes","SYCL"
"SYCL0","SIN","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
"SYCL0","COS","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
"SYCL0","CLAMP","type=f16,ne=[10,5,4,3],min=-0.500000,max=0.500000","support","1","yes","SYCL"
"SYCL0","LEAKY_RELU","type=f16,ne_a=[10,5,4,3],negative_slope=0.100000","support","1","yes","SYCL"
"SYCL0","FLOOR","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
"SYCL0","CEIL","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
"SYCL0","ROUND","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
"SYCL0","TRUNC","type=f16,ne=[10,2,2,2]","support","1","yes","SYCL"
"SYCL0","SQR","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
"SYCL0","SQR","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
"SYCL0","SQRT","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
"SYCL0","SQRT","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
"SYCL0","LOG","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
"SYCL0","LOG","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
"SYCL0","SIN","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
"SYCL0","SIN","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
"SYCL0","COS","type=f16,ne=[7,1,5,3]","support","0","no","SYCL"
"SYCL0","COS","type=f16,ne=[1024,1024,1,1]","support","0","no","SYCL"
"SYCL0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","0","no","SYCL"
"SYCL0","CLAMP","type=f16,ne=[1024,1024,1,1],min=-0.500000,max=0.500000","support","0","no","SYCL"
"SYCL0","SQR","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
"SYCL0","SQR","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
"SYCL0","SQRT","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
"SYCL0","SQRT","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
"SYCL0","LOG","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
"SYCL0","LOG","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
"SYCL0","SIN","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
"SYCL0","SIN","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
"SYCL0","COS","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
"SYCL0","COS","type=f16,ne=[1024,1024,1,1]","support","1","yes","SYCL"
"SYCL0","CLAMP","type=f16,ne=[7,1,5,3],min=-0.500000,max=0.500000","support","1","yes","SYCL"
"SYCL0","CLAMP","type=f16,ne=[1024,1024,1,1],min=-0.500000,max=0.500000","support","1","yes","SYCL"
"SYCL0","LEAKY_RELU","type=f16,ne_a=[7,1,5,3],negative_slope=0.100000","support","1","yes","SYCL"
"SYCL0","LEAKY_RELU","type=f16,ne_a=[1024,1024,1,1],negative_slope=0.100000","support","1","yes","SYCL"
"SYCL0","FLOOR","type=f16,ne=[7,1,5,3]","support","1","yes","SYCL"
@ -11046,8 +11046,8 @@
"SYCL0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","0","no","SYCL"
@ -11095,8 +11095,8 @@
"SYCL0","ARGSORT","type=f32,ne=[8192,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[16383,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[16384,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32767,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[32768,1,1,1],order=0","support","1","yes","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[65535,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[65536,1,1,1],order=0","support","0","no","SYCL"
"SYCL0","ARGSORT","type=f32,ne=[131071,1,1,1],order=0","support","0","no","SYCL"

Can't render this file because it is too large.

View File

@ -5582,11 +5582,6 @@ static bool ggml_backend_sycl_device_supports_op(ggml_backend_dev_t dev, const g
case GGML_OP_COS:
case GGML_OP_CLAMP:
case GGML_OP_LOG:
#if defined (GGML_SYCL_F16)
return ((op->type == GGML_TYPE_F32 || op->type == GGML_SYCL_F16) && (op->src[0]->type == GGML_TYPE_F32 || op->src[0]->type == GGML_SYCL_F16) && (op->type == op->src[0]->type));
#else
return (op->type == GGML_TYPE_F32 && op->src[0]->type == GGML_TYPE_F32) && (op->type == op->src[0]->type);
#endif
case GGML_OP_NORM:
case GGML_OP_L2_NORM:
case GGML_OP_GROUP_NORM: