From 9c10954865a0064fcd6b7f4d19400b36de4a6444 Mon Sep 17 00:00:00 2001 From: Neo Zhang Date: Thu, 25 Jun 2026 13:27:58 +0800 Subject: [PATCH] sycl : fix the failed UT cases of conv_3d (#24900) --- ggml/src/ggml-sycl/conv3d.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/ggml/src/ggml-sycl/conv3d.cpp b/ggml/src/ggml-sycl/conv3d.cpp index 2fa29f9305..3796562553 100644 --- a/ggml/src/ggml-sycl/conv3d.cpp +++ b/ggml/src/ggml-sycl/conv3d.cpp @@ -103,8 +103,8 @@ void ggml_sycl_op_conv_3d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { // allocate packed arrays: A_packed (k x m), B_packed (k x n) ggml_sycl_pool_alloc A_packed_alloc(ctx.pool()); ggml_sycl_pool_alloc B_packed_alloc(ctx.pool()); - A_packed_alloc.alloc((size_t) knl_n_total * patch_total * sizeof(float)); - B_packed_alloc.alloc((size_t) knl_n_total * oc * sizeof(float)); + A_packed_alloc.alloc((size_t) knl_n_total * patch_total); + B_packed_alloc.alloc((size_t) knl_n_total * oc); float * A_packed = A_packed_alloc.get(); float * B_packed = B_packed_alloc.get(); @@ -115,10 +115,16 @@ void ggml_sycl_op_conv_3d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { // Combined kernel: im2col -> pack A, and pack B simultaneously const char * src1_base = (const char *) src1->data; + const char * src0_base = (const char *) src0->data; const int64_t src1_nb0 = src1->nb[0]; const int64_t src1_nb1 = src1->nb[1]; const int64_t src1_nb2 = src1->nb[2]; const int64_t src1_nb3 = src1->nb[3]; + const int64_t src1_w = src1->ne[0]; + const int64_t src1_h = src1->ne[1]; + const int64_t src1_d = src1->ne[2]; + + const bool src0_is_f32 = (src0->type == GGML_TYPE_F32); // Compute correct strides for src0 as (knl_n_total, oc) matrix const int64_t src0_packed_nb0 = kernel_type_size; @@ -165,7 +171,7 @@ void ggml_sycl_op_conv_3d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { const int64_t sz = dst_z * s2 + kz * d2 - p2; float val = 0.0f; - if (sx >= 0 && sx < src1->ne[0] && sy >= 0 && sy < src1->ne[1] && sz >= 0 && sz < src1->ne[2]) { + if (sx >= 0 && sx < src1_w && sy >= 0 && sy < src1_h && sz >= 0 && sz < src1_d) { const int64_t channel_idx = batch_idx * c + ic; const char * ptr = src1_base + sx * src1_nb0 + sy * src1_nb1 + sz * src1_nb2 + channel_idx * src1_nb3; val = *(const float *) ptr; @@ -184,9 +190,9 @@ void ggml_sycl_op_conv_3d(ggml_backend_sycl_context & ctx, ggml_tensor * dst) { const int64_t row = t % k; const int64_t col = t / k; - const char * src_ptr = (const char *) src0->data + row * src0_packed_nb0 + col * src0_packed_nb1; + const char * src_ptr = src0_base + row * src0_packed_nb0 + col * src0_packed_nb1; float v; - if (src0->type == GGML_TYPE_F32) { + if (src0_is_f32) { v = *(const float *) src_ptr; } else { v = sycl::vec(*(const sycl::half *) src_ptr).convert()[0];