From 9df06805eee8d600ccc3cb1b099658c9a91b0bae Mon Sep 17 00:00:00 2001 From: Jeff Bolz Date: Fri, 26 Jun 2026 04:53:32 -0500 Subject: [PATCH] vulkan: Workaround compiler bug in conv2d coopmat2 path (#24924) * vulkan: Workaround compiler bug in conv2d coopmat2 path * apply same workaround to CONV_3D * Apply suggestion from @jeffbolznv --- ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp | 2 +- ggml/src/ggml-vulkan/vulkan-shaders/conv3d_mm.comp | 2 +- tests/test-backend-ops.cpp | 3 +++ 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp index 1428ef68d8..99400098bf 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/conv2d_mm.comp @@ -158,7 +158,7 @@ const uint32_t Csh_stride = BS_NPQ; #ifdef COOPMAT const uint32_t Csh_len = BS_K * Csh_stride; #else -const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 1; +const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 8; // 8 to workaround compiler bug #endif shared SHMEM_TYPE Csh[Csh_len]; // K x NPQ #endif diff --git a/ggml/src/ggml-vulkan/vulkan-shaders/conv3d_mm.comp b/ggml/src/ggml-vulkan/vulkan-shaders/conv3d_mm.comp index a9712eb3ac..f66f299f6d 100644 --- a/ggml/src/ggml-vulkan/vulkan-shaders/conv3d_mm.comp +++ b/ggml/src/ggml-vulkan/vulkan-shaders/conv3d_mm.comp @@ -144,7 +144,7 @@ const uint32_t Csh_stride = BS_NPQ; #ifdef COOPMAT const uint32_t Csh_len = BS_K * Csh_stride; #else -const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 1; +const uint32_t Csh_len = csh_store != 0 ? BS_K * Csh_stride : 8; // 8 to workaround compiler bug #endif shared SHMEM_TYPE Csh[Csh_len]; // K x NPQ #endif diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 0a017d57e7..0830dbf570 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -7973,6 +7973,9 @@ static std::vector> make_test_cases_eval() { } } } + for (auto kernel_type : {GGML_TYPE_F32, GGML_TYPE_F16}) { + test_cases.emplace_back(new test_conv_2d({ 256, 256, 192, 1 }, { 3, 3, 192, 96 }, kernel_type, 1, 1, 1, 1, 1, 1, false)); + } // sycl backend will limit task global_range < MAX_INT // test cases for 2D im2col with large input W and H (occurs in stable-diffusion)