From 6c2cbc4e3337858f0e2f6c26001560aef0e76fef Mon Sep 17 00:00:00 2001 From: Ruben Ortlam Date: Tue, 9 Jun 2026 15:40:07 +0200 Subject: [PATCH] vulkan: disable FA mask_opt on GCN to improve performance --- ggml/src/ggml-vulkan/ggml-vulkan.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-vulkan/ggml-vulkan.cpp b/ggml/src/ggml-vulkan/ggml-vulkan.cpp index 22405f234d..6f942f00cf 100644 --- a/ggml/src/ggml-vulkan/ggml-vulkan.cpp +++ b/ggml/src/ggml-vulkan/ggml-vulkan.cpp @@ -9970,7 +9970,8 @@ static void ggml_vk_flash_attn(ggml_backend_vk_context * ctx, vk_context& subctx } // Only use mask opt when the mask is fairly large. This hasn't been tuned extensively. - bool use_mask_opt = mask && nem1 >= 32 && nem0 * nem1 > 32768 && nem0 >= tuning_params.block_cols * 16; + bool use_mask_opt = mask && nem1 >= 32 && nem0 * nem1 > 32768 && nem0 >= tuning_params.block_cols * 16 + && ctx->device->architecture != vk_device_architecture::AMD_GCN; vk_fa_pipeline_state fa_pipeline_state = get_fa_pipeline_state(ctx->device, tuning_params, HSK, HSV, aligned, f32acc, mask != nullptr, use_mask_opt, logit_softcap != 0, k->type, v->type);