ggml-cuda: tune RDNA3 Q6_K MMVQ nwarps (#23349)

2026-06-27 23:50:20 -05:00 · 2026-05-20 03:52:21 +02:00 · 2026-05-20 03:52:21 +02:00 · b39a7bf1b0
commit b39a7bf1b0
parent b28a2f372a
1 changed file with 2 additions and 0 deletions
--- a/ggml/src/ggml-cuda/mmvq.cu
+++ b/ggml/src/ggml-cuda/mmvq.cu
@@ -359,7 +359,9 @@ static constexpr __host__ __device__ int calc_nwarps(ggml_type type, int ncols_d
                case GGML_TYPE_Q5_1:
                case GGML_TYPE_Q8_0:
                case GGML_TYPE_Q4_K:
+                    return 8;
                case GGML_TYPE_Q6_K:
+                    return 2;
                case GGML_TYPE_IQ4_NL:
                    return 8;
                default: