ggml-cuda: tune RDNA3 Q6_K MMVQ nwarps (#23349)

This commit is contained in:
ravel7524 2026-05-20 03:52:21 +02:00 committed by GitHub
parent b28a2f372a
commit b39a7bf1b0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -359,7 +359,9 @@ static constexpr __host__ __device__ int calc_nwarps(ggml_type type, int ncols_d
case GGML_TYPE_Q5_1:
case GGML_TYPE_Q8_0:
case GGML_TYPE_Q4_K:
return 8;
case GGML_TYPE_Q6_K:
return 2;
case GGML_TYPE_IQ4_NL:
return 8;
default: