Add all quantization types to Mistral4 MLA on the CPU (#1666)

This commit is contained in:
Kawrakow 2026-04-20 16:11:06 +02:00 committed by GitHub
parent 00ba208a5c
commit d482413158
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -81,6 +81,24 @@ inline bool iqk_deepseek_helper(ggml_type type_k,
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
return true;
}
if (type_k == GGML_TYPE_Q4_0) {
HelperQ40 kh((const char *)k, stride_k);
HelperQ40 vh((const char *)v, stride_v);
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
return true;
}
if (type_k == GGML_TYPE_Q4_1) {
HelperQ41 kh((const char *)k, stride_k);
HelperQ41 vh((const char *)v, stride_v);
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
return true;
}
if (type_k == GGML_TYPE_IQ4_NL) {
HelperIQ4nl kh((const char *)k, stride_k);
HelperIQ4nl vh((const char *)v, stride_v);
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
return true;
}
#endif
if (type_k == GGML_TYPE_F16) {
HelperF16 kh((const char *)k, stride_k);