mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Add all quantization types to Mistral4 MLA on the CPU (#1666)
This commit is contained in:
parent
00ba208a5c
commit
d482413158
@@ -81,6 +81,24 @@ inline bool iqk_deepseek_helper(ggml_type type_k,
|
||||
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
|
||||
return true;
|
||||
}
|
||||
if (type_k == GGML_TYPE_Q4_0) {
|
||||
HelperQ40 kh((const char *)k, stride_k);
|
||||
HelperQ40 vh((const char *)v, stride_v);
|
||||
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
|
||||
return true;
|
||||
}
|
||||
if (type_k == GGML_TYPE_Q4_1) {
|
||||
HelperQ41 kh((const char *)k, stride_k);
|
||||
HelperQ41 vh((const char *)v, stride_v);
|
||||
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
|
||||
return true;
|
||||
}
|
||||
if (type_k == GGML_TYPE_IQ4_NL) {
|
||||
HelperIQ4nl kh((const char *)k, stride_k);
|
||||
HelperIQ4nl vh((const char *)v, stride_v);
|
||||
iqk_deepseek_helper<step_k>(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S);
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
if (type_k == GGML_TYPE_F16) {
|
||||
HelperF16 kh((const char *)k, stride_k);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user