diff --git a/ggml/src/iqk/fa/iqk_fa_320_256.cpp b/ggml/src/iqk/fa/iqk_fa_320_256.cpp index 95c92ba6..03a9456d 100644 --- a/ggml/src/iqk/fa/iqk_fa_320_256.cpp +++ b/ggml/src/iqk/fa/iqk_fa_320_256.cpp @@ -81,6 +81,24 @@ inline bool iqk_deepseek_helper(ggml_type type_k, iqk_deepseek_helper(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S); return true; } + if (type_k == GGML_TYPE_Q4_0) { + HelperQ40 kh((const char *)k, stride_k); + HelperQ40 vh((const char *)v, stride_v); + iqk_deepseek_helper(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S); + return true; + } + if (type_k == GGML_TYPE_Q4_1) { + HelperQ41 kh((const char *)k, stride_k); + HelperQ41 vh((const char *)v, stride_v); + iqk_deepseek_helper(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S); + return true; + } + if (type_k == GGML_TYPE_IQ4_NL) { + HelperIQ4nl kh((const char *)k, stride_k); + HelperIQ4nl vh((const char *)v, stride_v); + iqk_deepseek_helper(kh, vh, nq1, nk1, stride_q, stride_m, stride_qkv, q, mask, scale, softcap, qkv, sinkf, M, S); + return true; + } #endif if (type_k == GGML_TYPE_F16) { HelperF16 kh((const char *)k, stride_k);