quantize: add exception for Gemma4 (#1897)

2026-06-28 04:30:15 -05:00 · 2026-05-29 10:54:21 +03:00 · 2026-05-29 10:54:21 +03:00 · e75337fec3
commit e75337fec3
parent 6eff055a0c
1 changed files with 6 additions and 2 deletions
--- a/src/llama-quantize.cpp
+++ b/src/llama-quantize.cpp
@@ -1250,8 +1250,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
    //  - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
    //  - model.arch == LLM_ARCH_DECI                    for Deci-Nemotron   models
    //
-    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
-                model.arch == LLM_ARCH_DECI || model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");
+    GGML_ASSERT((qs.n_attention_wv == 0 ||
+                 qs.n_attention_wv == (int)model.hparams.n_layer ||
+                 qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
+                 model.arch == LLM_ARCH_DECI ||
+                 model.arch == LLM_ARCH_GEMMA4 ||
+                 model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");

    size_t total_size_org = 0;
    size_t total_size_new = 0;