diff --git a/src/llama-quantize.cpp b/src/llama-quantize.cpp
index 1f538882..1e292e48 100644
--- a/src/llama-quantize.cpp
+++ b/src/llama-quantize.cpp
@@ -1250,8 +1250,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
     //  - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
     //  - model.arch == LLM_ARCH_DECI                    for Deci-Nemotron   models
     //
-    GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
-                model.arch == LLM_ARCH_DECI || model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");
+    GGML_ASSERT((qs.n_attention_wv == 0 ||
+                 qs.n_attention_wv == (int)model.hparams.n_layer ||
+                 qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
+                 model.arch == LLM_ARCH_DECI ||
+                 model.arch == LLM_ARCH_GEMMA4 ||
+                 model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");
 
     size_t total_size_org = 0;
     size_t total_size_new = 0;