quantize: add exception for Gemma4 (#1897)

This commit is contained in:
Kawrakow 2026-05-29 10:54:21 +03:00 committed by GitHub
parent 6eff055a0c
commit e75337fec3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1250,8 +1250,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
// - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
// - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
//
-GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
-model.arch == LLM_ARCH_DECI || model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");
+GGML_ASSERT((qs.n_attention_wv == 0 ||
+qs.n_attention_wv == (int)model.hparams.n_layer ||
+qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
+model.arch == LLM_ARCH_DECI ||
+model.arch == LLM_ARCH_GEMMA4 ||
+model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");
size_t total_size_org = 0;
size_t total_size_new = 0;