mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
quantize: add exception for Gemma4 (#1897)
This commit is contained in:
parent
6eff055a0c
commit
e75337fec3
@ -1250,8 +1250,12 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
|
||||
// - qs.n_attention_wv == 3 * model.hparams.n_layer for Encoder-Decoder models
|
||||
// - model.arch == LLM_ARCH_DECI for Deci-Nemotron models
|
||||
//
|
||||
GGML_ASSERT((qs.n_attention_wv == 0 || qs.n_attention_wv == (int)model.hparams.n_layer || qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
|
||||
model.arch == LLM_ARCH_DECI || model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");
|
||||
GGML_ASSERT((qs.n_attention_wv == 0 ||
|
||||
qs.n_attention_wv == (int)model.hparams.n_layer ||
|
||||
qs.n_attention_wv == 3 * (int)model.hparams.n_layer ||
|
||||
model.arch == LLM_ARCH_DECI ||
|
||||
model.arch == LLM_ARCH_GEMMA4 ||
|
||||
model.arch == LLM_ARCH_UNKNOWN) && "n_attention_wv is unexpected");
|
||||
|
||||
size_t total_size_org = 0;
|
||||
size_t total_size_new = 0;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user