From 4e1851b01a6851e2c8a6f236f2acd324c6a9aa7d Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Fri, 15 May 2026 07:28:34 +0300 Subject: [PATCH] imatrix: use data for ffn_up when data for ffn_gate is missing (#1805) --- src/llama-quantize.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/llama-quantize.cpp b/src/llama-quantize.cpp index 00d2796a..6ccc054d 100644 --- a/src/llama-quantize.cpp +++ b/src/llama-quantize.cpp @@ -1477,6 +1477,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s } else if (auto pos = name.find("ffn_gate_up_exps.weight"); pos != std::string::npos) { auto not_merged_name = name.substr(0, pos) + "ffn_up_exps.weight"; it = imatrix_data->find(not_merged_name); + } else if (auto pos2 = name.find("ffn_gate.weight"); pos2 != std::string::npos) { + auto up_name = name.substr(0, pos2) + "ffn_up.weight"; + it = imatrix_data->find(up_name); } else { // MLA hack: most imatrix files floating around the Internet have been computed with standard attention. // This means that the imatrix file does not contain data for the *.attn_k_b.weight and *.attn_v_b.weight