diff --git a/src/llama-quantize.cpp b/src/llama-quantize.cpp index 00d2796a..6ccc054d 100644 --- a/src/llama-quantize.cpp +++ b/src/llama-quantize.cpp @@ -1477,6 +1477,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s } else if (auto pos = name.find("ffn_gate_up_exps.weight"); pos != std::string::npos) { auto not_merged_name = name.substr(0, pos) + "ffn_up_exps.weight"; it = imatrix_data->find(not_merged_name); + } else if (auto pos2 = name.find("ffn_gate.weight"); pos2 != std::string::npos) { + auto up_name = name.substr(0, pos2) + "ffn_up.weight"; + it = imatrix_data->find(up_name); } else { // MLA hack: most imatrix files floating around the Internet have been computed with standard attention. // This means that the imatrix file does not contain data for the *.attn_k_b.weight and *.attn_v_b.weight