llama-quantize: enable --extra-output-tensor with COPY (#1871)

This commit is contained in:
Justin Martin 2026-05-23 10:52:34 +00:00 committed by GitHub
parent a6bb509305
commit 40d8cb196a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@@ -1446,6 +1446,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
// do not quantize relative position bias (T5)
quantize &= name.find("attn_rel_b.weight") == std::string::npos;
// quantize the extra output tensor
quantize = tensor == output_tensor || quantize;
enum ggml_type new_type;
void * new_data = nullptr;
size_t new_size = 0;
@@ -1516,6 +1519,9 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
if (params->output_tensor_type < GGML_TYPE_COUNT && strcmp(tensor->name, "output.weight") == 0) {
new_type = params->output_tensor_type;
}
else if (params->only_copy && tensor == output_tensor) {
new_type = tensor->type;
}
if (params->ffn_gate_inp_type < GGML_TYPE_COUNT && name.find("ffn_gate_inp.weight") != std::string::npos) {
new_type = params->ffn_gate_inp_type;
}