llama : use LLM_KV for quantization_version & file_type (#24802)

Signed-off-by: Adrien Gallouët <angt@huggingface.co>
This commit is contained in:
Adrien Gallouët 2026-06-20 20:07:01 +02:00 committed by GitHub
parent 75f460ac28
commit 84de01a1f1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -932,8 +932,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std::
// copy the KV pairs from the input file
gguf_set_kv (ctx_out.get(), ml.metadata);
gguf_set_val_u32(ctx_out.get(), "general.quantization_version", GGML_QNT_VERSION); // TODO: use LLM_KV
gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV
gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_QUANTIZATION_VERSION).c_str(), GGML_QNT_VERSION);
gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_FILE_TYPE).c_str(), ftype);
// Remove split metadata
gguf_remove_key(ctx_out.get(), ml.llm_kv(LLM_KV_SPLIT_NO).c_str());