diff --git a/src/llama-quant.cpp b/src/llama-quant.cpp index cf92ce4bb8..89b7fe8d43 100644 --- a/src/llama-quant.cpp +++ b/src/llama-quant.cpp @@ -932,8 +932,8 @@ static void llama_model_quantize_impl(const std::string & fname_inp, const std:: // copy the KV pairs from the input file gguf_set_kv (ctx_out.get(), ml.metadata); - gguf_set_val_u32(ctx_out.get(), "general.quantization_version", GGML_QNT_VERSION); // TODO: use LLM_KV - gguf_set_val_u32(ctx_out.get(), "general.file_type", ftype); // TODO: use LLM_KV + gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_QUANTIZATION_VERSION).c_str(), GGML_QNT_VERSION); + gguf_set_val_u32(ctx_out.get(), ml.llm_kv(LLM_KV_GENERAL_FILE_TYPE).c_str(), ftype); // Remove split metadata gguf_remove_key(ctx_out.get(), ml.llm_kv(LLM_KV_SPLIT_NO).c_str());