From 048a490f763708b84f186078559c0929ed3fcd9c Mon Sep 17 00:00:00 2001 From: Julien Denize <40604584+juliendenize@users.noreply.github.com> Date: Sun, 3 May 2026 21:51:21 +0200 Subject: [PATCH] convert : Mistral format yarn apply_scale support (#22612) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [BUGFIX] Mistral format apply_scale support. * Update convert_hf_to_gguf.py Co-authored-by: Sigbjørn Skjæret * fix misunderstood boolean parameters --------- Co-authored-by: Sigbjørn Skjæret --- convert_hf_to_gguf.py | 3 ++- src/llama-model.cpp | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index af3c4bad7e..7f4a4018f4 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -12716,11 +12716,12 @@ class MistralModel(LlamaModel): def set_mistral_config(gguf_writer: gguf.GGUFWriter, hparams: dict): if "yarn" in hparams: yarn_params = hparams["yarn"] + mscale_all_dim = 1.0 if not yarn_params["apply_scale"] else 0.0 gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) gguf_writer.add_rope_scaling_factor(yarn_params["factor"]) gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"]) gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"]) - gguf_writer.add_rope_scaling_yarn_log_mul(1.0) # mscale_all_dim + gguf_writer.add_rope_scaling_yarn_log_mul(mscale_all_dim) gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"]) if "llama_4_scaling" in hparams: diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 9e2a13cbd4..54caff987d 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -1994,7 +1994,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { } } - if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f)) { + if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false)) { // [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX] // cancel the factor from the convert script hparams.rope_yarn_log_mul /= 0.1f; @@ -2868,7 +2868,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_FAST, hparams.yarn_beta_fast, false); ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false); - ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f); + ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false); hparams.f_attn_temp_offset = 0.0f;