diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py index af3c4bad7e..7f4a4018f4 100755 --- a/convert_hf_to_gguf.py +++ b/convert_hf_to_gguf.py @@ -12716,11 +12716,12 @@ class MistralModel(LlamaModel): def set_mistral_config(gguf_writer: gguf.GGUFWriter, hparams: dict): if "yarn" in hparams: yarn_params = hparams["yarn"] + mscale_all_dim = 1.0 if not yarn_params["apply_scale"] else 0.0 gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN) gguf_writer.add_rope_scaling_factor(yarn_params["factor"]) gguf_writer.add_rope_scaling_yarn_beta_fast(yarn_params["beta"]) gguf_writer.add_rope_scaling_yarn_beta_slow(yarn_params["alpha"]) - gguf_writer.add_rope_scaling_yarn_log_mul(1.0) # mscale_all_dim + gguf_writer.add_rope_scaling_yarn_log_mul(mscale_all_dim) gguf_writer.add_rope_scaling_orig_ctx_len(yarn_params["original_max_position_embeddings"]) if "llama_4_scaling" in hparams: diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 9e2a13cbd4..54caff987d 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -1994,7 +1994,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { } } - if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f)) { + if (ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false)) { // [TAG_DEEPSEEK2_YARN_LOG_MUL_FIX] // cancel the factor from the convert script hparams.rope_yarn_log_mul /= 0.1f; @@ -2868,7 +2868,7 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_FAST, hparams.yarn_beta_fast, false); ml.get_key(LLM_KV_ROPE_SCALING_YARN_BETA_SLOW, hparams.yarn_beta_slow, false); - ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, 0.0f); + ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false); hparams.f_attn_temp_offset = 0.0f;