From 2c1dc8781b97963d095b9697a08febefafea22d2 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Thu, 18 Jun 2026 13:15:10 +0000 Subject: [PATCH] Fix MTP warmup for GLM models --- common/speculative.cpp | 3 +-- src/graphs/build_gemma4.cpp | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/common/speculative.cpp b/common/speculative.cpp index 9b956c10..d91e0296 100644 --- a/common/speculative.cpp +++ b/common/speculative.cpp @@ -2347,8 +2347,6 @@ void common_speculative_checkpoint_restore( common_speculative_checkpoint_discard(ckpt, ctx); } -static bool mtp_model_uses_recurrent_conditioning(const common_speculative_state_mtp & state); - void common_speculative_commit( common_speculative * spec, llama_context * ctx, @@ -2559,6 +2557,7 @@ static bool mtp_model_uses_recurrent_conditioning(const common_speculative_state if (state.ctx_mtp == nullptr) { return false; } + return true; const llama_model * model = llama_get_model(state.ctx_mtp); if (!llama_model_has_recurrent(model)) { diff --git a/src/graphs/build_gemma4.cpp b/src/graphs/build_gemma4.cpp index a0c555df..7e4c655a 100644 --- a/src/graphs/build_gemma4.cpp +++ b/src/graphs/build_gemma4.cpp @@ -519,8 +519,8 @@ static ggml_cgraph * build_gemma4_graph_parallel(llm_build_context & llm, llama_ } cur = llm_build_context::build_output(lctx, ctx0, cur, model.output, model.output_norm, cb); - cb(cur, "almost_result", -1); if (hparams.f_final_logit_softcapping > 0) { + cb(cur, "almost_result", -1); cur = ggml_softcap(ctx0, cur, 1.0f / hparams.f_final_logit_softcapping, hparams.f_final_logit_softcapping); } cb(cur, "result_output", -1); @@ -666,6 +666,7 @@ ggml_cgraph * llm_build_context::build_gemma4_mtp() { ggml_tensor * mtp_embd = llm_build_lora_mm(lctx, ctx0, model.mtp_post_proj, cur); cb(mtp_embd, "result_mtp_embd", -1); + ggml_set_output(mtp_embd); ggml_build_forward_expand(gf, mtp_embd); ggml_tensor * logits;