Fix MTP warmup for GLM models (#1992)

This commit is contained in:
Kawrakow 2026-06-19 08:59:55 +02:00 committed by GitHub
parent b3dfb7858c
commit 0d59973e4a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 3 additions and 3 deletions

View File

@@ -2347,8 +2347,6 @@ void common_speculative_checkpoint_restore(
common_speculative_checkpoint_discard(ckpt, ctx);
}
static bool mtp_model_uses_recurrent_conditioning(const common_speculative_state_mtp & state);
void common_speculative_commit(
common_speculative * spec,
llama_context * ctx,
@@ -2559,6 +2557,7 @@ static bool mtp_model_uses_recurrent_conditioning(const common_speculative_state
if (state.ctx_mtp == nullptr) {
return false;
}
return true;
const llama_model * model = llama_get_model(state.ctx_mtp);
if (!llama_model_has_recurrent(model)) {

View File

@@ -519,8 +519,8 @@ static ggml_cgraph * build_gemma4_graph_parallel(llm_build_context & llm, llama_
}
cur = llm_build_context::build_output(lctx, ctx0, cur, model.output, model.output_norm, cb);
cb(cur, "almost_result", -1);
if (hparams.f_final_logit_softcapping > 0) {
cb(cur, "almost_result", -1);
cur = ggml_softcap(ctx0, cur, 1.0f / hparams.f_final_logit_softcapping, hparams.f_final_logit_softcapping);
}
cb(cur, "result_output", -1);
@@ -666,6 +666,7 @@ ggml_cgraph * llm_build_context::build_gemma4_mtp() {
ggml_tensor * mtp_embd = llm_build_lora_mm(lctx, ctx0, model.mtp_post_proj, cur);
cb(mtp_embd, "result_mtp_embd", -1);
ggml_set_output(mtp_embd);
ggml_build_forward_expand(gf, mtp_embd);
ggml_tensor * logits;