From 35fbe08d6e97444bd8a6a8dbeeecfdc77d7cc099 Mon Sep 17 00:00:00 2001 From: Samuel Oliveira Alves <107287165+SamuelOliveirads@users.noreply.github.com> Date: Fri, 15 May 2026 01:11:04 -0300 Subject: [PATCH] disable MTP for parallel slots (#1804) --- examples/server/server-context.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp index 24dcf533..cbd76a6d 100644 --- a/examples/server/server-context.cpp +++ b/examples/server/server-context.cpp @@ -286,6 +286,19 @@ bool server_context::load_model(const gpt_params& params_) { add_bos_token = llama_should_add_bos_token(model); has_eos_token = llama_add_eos_token(model) != 1; + if (params_base.has_mtp && params_base.n_parallel > 1) { + LOG_WARNING("MTP is not supported with parallel slots yet, disabling MTP to avoid cross-slot corruption.\n", { + {"n_parallel", params_base.n_parallel}, + }); + params_base.has_mtp = false; + if (params_base.speculative.type == COMMON_SPECULATIVE_TYPE_MTP) { + params_base.speculative.type = COMMON_SPECULATIVE_TYPE_NONE; + } + params_base.speculative.model.clear(); + params_base.speculative.params.clear(); + params_base.speculative.model_dft = nullptr; + } + bool has_draft_model = !params_base.speculative.model.empty() || !params_base.speculative.params.empty(); std::string& mmproj_path = params_base.mmproj.path; if (!mmproj_path.empty()) {