disable MTP for parallel slots (#1804)

This commit is contained in:
Samuel Oliveira Alves 2026-05-15 01:11:04 -03:00 committed by GitHub
parent 0fcffdb64d
commit 35fbe08d6e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -286,6 +286,19 @@ bool server_context::load_model(const gpt_params& params_) {
add_bos_token = llama_should_add_bos_token(model);
has_eos_token = llama_add_eos_token(model) != 1;
// MTP is turned off whenever more than one parallel slot is requested:
// per the warning below, the combination is not supported yet and is disabled
// to avoid cross-slot corruption.
if (params_base.has_mtp && params_base.n_parallel > 1) {
LOG_WARNING("MTP is not supported with parallel slots yet, disabling MTP to avoid cross-slot corruption.\n", {
{"n_parallel", params_base.n_parallel},
});
params_base.has_mtp = false;
// Only reset the speculative type when it was explicitly MTP; any other type is kept.
if (params_base.speculative.type == COMMON_SPECULATIVE_TYPE_MTP) {
params_base.speculative.type = COMMON_SPECULATIVE_TYPE_NONE;
}
// NOTE(review): the three resets below run regardless of speculative.type, so a
// non-MTP draft model/params configured together with has_mtp would also be
// dropped here — confirm this is intended.
params_base.speculative.model.clear();
params_base.speculative.params.clear();
params_base.speculative.model_dft = nullptr;
}
// Computed after the guard above: when MTP was just disabled, both fields are
// empty and has_draft_model is therefore false.
bool has_draft_model = !params_base.speculative.model.empty() || !params_base.speculative.params.empty();
std::string& mmproj_path = params_base.mmproj.path;
if (!mmproj_path.empty()) {