From e55825bdaafa4fb60506db81da1d7658cdb46d67 Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Thu, 30 Apr 2026 14:29:50 +0000 Subject: [PATCH] Disable k-shift for split mode graph --- examples/server/server-context.cpp | 8 +++++++- include/llama.h | 2 ++ src/llama-model.cpp | 4 ++++ 3 files changed, 13 insertions(+), 1 deletion(-) diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp index 782d1547..b38d13a0 100644 --- a/examples/server/server-context.cpp +++ b/examples/server/server-context.cpp @@ -1620,7 +1620,13 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task) if (params_base.ctx_shift) { params_base.ctx_shift = false; LOG_WARNING("%s\n", "ctx_shift is not supported by recurrent model, it will be disabled"); + } } + if (llama_model_is_split_mode_graph(llama_get_model(slot.ctx))) { + if (params_base.ctx_shift) { + params_base.ctx_shift = false; + LOG_WARNING("%s\n", "ctx_shift is not implemented for split mode graph, it will be disabled"); + } } { const auto& stop = data.find("stop"); @@ -4423,7 +4429,7 @@ void server_context::update_slots() { // apply context-shift if needed // TODO: simplify and improve context_shift(); - + // start populating the batch for this iteration common_batch_clear(batch); diff --git a/include/llama.h b/include/llama.h index 8d6cd295..ac0a275b 100644 --- a/include/llama.h +++ b/include/llama.h @@ -685,6 +685,8 @@ extern "C" { LLAMA_API bool llama_model_has_recurrent(const struct llama_model * model); + LLAMA_API bool llama_model_is_split_mode_graph(const struct llama_model * model); + // Returns 0 on success LLAMA_API uint32_t llama_model_quantize( const char * fname_inp, diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 4840d44c..10d02475 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -1877,6 +1877,10 @@ bool llama_model_has_recurrent(const llama_model * model) { return llm_arch_is_hybrid(model->arch) || llm_arch_is_recurrent(model->arch); } +bool llama_model_is_split_mode_graph(const struct llama_model * model) { + return model && (model->split_mode == LLAMA_SPLIT_MODE_GRAPH || model->split_mode == LLAMA_SPLIT_MODE_ATTN); +} + llm_tensor llm_tensor_type(llm_arch arch, const std::string & tensor_name, int il) { auto it = LLM_TENSOR_NAMES.find(arch); if (it == LLM_TENSOR_NAMES.end()) {