Disable k-shift for split mode graph (#1714)

2026-06-28 04:30:15 -05:00 · 2026-04-30 18:03:29 +02:00 · 2026-04-30 18:03:29 +02:00 · a8aecbf159
commit a8aecbf159
parent 0f10567aac
3 changed files with 13 additions and 1 deletion
--- a/examples/server/server-context.cpp
+++ b/examples/server/server-context.cpp
@ -1620,7 +1620,13 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
        if (params_base.ctx_shift) {
            params_base.ctx_shift = false;
            LOG_WARNING("%s\n", "ctx_shift is not supported by recurrent model, it will be disabled");
+        }
    }
+    if (llama_model_is_split_mode_graph(llama_get_model(slot.ctx))) {
+        if (params_base.ctx_shift) {
+            params_base.ctx_shift = false;
+            LOG_WARNING("%s\n", "ctx_shift is not implemented for split mode graph, it will be disabled");
+        }
    }
    {
        const auto& stop = data.find("stop");
@ -4423,7 +4429,7 @@ void server_context::update_slots() {
    // apply context-shift if needed
    // TODO: simplify and improve
    context_shift();
-    
+
    // start populating the batch for this iteration
    common_batch_clear(batch);

--- a/include/llama.h
+++ b/include/llama.h
@ -685,6 +685,8 @@ extern "C" {

    LLAMA_API bool llama_model_has_recurrent(const struct llama_model * model);

+    LLAMA_API bool llama_model_is_split_mode_graph(const struct llama_model * model);
+
    // Returns 0 on success
    LLAMA_API uint32_t llama_model_quantize(
            const char * fname_inp,
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@ -1877,6 +1877,10 @@ bool llama_model_has_recurrent(const llama_model * model) {
    return llm_arch_is_hybrid(model->arch) || llm_arch_is_recurrent(model->arch);
 }

+bool llama_model_is_split_mode_graph(const struct llama_model * model) { // true when the model's split_mode is LLAMA_SPLIT_MODE_GRAPH or LLAMA_SPLIT_MODE_ATTN
+    return model && (model->split_mode == LLAMA_SPLIT_MODE_GRAPH || model->split_mode == LLAMA_SPLIT_MODE_ATTN); // a null model short-circuits to false
+}
+
 llm_tensor llm_tensor_type(llm_arch arch, const std::string & tensor_name, int il) {
    auto it = LLM_TENSOR_NAMES.find(arch);
    if (it == LLM_TENSOR_NAMES.end()) {