From e55825bdaafa4fb60506db81da1d7658cdb46d67 Mon Sep 17 00:00:00 2001
From: Kawrakow <iwankawrakow@gmail.com>
Date: Thu, 30 Apr 2026 14:29:50 +0000
Subject: [PATCH] Disable k-shift for split mode graph

---
 examples/server/server-context.cpp | 8 +++++++-
 include/llama.h                    | 2 ++
 src/llama-model.cpp                | 4 ++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/examples/server/server-context.cpp b/examples/server/server-context.cpp
index 782d1547..b38d13a0 100644
--- a/examples/server/server-context.cpp
+++ b/examples/server/server-context.cpp
@@ -1620,7 +1620,13 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
         if (params_base.ctx_shift) {
             params_base.ctx_shift = false;
             LOG_WARNING("%s\n", "ctx_shift is not supported by recurrent model, it will be disabled");
+        }
     }
+    if (llama_model_is_split_mode_graph(llama_get_model(slot.ctx))) { // query the model behind this slot's context
+        if (params_base.ctx_shift) { // only act (and warn) when ctx_shift is currently enabled
+            params_base.ctx_shift = false; // turn the option off in params_base
+            LOG_WARNING("%s\n", "ctx_shift is not implemented for split mode graph, it will be disabled");
+        }
     }
     {
         const auto& stop = data.find("stop");
@@ -4423,7 +4429,7 @@ void server_context::update_slots() {
     // apply context-shift if needed
     // TODO: simplify and improve
     context_shift();
-    
+
     // start populating the batch for this iteration
     common_batch_clear(batch);
 
diff --git a/include/llama.h b/include/llama.h
index 8d6cd295..ac0a275b 100644
--- a/include/llama.h
+++ b/include/llama.h
@@ -685,6 +685,8 @@ extern "C" {
 
     LLAMA_API bool llama_model_has_recurrent(const struct llama_model * model);
 
+    LLAMA_API bool llama_model_is_split_mode_graph(const struct llama_model * model); // true if model is non-NULL and its split_mode is LLAMA_SPLIT_MODE_GRAPH or LLAMA_SPLIT_MODE_ATTN
+
     // Returns 0 on success
     LLAMA_API uint32_t llama_model_quantize(
             const char * fname_inp,
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
index 4840d44c..10d02475 100644
--- a/src/llama-model.cpp
+++ b/src/llama-model.cpp
@@ -1877,6 +1877,10 @@ bool llama_model_has_recurrent(const llama_model * model) {
     return llm_arch_is_hybrid(model->arch) || llm_arch_is_recurrent(model->arch);
 }
 
+bool llama_model_is_split_mode_graph(const struct llama_model * model) { // reports whether model->split_mode is GRAPH or ATTN
+    return model && (model->split_mode == LLAMA_SPLIT_MODE_GRAPH || model->split_mode == LLAMA_SPLIT_MODE_ATTN); // a null model yields false; LLAMA_SPLIT_MODE_ATTN is accepted in addition to LLAMA_SPLIT_MODE_GRAPH
+}
+
 llm_tensor llm_tensor_type(llm_arch arch, const std::string & tensor_name, int il) {
     auto it = LLM_TENSOR_NAMES.find(arch);
     if (it == LLM_TENSOR_NAMES.end()) {