Disable k-shift for split mode graph (#1714)

This commit is contained in:
Kawrakow 2026-04-30 18:03:29 +02:00 committed by GitHub
parent 0f10567aac
commit a8aecbf159
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 13 additions and 1 deletions

View File

@ -1620,7 +1620,13 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
if (params_base.ctx_shift) {
params_base.ctx_shift = false;
LOG_WARNING("%s\n", "ctx_shift is not supported by recurrent model, it will be disabled");
}
}
if (llama_model_is_split_mode_graph(llama_get_model(slot.ctx))) {
if (params_base.ctx_shift) {
params_base.ctx_shift = false;
LOG_WARNING("%s\n", "ctx_shift is not implemented for split mode graph, it will be disabled");
}
}
{
const auto& stop = data.find("stop");
@ -4423,7 +4429,7 @@ void server_context::update_slots() {
// apply context-shift if needed
// TODO: simplify and improve
context_shift();
// start populating the batch for this iteration
common_batch_clear(batch);

View File

@ -685,6 +685,8 @@ extern "C" {
LLAMA_API bool llama_model_has_recurrent(const struct llama_model * model);
LLAMA_API bool llama_model_is_split_mode_graph(const struct llama_model * model);
// Returns 0 on success
LLAMA_API uint32_t llama_model_quantize(
const char * fname_inp,

View File

@ -1877,6 +1877,10 @@ bool llama_model_has_recurrent(const llama_model * model) {
return llm_arch_is_hybrid(model->arch) || llm_arch_is_recurrent(model->arch);
}
// Reports whether `model` uses one of the graph-style split modes.
// Returns false for a null model; otherwise returns true when the model's
// split_mode is LLAMA_SPLIT_MODE_GRAPH or LLAMA_SPLIT_MODE_ATTN.
bool llama_model_is_split_mode_graph(const struct llama_model * model) {
    // A missing model cannot be in any split mode.
    if (!model) {
        return false;
    }
    if (model->split_mode == LLAMA_SPLIT_MODE_GRAPH) {
        return true;
    }
    // The attention split mode is treated the same as graph for this query.
    return model->split_mode == LLAMA_SPLIT_MODE_ATTN;
}
llm_tensor llm_tensor_type(llm_arch arch, const std::string & tensor_name, int il) {
auto it = LLM_TENSOR_NAMES.find(arch);
if (it == LLM_TENSOR_NAMES.end()) {