mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Disable k-shift for split mode graph
This commit is contained in:
parent
0f10567aac
commit
e55825bdaa
@ -1620,7 +1620,13 @@ bool server_context::launch_slot_with_task(server_slot& slot, server_task& task)
|
||||
if (params_base.ctx_shift) {
|
||||
params_base.ctx_shift = false;
|
||||
LOG_WARNING("%s\n", "ctx_shift is not supported by recurrent model, it will be disabled");
|
||||
}
|
||||
}
|
||||
if (llama_model_is_split_mode_graph(llama_get_model(slot.ctx))) {
|
||||
if (params_base.ctx_shift) {
|
||||
params_base.ctx_shift = false;
|
||||
LOG_WARNING("%s\n", "ctx_shift is not implemented for split mode graph, it will be disabled");
|
||||
}
|
||||
}
|
||||
{
|
||||
const auto& stop = data.find("stop");
|
||||
@ -4423,7 +4429,7 @@ void server_context::update_slots() {
|
||||
// apply context-shift if needed
|
||||
// TODO: simplify and improve
|
||||
context_shift();
|
||||
|
||||
|
||||
// start populating the batch for this iteration
|
||||
common_batch_clear(batch);
|
||||
|
||||
|
||||
@ -685,6 +685,8 @@ extern "C" {
|
||||
|
||||
LLAMA_API bool llama_model_has_recurrent(const struct llama_model * model);
|
||||
|
||||
LLAMA_API bool llama_model_is_split_mode_graph(const struct llama_model * model);
|
||||
|
||||
// Returns 0 on success
|
||||
LLAMA_API uint32_t llama_model_quantize(
|
||||
const char * fname_inp,
|
||||
|
||||
@ -1877,6 +1877,10 @@ bool llama_model_has_recurrent(const llama_model * model) {
|
||||
return llm_arch_is_hybrid(model->arch) || llm_arch_is_recurrent(model->arch);
|
||||
}
|
||||
|
||||
bool llama_model_is_split_mode_graph(const struct llama_model * model) {
|
||||
return model && (model->split_mode == LLAMA_SPLIT_MODE_GRAPH || model->split_mode == LLAMA_SPLIT_MODE_ATTN);
|
||||
}
|
||||
|
||||
llm_tensor llm_tensor_type(llm_arch arch, const std::string & tensor_name, int il) {
|
||||
auto it = LLM_TENSOR_NAMES.find(arch);
|
||||
if (it == LLM_TENSOR_NAMES.end()) {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user