diff --git a/tools/server/server-context.cpp b/tools/server/server-context.cpp index aebca306a8..ded622cfd6 100644 --- a/tools/server/server-context.cpp +++ b/tools/server/server-context.cpp @@ -1395,11 +1395,23 @@ private: bool update_cache = false; + // if a specific slot is requested, use it (still goes through cache update logic below) + if (task.id_slot != -1) { + ret = get_slot_by_id(task.id_slot); + if (ret) { + SLT_INF(*ret, "selected slot by id (%d)\n", task.id_slot); + } + } + // find the slot that has at least n% prompt similarity - if (ret == nullptr && slot_prompt_similarity != 0.0f) { + if (slot_prompt_similarity != 0.0f) { float sim_best = 0; for (server_slot & slot : slots) { + if (task.id_slot != -1 && slot.id != task.id_slot) { + continue; + } + // skip the slot if it is not available if (slot.is_processing()) { continue; @@ -1426,8 +1438,10 @@ private: if (ret != nullptr) { const float f_keep = (sim_best*task.tokens.size()) / ret->prompt.tokens.size(); - SLT_INF(*ret, "selected slot by LCP similarity, sim_best = %.3f (> %.3f thold), f_keep = %.3f\n", - sim_best, slot_prompt_similarity, f_keep); + if (task.id_slot == -1) { + SLT_INF(*ret, "selected slot by LCP similarity, sim_best = %.3f (> %.3f thold), f_keep = %.3f\n", + sim_best, slot_prompt_similarity, f_keep); + } // if we are about to lose a large portion of the existing context - save it in the prompt cache if (f_keep < 0.5f) { @@ -2180,10 +2194,9 @@ private: } } - const int id_slot = task.id_slot; const int id_task = task.id; - server_slot * slot = id_slot != -1 ? get_slot_by_id(id_slot) : get_available_slot(task); + server_slot * slot = get_available_slot(task); // // slot scheduling logic