server: (router) fix stopping_thread potentially hang (#24728)

* server: (router) fix stopping_thread potentially hang

* fix windows build
This commit is contained in:
Xuan-Son Nguyen 2026-06-18 15:41:09 +02:00 committed by GitHub
parent e1efd0991d
commit fe7c8b2414
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -54,7 +54,7 @@ extern char **environ;
struct server_subproc {
std::optional<subprocess_s> sproc; // empty while in DOWNLOADING state
std::atomic<bool> stop_download{false}; // flag to signal download cancellation
std::atomic<bool> stopped{false}; // set to cancel a download or signal child process exit
subprocess_s & get() {
GGML_ASSERT(sproc.has_value() && "subprocess not initialized");
@ -64,6 +64,22 @@ struct server_subproc {
bool is_alive() {
return sproc.has_value() && subprocess_alive(&sproc.value());
}
void terminate() {
if (!sproc.has_value()) {
return;
}
#if defined(_WIN32)
if (sproc->hProcess == NULL) {
return;
}
#else
if (sproc->child <= 0) {
return;
}
#endif
subprocess_terminate(&sproc.value());
}
};
@ -902,50 +918,49 @@ void server_models::load(const std::string & name) {
});
std::thread stopping_thread([&]() {
// thread to monitor stopping signal OR child crash
// thread to monitor explicit stop requests; child crash is signalled via child_proc->stopped
auto is_stopping = [this, &name]() {
return this->stopping_models.find(name) != this->stopping_models.end();
};
auto should_wake = [&]() {
return is_stopping() || !child_proc->is_alive();
};
{
std::unique_lock<std::mutex> lk(this->mutex);
this->cv_stop.wait(lk, should_wake);
this->cv_stop.wait(lk, [&]() {
return is_stopping() || child_proc->stopped.load(std::memory_order_acquire);
});
}
// child may have already exited (e.g. crashed) — skip shutdown sequence
if (!child_proc->is_alive()) {
// child crashed or finished on its own — skip graceful shutdown sequence
if (child_proc->stopped.load(std::memory_order_acquire)) {
return;
}
SRV_INF("stopping model instance name=%s\n", name.c_str());
// send interrupt to child process
fprintf(stdin_file, "%s\n", CMD_ROUTER_TO_CHILD_EXIT);
fflush(stdin_file);
// wait to stop gracefully or timeout
int64_t start_time = ggml_time_ms();
while (true) {
std::unique_lock<std::mutex> lk(this->mutex);
if (!is_stopping()) {
return; // already stopped
if (!is_stopping() || child_proc->stopped.load(std::memory_order_acquire)) {
return;
}
int64_t elapsed = ggml_time_ms() - start_time;
if (elapsed >= stop_timeout * 1000) {
// timeout, force kill
lk.unlock();
SRV_WRN("force-killing model instance name=%s after %d seconds timeout\n", name.c_str(), stop_timeout);
subprocess_terminate(&child_proc->get());
child_proc->terminate();
return;
}
this->cv_stop.wait_for(lk, std::chrono::seconds(1));
this->cv_stop.wait_for(lk, std::chrono::seconds(1), [&]() {
return !is_stopping() || child_proc->stopped.load(std::memory_order_acquire);
});
}
});
// we reach here when the child process exits
// we reach here when the child process exits (stdout EOF)
// note: we cannot join() prior to this point because it will close stdin_file
if (log_thread.joinable()) {
log_thread.join();
}
// stop the timeout monitoring thread
child_proc->stopped.store(true, std::memory_order_release);
{
std::lock_guard<std::mutex> lk(this->mutex);
stopping_models.erase(name);
@ -971,7 +986,7 @@ void server_models::load(const std::string & name) {
// old process should have exited already, but just in case, we clean it up here
if (old_instance.subproc->is_alive()) {
SRV_WRN("old process for model name=%s is still alive, this is unexpected\n", name.c_str());
subprocess_terminate(&old_instance.subproc->get()); // force kill
old_instance.subproc->terminate(); // force kill
}
if (old_instance.th.joinable()) {
old_instance.th.join();
@ -1039,7 +1054,7 @@ void server_models::download(common_params_model && model, common_download_opts
dl->opts = opts; // copy
dl->should_stop = [sp = inst.subproc]() {
return sp->stop_download.load(std::memory_order_relaxed);
return sp->stopped.load(std::memory_order_relaxed);
};
dl->on_progress = [this, name](const common_download_progress & p) {
@ -1069,7 +1084,7 @@ void server_models::unload(const std::string & name) {
if (it != mapping.end()) {
if (it->second.meta.status == SERVER_MODEL_STATUS_DOWNLOADING) {
SRV_INF("cancelling download for model name=%s\n", name.c_str());
it->second.subproc->stop_download.store(true, std::memory_order_relaxed);
it->second.subproc->stopped.store(true, std::memory_order_relaxed);
// for convenience, we wait the status change here
wait(lk, name, [](const server_model_meta & new_meta) {
return new_meta.status != SERVER_MODEL_STATUS_DOWNLOADING;
@ -1080,7 +1095,7 @@ void server_models::unload(const std::string & name) {
if (it->second.meta.status == SERVER_MODEL_STATUS_LOADING) {
// special case: if model is in loading state, unloading means force-killing it
SRV_WRN("model name=%s is still loading, force-killing\n", name.c_str());
subprocess_terminate(&it->second.subproc->get());
it->second.subproc->terminate();
}
cv_stop.notify_all();
// status change will be handled by the managing thread
@ -1097,7 +1112,7 @@ void server_models::unload_all() {
for (auto & [name, inst] : mapping) {
if (inst.meta.status == SERVER_MODEL_STATUS_DOWNLOADING) {
SRV_INF("cancelling download for model name=%s\n", name.c_str());
inst.subproc->stop_download.store(true, std::memory_order_relaxed);
inst.subproc->stopped.store(true, std::memory_order_relaxed);
} else if (inst.meta.is_running()) {
SRV_INF("stopping model instance name=%s\n", name.c_str());
stopping_models.insert(name);