From 1401fc3ca7d1e9965d6c8e5ff71b49ed156061b5 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Tue, 23 Jun 2026 16:39:59 +0200 Subject: [PATCH] cli support router mode Co-authored-by: Piotr Wilkin --- tools/cli/cli-client.cpp | 29 ++++++++++++++++++++--- tools/cli/cli-client.h | 4 ++++ tools/cli/cli-context.cpp | 50 ++++++++++++++++++++++++++++++++++++++- tools/cli/cli-context.h | 4 ++++ tools/cli/cli-view.h | 8 +++++-- 5 files changed, 89 insertions(+), 6 deletions(-) diff --git a/tools/cli/cli-client.cpp b/tools/cli/cli-client.cpp index d45affba93..946282185f 100644 --- a/tools/cli/cli-client.cpp +++ b/tools/cli/cli-client.cpp @@ -28,7 +28,8 @@ static std::string join_path(const common_http_url & parts, const std::string & json cli_client::get(const std::string & path) { auto [cli, parts] = common_http_client(server_base); cli.set_read_timeout(CLI_HTTP_READ_TIMEOUT_SEC, 0); - auto res = cli.Get(join_path(parts, path)); + auto path_with_model = path + (model.empty() ? "" : ("?model=" + model)); + auto res = cli.Get(join_path(parts, path_with_model)); if (!res) { throw std::runtime_error("failed to connect to " + server_base + ": " + httplib::to_string(res.error())); } @@ -45,7 +46,11 @@ json cli_client::get(const std::string & path) { json cli_client::post(const std::string & path, const json & body) { auto [cli, parts] = common_http_client(server_base); cli.set_read_timeout(CLI_HTTP_READ_TIMEOUT_SEC, 0); - auto res = cli.Post(join_path(parts, path), body.dump(), "application/json"); + auto body_with_model = body; + if (!model.empty()) { + body_with_model["model"] = model; + } + auto res = cli.Post(join_path(parts, path), body_with_model.dump(), "application/json"); if (!res) { throw std::runtime_error("failed to connect to " + server_base + ": " + httplib::to_string(res.error())); } @@ -100,7 +105,11 @@ json cli_client::post_sse(const std::string & path, }; httplib::Headers headers = {{"Accept", "text/event-stream"}}; - auto res = cli.Post(join_path(parts, path), headers, body.dump(), "application/json", receiver); + auto body_with_model = body; + if (!model.empty()) { + body_with_model["model"] = model; + } + auto res = cli.Post(join_path(parts, path), headers, body_with_model.dump(), "application/json", receiver); if (!res) { if (res.error() == httplib::Error::Canceled && should_stop()) { @@ -139,3 +148,17 @@ bool cli_client::wait_health(const std::function & is_aborted) { last_error = "aborted while waiting for the server to become ready"; return false; } + +std::vector cli_client::list_models() { + json resp = get("/v1/models"); + if (!resp.contains("data") || !resp.at("data").is_array()) { + throw std::runtime_error("invalid response from /v1/models"); + } + std::vector models; + for (const auto & m : resp.at("data")) { + if (m.contains("id") && m.at("id").is_string()) { + models.push_back(m.at("id").get()); + } + } + return models; +} diff --git a/tools/cli/cli-client.h b/tools/cli/cli-client.h index 463deebf08..1bf9adaf54 100644 --- a/tools/cli/cli-client.h +++ b/tools/cli/cli-client.h @@ -15,6 +15,8 @@ struct cli_client { std::string server_base; // base url, for example "http://127.0.0.1:8080" std::string last_error; // set when wait_health() fails + std::string model; // optional, set when the server has multiple models (router mode) + // simple GET request, returns the response json // throws std::runtime_error on transport error or non-2xx status json get(const std::string & path); @@ -49,4 +51,6 @@ struct cli_client { json get_props() { return get("/props"); } + + std::vector list_models(); }; diff --git a/tools/cli/cli-context.cpp b/tools/cli/cli-context.cpp index cbfde0c0a3..9d906ccbab 100644 --- a/tools/cli/cli-context.cpp +++ b/tools/cli/cli-context.cpp @@ -68,7 +68,8 @@ bool cli_context::init() { std::optional spinner; - if (!params.server_base.empty()) { + bool use_external_server = !params.server_base.empty(); + if (use_external_server) { std::string base = params.server_base; while (!base.empty() && base.back() == '/') { base.pop_back(); @@ -121,6 +122,15 @@ bool cli_context::init() { return false; } + if (use_external_server) { + spinner.reset(); + if (!list_and_ask_models()) { + return false; + } + // restore the spinner for the next step + spinner.emplace("Waiting for server..."); + } + fetch_server_props(); return true; @@ -149,6 +159,44 @@ void cli_context::fetch_server_props() { } } +bool cli_context::list_and_ask_models() { + auto models = client.list_models(); + std::string message = "\nAvailable models:"; + if (!models.empty()) { + for (size_t i = 0; i < models.size(); ++i) { + message += "\n " + std::to_string(i + 1) + ". " + models[i]; + } + } + message += "\n"; + view::show_message(message); + std::string selection; + while (selection.empty()) { + if (should_stop()) { + return false; + } + view::user_turn user_turn; + selection = user_turn.read_input(false, "Select model by number: "); + if (selection.empty()) { + continue; + } + try { + size_t idx = std::stoul(selection); + if (idx > 0 && idx <= models.size()) { + model_name = models[idx - 1]; + client.model = model_name; + view::show_message("Selected model: " + model_name); + break; + } + } catch (...) { + // ignore + } + view::show_error("Invalid selection. Please enter a valid number."); + selection.clear(); + continue; + } + return true; +} + void cli_context::add_system_prompt() { if (!params.system_prompt.empty()) { messages.push_back({ diff --git a/tools/cli/cli-context.h b/tools/cli/cli-context.h index 2c67586d63..99c65cdac4 100644 --- a/tools/cli/cli-context.h +++ b/tools/cli/cli-context.h @@ -52,6 +52,10 @@ private: void add_system_prompt(); void push_user_message(const std::string & text); + // check if server have multiple models (router mode) + // if yes, list them then ask; do nothing otherwise + bool list_and_ask_models(); + // read a file and stage it as a multimodal content part; type is one of // "image", "audio", "video"; returns false if the file cannot be read bool stage_media_file(const std::string & fname, const std::string & type); diff --git a/tools/cli/cli-view.h b/tools/cli/cli-view.h index 852ae7e733..4822d27b66 100644 --- a/tools/cli/cli-view.h +++ b/tools/cli/cli-view.h @@ -162,8 +162,12 @@ namespace view { console::log("\n> %s\n", buffer.c_str()); } } - std::string read_input(bool multiline_input) { - console::log("\n> "); + std::string read_input(bool multiline_input, const char * prompt = nullptr) { + if (prompt) { + console::log("%s", prompt); + } else { + console::log("\n> "); + } std::string buffer; std::string line; bool another_line = true;