mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
common/chat, server: refactor, move all conversion functions to common, add tests (#20690) jinja : remove unused header (#22310) common : fix jinja warnings with clang 21 (#22313) Signed-off-by: Adrien Gallouët <angt@huggingface.co> chat: fix handling of space in reasoning markers (#22353) * chat: fix handling of space in reasoning markers common : re-arm reasoning budget after DONE on new <think> (#22323) common : determine generation prompt using longest common prefix (#22657) common/autoparser: fixes for newline handling / forced tool calls (#22654) * chat/autoparser: the fixes * Move optspace() to chat-peg-parser, comment out server tests invalidated due to content now allowed with forced tool calls. * Trim whitespace on apply instead common/chat : preserve media markers for typed-content templates (#22634) common : revert reasoning budget +inf logit bias (#22740) common : do not wrap raw strings in schema parser for tagged parsers (#22827) common : enable streaming JSON argument values (#23173) * common : remove atomic from json arguments * common : remove parsing logic on JSON arguments common : do not pass prompt tokens to reasoning budget sampler (#22488) reasoning-budget: clone should do a deep-copy (#23095) Co-authored-by: Piotr Wilkin (ilintar) <piotr.wilkin@syndatis.com>
559 lines
22 KiB
C++
559 lines
22 KiB
C++
#include "server-chat.h"
|
|
#include "server-common.h"
|
|
|
|
#include <sstream>
|
|
|
|
json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
|
|
if (!response_body.contains("input")) {
|
|
throw std::runtime_error("'input' is required");
|
|
}
|
|
if (!json_value(response_body, "previous_response_id", std::string{}).empty()) {
|
|
throw std::runtime_error("ik_llama.cpp does not support 'previous_response_id'.");
|
|
}
|
|
|
|
const json input_value = response_body.at("input");
|
|
json chatcmpl_body = response_body;
|
|
chatcmpl_body.erase("input");
|
|
std::vector<json> chatcmpl_messages;
|
|
|
|
if (response_body.contains("instructions")) {
|
|
chatcmpl_messages.push_back({
|
|
{"role", "system"},
|
|
{"content", json_value(response_body, "instructions", std::string())},
|
|
});
|
|
chatcmpl_body.erase("instructions");
|
|
}
|
|
|
|
if (input_value.is_string()) {
|
|
chatcmpl_messages.push_back({
|
|
{"role", "user"},
|
|
{"content", input_value},
|
|
});
|
|
}
|
|
else if (input_value.is_array()) {
|
|
static auto exists_and_is_array = [](const json& j, const char* key) -> bool {
|
|
return j.contains(key) && j.at(key).is_array();
|
|
};
|
|
static auto exists_and_is_string = [](const json& j, const char* key) -> bool {
|
|
return j.contains(key) && j.at(key).is_string();
|
|
};
|
|
|
|
for (json item : input_value) {
|
|
if (exists_and_is_string(item, "content")) {
|
|
item["content"] = json::array({
|
|
json{
|
|
{"text", item.at("content")},
|
|
{"type", "input_text"},
|
|
}
|
|
});
|
|
}
|
|
|
|
if (exists_and_is_array(item, "content") &&
|
|
exists_and_is_string(item, "role") &&
|
|
(item.at("role") == "user" || item.at("role") == "system" || item.at("role") == "developer")
|
|
) {
|
|
std::vector<json> chatcmpl_content;
|
|
|
|
for (const json& input_item : item.at("content")) {
|
|
const std::string type = json_value(input_item, "type", std::string());
|
|
|
|
if (type == "input_text") {
|
|
if (!input_item.contains("text")) {
|
|
throw std::runtime_error("'Input text' requires 'text'");
|
|
}
|
|
chatcmpl_content.push_back({
|
|
{"text", input_item.at("text")},
|
|
{"type", "text"},
|
|
});
|
|
}
|
|
else if (type == "input_image") {
|
|
if (!input_item.contains("image_url")) {
|
|
throw std::runtime_error("'image_url' is required");
|
|
}
|
|
chatcmpl_content.push_back({
|
|
{"image_url", json{
|
|
{"url", input_item.at("image_url")},
|
|
}},
|
|
{"type", "image_url"},
|
|
});
|
|
}
|
|
else if (type == "input_file") {
|
|
throw std::runtime_error("'input_file' is not supported by ik_llama.cpp at this moment");
|
|
}
|
|
else {
|
|
throw std::runtime_error("'type' must be one of 'input_text', 'input_image', or 'input_file'");
|
|
}
|
|
}
|
|
|
|
if (item.contains("type")) {
|
|
item.erase("type");
|
|
}
|
|
if (item.contains("status")) {
|
|
item.erase("status");
|
|
}
|
|
item["content"] = chatcmpl_content;
|
|
|
|
chatcmpl_messages.push_back(item);
|
|
}
|
|
else if (exists_and_is_array(item, "content") &&
|
|
exists_and_is_string(item, "role") &&
|
|
item.at("role") == "assistant" &&
|
|
exists_and_is_string(item, "type") &&
|
|
item.at("type") == "message"
|
|
) {
|
|
std::vector<json> chatcmpl_content;
|
|
|
|
for (const auto& output_text : item.at("content")) {
|
|
const std::string type = json_value(output_text, "type", std::string());
|
|
if (type != "output_text") {
|
|
throw std::runtime_error("'type' must be 'output_text'");
|
|
}
|
|
if (!exists_and_is_string(output_text, "text")) {
|
|
throw std::runtime_error("'Output text' requires 'text'");
|
|
}
|
|
chatcmpl_content.push_back({
|
|
{"text", output_text.at("text")},
|
|
{"type", "text"},
|
|
});
|
|
}
|
|
|
|
item.erase("status");
|
|
item.erase("type");
|
|
item["content"] = chatcmpl_content;
|
|
chatcmpl_messages.push_back(item);
|
|
}
|
|
else if (exists_and_is_string(item, "arguments") &&
|
|
exists_and_is_string(item, "call_id") &&
|
|
exists_and_is_string(item, "name") &&
|
|
exists_and_is_string(item, "type") &&
|
|
item.at("type") == "function_call"
|
|
) {
|
|
json msg = json{
|
|
{"role", "assistant"},
|
|
{"tool_calls", json::array({json{
|
|
{"function", json{
|
|
{"arguments", item.at("arguments")},
|
|
{"name", item.at("name")},
|
|
}},
|
|
{"id", item.at("call_id")},
|
|
{"type", "function"},
|
|
}})},
|
|
};
|
|
|
|
if (!chatcmpl_messages.empty() && chatcmpl_messages.back().contains("reasoning_content")) {
|
|
msg["reasoning_content"] = chatcmpl_messages.back().at("reasoning_content");
|
|
chatcmpl_messages.pop_back();
|
|
}
|
|
chatcmpl_messages.push_back(msg);
|
|
}
|
|
else if (exists_and_is_string(item, "call_id") &&
|
|
(exists_and_is_string(item, "output") || exists_and_is_array(item, "output")) &&
|
|
exists_and_is_string(item, "type") &&
|
|
item.at("type") == "function_call_output"
|
|
) {
|
|
if (item.at("output").is_string()) {
|
|
chatcmpl_messages.push_back(json{
|
|
{"content", item.at("output")},
|
|
{"role", "tool"},
|
|
{"tool_call_id", item.at("call_id")},
|
|
});
|
|
}
|
|
else {
|
|
json chatcmpl_outputs = item.at("output");
|
|
for (json& chatcmpl_output : chatcmpl_outputs) {
|
|
if (!chatcmpl_output.contains("type") || chatcmpl_output.at("type") != "input_text") {
|
|
throw std::runtime_error("Output of tool call should be 'Input text'");
|
|
}
|
|
chatcmpl_output["type"] = "text";
|
|
}
|
|
chatcmpl_messages.push_back(json{
|
|
{"content", chatcmpl_outputs},
|
|
{"role", "tool"},
|
|
{"tool_call_id", item.at("call_id")},
|
|
});
|
|
}
|
|
}
|
|
else if (exists_and_is_array(item, "summary") &&
|
|
exists_and_is_string(item, "type") &&
|
|
item.at("type") == "reasoning") {
|
|
if (!exists_and_is_array(item, "content")) {
|
|
throw std::runtime_error("item['content'] is not an array");
|
|
}
|
|
if (item.at("content").empty()) {
|
|
throw std::runtime_error("item['content'] is empty");
|
|
}
|
|
if (!exists_and_is_string(item.at("content")[0], "text")) {
|
|
throw std::runtime_error("item['content']['text'] is not a string");
|
|
}
|
|
|
|
chatcmpl_messages.push_back(json{
|
|
{"role", "assistant"},
|
|
{"content", json::array()},
|
|
{"reasoning_content", item.at("content")[0].at("text")},
|
|
});
|
|
}
|
|
else {
|
|
throw std::runtime_error("Cannot determine type of 'item'");
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
throw std::runtime_error("'input' must be a string or array of objects");
|
|
}
|
|
|
|
chatcmpl_messages.erase(std::remove_if(
|
|
chatcmpl_messages.begin(),
|
|
chatcmpl_messages.end(),
|
|
[](const json& x) {
|
|
return x.contains("role") &&
|
|
x.at("role") == "assistant" &&
|
|
x.contains("content") &&
|
|
x.at("content") == json::array() &&
|
|
x.contains("reasoning_content");
|
|
}),
|
|
chatcmpl_messages.end());
|
|
|
|
chatcmpl_body["messages"] = chatcmpl_messages;
|
|
|
|
if (response_body.contains("tools")) {
|
|
if (!response_body.at("tools").is_array()) {
|
|
throw std::runtime_error("'tools' must be an array of objects");
|
|
}
|
|
std::vector<json> chatcmpl_tools;
|
|
for (json resp_tool : response_body.at("tools")) {
|
|
json chatcmpl_tool;
|
|
|
|
if (json_value(resp_tool, "type", std::string()) != "function") {
|
|
throw std::runtime_error("'type' of tool must be 'function'");
|
|
}
|
|
resp_tool.erase("type");
|
|
chatcmpl_tool["type"] = "function";
|
|
|
|
if (!resp_tool.contains("strict")) {
|
|
resp_tool["strict"] = true;
|
|
}
|
|
chatcmpl_tool["function"] = resp_tool;
|
|
chatcmpl_tools.push_back(chatcmpl_tool);
|
|
}
|
|
chatcmpl_body.erase("tools");
|
|
chatcmpl_body["tools"] = chatcmpl_tools;
|
|
}
|
|
|
|
if (response_body.contains("max_output_tokens")) {
|
|
chatcmpl_body.erase("max_output_tokens");
|
|
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
|
|
}
|
|
|
|
return chatcmpl_body;
|
|
}
|
|
|
|
json server_chat_convert_anthropic_to_oai(const json & body) {
|
|
json oai_body;
|
|
|
|
// Convert system prompt
|
|
json oai_messages = json::array();
|
|
auto system_param = json_value(body, "system", json());
|
|
if (!system_param.is_null()) {
|
|
std::string system_content;
|
|
|
|
if (system_param.is_string()) {
|
|
system_content = system_param.get<std::string>();
|
|
} else if (system_param.is_array()) {
|
|
for (const auto & block : system_param) {
|
|
if (json_value(block, "type", std::string()) == "text") {
|
|
std::string content_block = json_value(block, "text", std::string());
|
|
if (!string_starts_with(content_block, "x-anthropic-")) {
|
|
system_content += content_block;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
oai_messages.push_back({
|
|
{"role", "system"},
|
|
{"content", system_content}
|
|
});
|
|
}
|
|
|
|
// Convert messages
|
|
if (!body.contains("messages")) {
|
|
throw std::runtime_error("'messages' is required");
|
|
}
|
|
const json & messages = body.at("messages");
|
|
if (messages.is_array()) {
|
|
for (const auto & msg : messages) {
|
|
std::string role = json_value(msg, "role", std::string());
|
|
|
|
if (!msg.contains("content")) {
|
|
if (role == "assistant") {
|
|
continue;
|
|
}
|
|
oai_messages.push_back(msg);
|
|
continue;
|
|
}
|
|
|
|
const json & content = msg.at("content");
|
|
|
|
if (content.is_string()) {
|
|
oai_messages.push_back(msg);
|
|
continue;
|
|
}
|
|
|
|
if (!content.is_array()) {
|
|
oai_messages.push_back(msg);
|
|
continue;
|
|
}
|
|
|
|
json tool_calls = json::array();
|
|
json converted_content = json::array();
|
|
json tool_results = json::array();
|
|
std::string reasoning_content;
|
|
bool has_tool_calls = false;
|
|
|
|
for (const auto & block : content) {
|
|
std::string type = json_value(block, "type", std::string());
|
|
|
|
if (type == "text") {
|
|
converted_content.push_back(block);
|
|
} else if (type == "thinking") {
|
|
reasoning_content += json_value(block, "thinking", std::string());
|
|
} else if (type == "image") {
|
|
json source = json_value(block, "source", json::object());
|
|
std::string source_type = json_value(source, "type", std::string());
|
|
|
|
if (source_type == "base64") {
|
|
std::string media_type = json_value(source, "media_type", std::string("image/jpeg"));
|
|
std::string data = json_value(source, "data", std::string());
|
|
std::ostringstream ss;
|
|
ss << "data:" << media_type << ";base64," << data;
|
|
|
|
converted_content.push_back({
|
|
{"type", "image_url"},
|
|
{"image_url", {
|
|
{"url", ss.str()}
|
|
}}
|
|
});
|
|
} else if (source_type == "url") {
|
|
std::string url = json_value(source, "url", std::string());
|
|
converted_content.push_back({
|
|
{"type", "image_url"},
|
|
{"image_url", {
|
|
{"url", url}
|
|
}}
|
|
});
|
|
}
|
|
} else if (type == "tool_use") {
|
|
tool_calls.push_back({
|
|
{"id", json_value(block, "id", std::string())},
|
|
{"type", "function"},
|
|
{"function", {
|
|
{"name", json_value(block, "name", std::string())},
|
|
{"arguments", json_value(block, "input", json::object()).dump()}
|
|
}}
|
|
});
|
|
has_tool_calls = true;
|
|
} else if (type == "tool_result") {
|
|
std::string tool_use_id = json_value(block, "tool_use_id", std::string());
|
|
|
|
auto result_content = json_value(block, "content", json());
|
|
std::string result_text;
|
|
if (result_content.is_string()) {
|
|
result_text = result_content.get<std::string>();
|
|
} else if (result_content.is_array()) {
|
|
for (const auto & c : result_content) {
|
|
if (json_value(c, "type", std::string()) == "text") {
|
|
result_text += json_value(c, "text", std::string());
|
|
}
|
|
}
|
|
}
|
|
|
|
tool_results.push_back({
|
|
{"role", "tool"},
|
|
{"tool_call_id", tool_use_id},
|
|
{"content", result_text}
|
|
});
|
|
}
|
|
}
|
|
|
|
if (!converted_content.empty() || has_tool_calls || !reasoning_content.empty()) {
|
|
json new_msg = { {"role", role} };
|
|
if (!converted_content.empty()) {
|
|
new_msg["content"] = converted_content;
|
|
} else if (has_tool_calls || !reasoning_content.empty()) {
|
|
new_msg["content"] = "";
|
|
}
|
|
if (!tool_calls.empty()) {
|
|
new_msg["tool_calls"] = tool_calls;
|
|
}
|
|
if (!reasoning_content.empty()) {
|
|
new_msg["reasoning_content"] = reasoning_content;
|
|
}
|
|
oai_messages.push_back(new_msg);
|
|
}
|
|
|
|
for (const auto & tool_msg : tool_results) {
|
|
oai_messages.push_back(tool_msg);
|
|
}
|
|
}
|
|
}
|
|
|
|
oai_body["messages"] = oai_messages;
|
|
|
|
// Convert tools
|
|
if (body.contains("tools")) {
|
|
const json & tools = body.at("tools");
|
|
if (tools.is_array()) {
|
|
json oai_tools = json::array();
|
|
for (const auto & tool : tools) {
|
|
oai_tools.push_back({
|
|
{"type", "function"},
|
|
{"function", {
|
|
{"name", json_value(tool, "name", std::string())},
|
|
{"description", json_value(tool, "description", std::string())},
|
|
{"parameters", tool.contains("input_schema") ? tool.at("input_schema") : json::object()}
|
|
}}
|
|
});
|
|
}
|
|
oai_body["tools"] = oai_tools;
|
|
}
|
|
}
|
|
|
|
// Convert tool_choice
|
|
if (body.contains("tool_choice")) {
|
|
const json & tc = body.at("tool_choice");
|
|
if (tc.is_object()) {
|
|
std::string type = json_value(tc, "type", std::string());
|
|
if (type == "auto") {
|
|
oai_body["tool_choice"] = "auto";
|
|
} else if (type == "any" || type == "tool") {
|
|
oai_body["tool_choice"] = "required";
|
|
}
|
|
}
|
|
}
|
|
|
|
// Convert stop_sequences to stop
|
|
if (body.contains("stop_sequences")) {
|
|
oai_body["stop"] = body.at("stop_sequences");
|
|
}
|
|
|
|
// Handle max_tokens (required in Anthropic, but we're permissive)
|
|
if (body.contains("max_tokens")) {
|
|
oai_body["max_tokens"] = body.at("max_tokens");
|
|
} else {
|
|
oai_body["max_tokens"] = 4096;
|
|
}
|
|
|
|
// Pass through common params
|
|
for (const auto & key : { "temperature", "top_p", "top_k", "stream" }) {
|
|
if (body.contains(key)) {
|
|
oai_body[key] = body.at(key);
|
|
}
|
|
}
|
|
|
|
// Handle Anthropic-specific thinking param
|
|
if (body.contains("thinking")) {
|
|
json thinking = json_value(body, "thinking", json::object());
|
|
std::string thinking_type = json_value(thinking, "type", std::string());
|
|
if (thinking_type == "enabled") {
|
|
int budget_tokens = json_value(thinking, "budget_tokens", 10000);
|
|
oai_body["thinking_budget_tokens"] = budget_tokens;
|
|
}
|
|
}
|
|
|
|
// Handle Anthropic-specific metadata param
|
|
if (body.contains("metadata")) {
|
|
json metadata = json_value(body, "metadata", json::object());
|
|
std::string user_id = json_value(metadata, "user_id", std::string());
|
|
if (!user_id.empty()) {
|
|
oai_body["__metadata_user_id"] = user_id;
|
|
}
|
|
}
|
|
|
|
return oai_body;
|
|
}
|
|
|
|
|
|
json server_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff) {
|
|
json delta = json::object();
|
|
if (!diff.reasoning_content_delta.empty()) {
|
|
delta["reasoning_content"] = diff.reasoning_content_delta;
|
|
}
|
|
if (!diff.content_delta.empty()) {
|
|
delta["content"] = diff.content_delta;
|
|
}
|
|
if (diff.tool_call_index != std::string::npos) {
|
|
json tool_call;
|
|
tool_call["index"] = diff.tool_call_index;
|
|
if (!diff.tool_call_delta.id.empty()) {
|
|
tool_call["id"] = diff.tool_call_delta.id;
|
|
tool_call["type"] = "function";
|
|
}
|
|
if (!diff.tool_call_delta.name.empty() || !diff.tool_call_delta.arguments.empty()) {
|
|
json function = json::object();
|
|
if (!diff.tool_call_delta.name.empty()) {
|
|
function["name"] = diff.tool_call_delta.name;
|
|
}
|
|
if (!diff.tool_call_delta.arguments.empty()) {
|
|
function["arguments"] = diff.tool_call_delta.arguments;
|
|
}
|
|
tool_call["function"] = function;
|
|
}
|
|
delta["tool_calls"] = json::array({ tool_call });
|
|
}
|
|
return delta;
|
|
}
|
|
|
|
json convert_transcriptions_to_chatcmpl(
|
|
const json & inp_body,
|
|
const std::map<std::string, raw_buffer> & in_files,
|
|
std::vector<raw_buffer> & out_files) {
|
|
// TODO @ngxson : this function may need to be improved in the future
|
|
// handle input files
|
|
out_files.clear();
|
|
auto it = in_files.find("file");
|
|
if (it != in_files.end()) {
|
|
out_files.push_back(it->second);
|
|
} else {
|
|
throw std::invalid_argument("No input file found for transcription");
|
|
}
|
|
|
|
// handle input data
|
|
std::string prompt = json_value(inp_body, "prompt", std::string());
|
|
std::string language = json_value(inp_body, "language", std::string());
|
|
std::string response_format = json_value(inp_body, "response_format", std::string("json"));
|
|
if (response_format != "json") {
|
|
throw std::invalid_argument("Only 'json' response_format is supported for transcription");
|
|
}
|
|
if (prompt.empty()) {
|
|
prompt = "Transcribe audio to text";
|
|
}
|
|
if (!language.empty()) {
|
|
prompt += string_format(" (language: %s)", language.c_str());
|
|
}
|
|
prompt += get_media_marker();
|
|
|
|
json chatcmpl_body = inp_body; // copy all fields
|
|
chatcmpl_body["messages"] = json::array({
|
|
{
|
|
{"role", "user"},
|
|
{"content", prompt},
|
|
},
|
|
});
|
|
|
|
// because input from form-data, everything is string, we need to correct the types here
|
|
std::string stream = json_value(inp_body, "stream", std::string("false"));
|
|
chatcmpl_body["stream"] = stream == "true";
|
|
|
|
if (inp_body.contains("max_tokens")) {
|
|
std::string inp = inp_body["max_tokens"].get<std::string>();
|
|
chatcmpl_body["max_tokens"] = std::stoul(inp);
|
|
}
|
|
|
|
if (inp_body.contains("temperature")) {
|
|
std::string inp = inp_body["temperature"].get<std::string>();
|
|
chatcmpl_body["temperature"] = std::stof(inp);
|
|
}
|
|
|
|
return chatcmpl_body;
|
|
}
|