mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
server: refactor/generalize input file schema (#24299)
* server: refactor/generalize input file schema * wire up input_video, accept raw base64 * nits * nits (2) * fix windows
This commit is contained in:
parent
099b579acb
commit
6ee0f65793
@ -1230,8 +1230,6 @@ print(completion.choices[0].text)
|
|||||||
|
|
||||||
Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with the OpenAI API spec are being made, in our experience it suffices to support many apps. Only models with a [supported chat template](https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template) can be used optimally with this endpoint. By default, the ChatML template will be used.
|
Given a ChatML-formatted json description in `messages`, it returns the predicted completion. Both synchronous and streaming mode are supported, so scripted and interactive applications work fine. While no strong claims of compatibility with the OpenAI API spec are being made, in our experience it suffices to support many apps. Only models with a [supported chat template](https://github.com/ggml-org/llama.cpp/wiki/Templates-supported-by-llama_chat_apply_template) can be used optimally with this endpoint. By default, the ChatML template will be used.
|
||||||
|
|
||||||
If model supports multimodal, you can input the media file via `image_url` content part. We support both base64 and remote URL as input. See OAI documentation for more.
|
|
||||||
|
|
||||||
*Options:*
|
*Options:*
|
||||||
|
|
||||||
See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). llama.cpp `/completion`-specific features such as `mirostat` are also supported.
|
See [OpenAI Chat Completions API documentation](https://platform.openai.com/docs/api-reference/chat). llama.cpp `/completion`-specific features such as `mirostat` are also supported.
|
||||||
@ -1250,9 +1248,18 @@ The `response_format` parameter supports both plain JSON output (e.g. `{"type":
|
|||||||
|
|
||||||
`parallel_tool_calls` : Whether to enable parallel/multiple tool calls (only supported on some models, verification is based on jinja template).
|
`parallel_tool_calls` : Whether to enable parallel/multiple tool calls (only supported on some models, verification is based on jinja template).
|
||||||
|
|
||||||
For multimodal input:
|
For multimodal input (typed content, `messages[i].content[j]`):
|
||||||
- Content type `image_url` and `input_audio` are the same as OAI schema
|
- If `type == "image_url"`:
|
||||||
- Content type `input_video` is an extension from OAI schema. For now, it only accepts base64 input
|
- `image_url.url` can be a remote URL, base64 (raw or URI-encoded via `data:image/...;base64`) or path to local file
|
||||||
|
- Accepts formats supported by `stb_image` (jpeg, png, tga, bmp, gif, ...)
|
||||||
|
- If `type == "input_audio"`:
|
||||||
|
- Either `input_audio.data` or `input_audio.url` can be specified; the value can be a remote URL, raw base64, or a path to a local file
|
||||||
|
- Accepts formats supported by `miniaudio` (mp3, wav, flac)
|
||||||
|
- `input_audio.format` is ignored; the file format is determined automatically
|
||||||
|
- If `type == "input_video"`:
|
||||||
|
- Either `input_video.data` or `input_video.url` can be specified; the value can be a remote URL, raw base64, or a path to a local file
|
||||||
|
- Accepts formats supported by `ffmpeg`
|
||||||
|
- Note: for local files, make sure to set `--media-path`. The file path must be prefixed with `file://`
|
||||||
|
|
||||||
*Examples:*
|
*Examples:*
|
||||||
|
|
||||||
|
|||||||
@ -817,12 +817,21 @@ json oaicompat_completion_params_parse(const json & body) {
|
|||||||
return llama_params;
|
return llama_params;
|
||||||
}
|
}
|
||||||
|
|
||||||
// media_path always end with '/', see arg.cpp
|
// url can be
|
||||||
|
// - http(s):// for remote files
|
||||||
|
// - file:// for local files (only allowed if media_path is set)
|
||||||
|
// - data: for base64 encoded data with uri scheme (e.g. data:image/png;base64,...)
|
||||||
|
// - raw base64 encoded data
|
||||||
static void handle_media(
|
static void handle_media(
|
||||||
std::vector<raw_buffer> & out_files,
|
std::vector<raw_buffer> & out_files,
|
||||||
json & media_obj,
|
const std::string & url,
|
||||||
const std::string & media_path) {
|
const std::string & media_path,
|
||||||
std::string url = json_value(media_obj, "url", std::string());
|
bool accept_base64_uri) {
|
||||||
|
if (!media_path.empty()) {
|
||||||
|
// should already be enforced by arg.cpp, but checking just in case
|
||||||
|
GGML_ASSERT(media_path.back() == DIRECTORY_SEPARATOR);
|
||||||
|
}
|
||||||
|
|
||||||
if (string_starts_with(url, "http")) {
|
if (string_starts_with(url, "http")) {
|
||||||
// download remote image
|
// download remote image
|
||||||
// TODO @ngxson : maybe make these params configurable
|
// TODO @ngxson : maybe make these params configurable
|
||||||
@ -858,20 +867,28 @@ static void handle_media(
|
|||||||
data.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
data.assign((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>());
|
||||||
out_files.push_back(data);
|
out_files.push_back(data);
|
||||||
|
|
||||||
} else {
|
} else if (accept_base64_uri && string_starts_with(url, "data:")) {
|
||||||
// try to decode base64 image
|
// try to decode base64 image
|
||||||
std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
|
std::vector<std::string> parts = string_split<std::string>(url, /*separator*/ ',');
|
||||||
if (parts.size() != 2) {
|
if (parts.size() != 2) {
|
||||||
throw std::runtime_error("Invalid url value");
|
throw std::runtime_error("Invalid uri-encoded base64 value");
|
||||||
} else if (!string_starts_with(parts[0], "data:image/")) {
|
} else if (!string_starts_with(parts[0], "data:image/")) {
|
||||||
throw std::runtime_error("Invalid url format: " + parts[0]);
|
throw std::runtime_error("Invalid uri format: " + parts[0]);
|
||||||
} else if (!string_ends_with(parts[0], "base64")) {
|
} else if (!string_ends_with(parts[0], "base64")) {
|
||||||
throw std::runtime_error("url must be base64 encoded");
|
throw std::runtime_error("uri must be base64 encoded");
|
||||||
} else {
|
} else {
|
||||||
auto base64_data = parts[1];
|
auto base64_data = parts[1];
|
||||||
auto decoded_data = base64_decode(base64_data);
|
auto decoded_data = base64_decode(base64_data);
|
||||||
out_files.push_back(decoded_data);
|
out_files.push_back(decoded_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
} else {
|
||||||
|
// try as raw base64 string
|
||||||
|
auto decoded_data = base64_decode(url);
|
||||||
|
if (decoded_data.empty()) {
|
||||||
|
throw std::runtime_error("Invalid base64 value");
|
||||||
|
}
|
||||||
|
out_files.push_back(decoded_data);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -957,14 +974,15 @@ json oaicompat_chat_params_parse(
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (auto & p : content) {
|
for (auto & p : content) {
|
||||||
std::string type = json_value(p, "type", std::string());
|
std::string type = json_value(p, "type", std::string());
|
||||||
if (type == "image_url") {
|
if (type == "image_url") {
|
||||||
if (!opt.allow_image) {
|
if (!opt.allow_image) {
|
||||||
throw std::runtime_error("image input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
|
throw std::runtime_error("image input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
|
||||||
}
|
}
|
||||||
|
|
||||||
json image_url = json_value(p, "image_url", json::object());
|
json image_url = json_value(p, "image_url", json::object());
|
||||||
handle_media(out_files, image_url, opt.media_path);
|
std::string url = json_value(image_url, "url", std::string());
|
||||||
|
handle_media(out_files, url, opt.media_path, true);
|
||||||
|
|
||||||
p["type"] = "media_marker";
|
p["type"] = "media_marker";
|
||||||
p["text"] = get_media_marker();
|
p["text"] = get_media_marker();
|
||||||
@ -975,17 +993,11 @@ json oaicompat_chat_params_parse(
|
|||||||
throw std::runtime_error("audio input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
|
throw std::runtime_error("audio input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
|
||||||
}
|
}
|
||||||
|
|
||||||
json input_audio = json_value(p, "input_audio", json::object());
|
// note: don't need to validate "format", it's redundant
|
||||||
std::string data = json_value(input_audio, "data", std::string());
|
json input_audio = json_value(p, "input_audio", json::object());
|
||||||
std::string format = json_value(input_audio, "format", std::string());
|
std::string url = json_value(input_audio, "data",
|
||||||
// while we also support flac, we don't allow it here so we matches the OAI spec
|
json_value(input_audio, "url", std::string()));
|
||||||
if (format != "wav" && format != "mp3") {
|
handle_media(out_files, url, opt.media_path, false);
|
||||||
throw std::invalid_argument("input_audio.format must be either 'wav' or 'mp3'");
|
|
||||||
}
|
|
||||||
auto decoded_data = base64_decode(data); // expected to be base64 encoded
|
|
||||||
out_files.push_back(decoded_data);
|
|
||||||
|
|
||||||
// TODO: add audio_url support by reusing handle_media()
|
|
||||||
|
|
||||||
p["type"] = "media_marker";
|
p["type"] = "media_marker";
|
||||||
p["text"] = get_media_marker();
|
p["text"] = get_media_marker();
|
||||||
@ -996,10 +1008,10 @@ json oaicompat_chat_params_parse(
|
|||||||
throw std::runtime_error("video input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
|
throw std::runtime_error("video input is not supported - hint: if this is unexpected, you may need to provide the mmproj");
|
||||||
}
|
}
|
||||||
|
|
||||||
json input_video = json_value(p, "input_video", json::object());
|
json input_video = json_value(p, "input_video", json::object());
|
||||||
std::string data = json_value(input_video, "data", std::string());
|
std::string url = json_value(input_video, "data",
|
||||||
auto decoded_data = base64_decode(data); // expected to be base64 encoded
|
json_value(input_video, "url", std::string()));
|
||||||
out_files.push_back(decoded_data);
|
handle_media(out_files, url, opt.media_path, false);
|
||||||
|
|
||||||
p["type"] = "media_marker";
|
p["type"] = "media_marker";
|
||||||
p["text"] = get_media_marker();
|
p["text"] = get_media_marker();
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user