mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
Codex CLI Responses Compatibility (#1964)
* responses: skip known unsupported Responses tool types from Codex CLI - Skip namespace, web_search, image_generation tools instead of HTTP 500 - Reject unknown non-function tool types with controlled error - Preserve function tool conversion logic unchanged Fixes Codex CLI 0.133.0 compatibility where it sends mixed tool types. * responses: harden codex compatibility coverage * responses: expose Codex model catalog metadata
This commit is contained in:
parent
d37d92b54c
commit
064d23a6f8
@ -223,8 +223,12 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
|
||||
for (json resp_tool : response_body.at("tools")) {
|
||||
json chatcmpl_tool;
|
||||
|
||||
if (json_value(resp_tool, "type", std::string()) != "function") {
|
||||
throw std::runtime_error("'type' of tool must be 'function'");
|
||||
const std::string tool_type = json_value(resp_tool, "type", std::string());
|
||||
|
||||
// Chat Completions only supports function tools. Responses built-ins
|
||||
// such as web_search, image_generation, and namespace are ignored.
|
||||
if (tool_type != "function") {
|
||||
continue;
|
||||
}
|
||||
resp_tool.erase("type");
|
||||
chatcmpl_tool["type"] = "function";
|
||||
@ -236,7 +240,9 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
|
||||
chatcmpl_tools.push_back(chatcmpl_tool);
|
||||
}
|
||||
chatcmpl_body.erase("tools");
|
||||
chatcmpl_body["tools"] = chatcmpl_tools;
|
||||
if (!chatcmpl_tools.empty()) {
|
||||
chatcmpl_body["tools"] = chatcmpl_tools;
|
||||
}
|
||||
}
|
||||
|
||||
if (response_body.contains("max_output_tokens")) {
|
||||
@ -244,6 +250,15 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
|
||||
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
|
||||
}
|
||||
|
||||
chatcmpl_body.erase("reasoning");
|
||||
chatcmpl_body.erase("store");
|
||||
chatcmpl_body.erase("include");
|
||||
chatcmpl_body.erase("prompt_cache_key");
|
||||
chatcmpl_body.erase("client_metadata");
|
||||
chatcmpl_body.erase("background");
|
||||
chatcmpl_body.erase("max_tool_calls");
|
||||
chatcmpl_body.erase("metadata");
|
||||
|
||||
return chatcmpl_body;
|
||||
}
|
||||
|
||||
|
||||
@ -1275,6 +1275,48 @@ int main(int argc, char ** argv) {
|
||||
};
|
||||
|
||||
const auto handle_models = [&params, &model_meta](const httplib::Request & req, httplib::Response & res) {
|
||||
json codex_model = {
|
||||
{"slug", params.model_alias},
|
||||
{"display_name", params.model_alias},
|
||||
{"description", nullptr},
|
||||
{"default_reasoning_level", nullptr},
|
||||
{"supported_reasoning_levels", json::array()},
|
||||
{"shell_type", "default"},
|
||||
{"visibility", "list"},
|
||||
{"supported_in_api", true},
|
||||
{"priority", 0},
|
||||
{"additional_speed_tiers", json::array()},
|
||||
{"service_tiers", json::array()},
|
||||
{"default_service_tier", nullptr},
|
||||
{"availability_nux", nullptr},
|
||||
{"upgrade", nullptr},
|
||||
{"base_instructions", ""},
|
||||
{"model_messages", nullptr},
|
||||
{"supports_reasoning_summaries", false},
|
||||
{"default_reasoning_summary", "auto"},
|
||||
{"support_verbosity", false},
|
||||
{"default_verbosity", nullptr},
|
||||
{"apply_patch_tool_type", nullptr},
|
||||
{"web_search_tool_type", "text"},
|
||||
{"truncation_policy", {
|
||||
{"mode", "tokens"},
|
||||
{"limit", params.n_ctx},
|
||||
}},
|
||||
{"supports_parallel_tool_calls", false},
|
||||
{"supports_image_detail_original", false},
|
||||
{"context_window", params.n_ctx},
|
||||
{"max_context_window", params.n_ctx},
|
||||
{"auto_compact_token_limit", (params.n_ctx * 9) / 10},
|
||||
{"effective_context_window_percent", 95},
|
||||
{"experimental_supported_tools", json::array()},
|
||||
{"input_modalities", json::array({"text"})},
|
||||
{"supports_search_tool", false},
|
||||
{"use_responses_lite", false},
|
||||
{"auto_review_model_override", nullptr},
|
||||
{"tool_mode", nullptr},
|
||||
{"multi_agent_version", nullptr},
|
||||
};
|
||||
|
||||
json models = {
|
||||
{"object", "list"},
|
||||
{"data", {
|
||||
@ -1286,7 +1328,8 @@ int main(int argc, char ** argv) {
|
||||
{"meta", model_meta},
|
||||
{"max_model_len", params.n_ctx}, //vllm specs
|
||||
},
|
||||
}}
|
||||
}},
|
||||
{"models", json::array({codex_model})},
|
||||
};
|
||||
|
||||
res.set_content(models.dump(), "application/json; charset=utf-8");
|
||||
|
||||
37
examples/server/tests/features/codex_responses.feature
Normal file
37
examples/server/tests/features/codex_responses.feature
Normal file
@ -0,0 +1,37 @@
|
||||
@llama.cpp
|
||||
@server
|
||||
@codex
|
||||
Feature: Codex CLI Responses API Compatibility
|
||||
|
||||
Background: Server startup
|
||||
Given a server listening on localhost:8080
|
||||
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
|
||||
And a model file test-model.gguf
|
||||
And a model alias tinyllama-2
|
||||
And 42 as server seed
|
||||
And 256 KV cache size
|
||||
And 32 as batch size
|
||||
And 2 slots
|
||||
And 64 server max tokens to predict
|
||||
And Jinja templating enabled
|
||||
Then the server is starting
|
||||
Then the server is healthy
|
||||
|
||||
Scenario: Responses API accepts mixed tool types from Codex
|
||||
Given a model test
|
||||
And an OAI compatible responses request with mixed Codex tool types
|
||||
Then the mixed Codex tools response succeeds
|
||||
|
||||
Scenario: Models endpoint includes Codex model catalog metadata
|
||||
Given the Codex model catalog is requested
|
||||
Then the Codex model catalog is compatible
|
||||
|
||||
Scenario: Probe request with empty input and max_output_tokens=1 is accepted
|
||||
Given a model test
|
||||
And a probe responses request with empty input and max_output_tokens=1
|
||||
Then the probe response is accepted
|
||||
|
||||
Scenario: previous_response_id returns a controlled error
|
||||
Given a model test
|
||||
And a responses request with invalid previous_response_id
|
||||
Then the previous_response_id request returns an error
|
||||
263
examples/server/tests/features/steps/codex_responses_steps.py
Normal file
263
examples/server/tests/features/steps/codex_responses_steps.py
Normal file
@ -0,0 +1,263 @@
|
||||
"""
|
||||
Behave step definitions for Codex CLI Responses API compatibility tests.
|
||||
Tests that /v1/responses accepts Codex-shaped payloads with mixed tool types.
|
||||
"""
|
||||
|
||||
import json
|
||||
|
||||
from behave import step # pyright: ignore[reportAttributeAccessIssue]
|
||||
from behave.api.async_step import async_run_until_complete
|
||||
|
||||
import aiohttp
|
||||
|
||||
|
||||
# Codex CLI 0.133.0 sends mixed tool types: function + namespace + web_search + image_generation.
# Request body for /v1/responses used by the mixed-tools scenario: one function
# tool followed by three non-function tool entries.
CODEX_MIXED_TOOLS_PAYLOAD = {
    "model": "test",
    "input": [
        {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Reply exactly local-ok"}]
        }
    ],
    "tools": [
        {
            "type": "function",
            "name": "exec_command",
            "description": "Run a command",
            "parameters": {
                "type": "object",
                "properties": {"cmd": {"type": "string"}},
                "required": ["cmd"]
            },
            "strict": False
        },
        {
            "type": "namespace",
            "name": "multi_agent_v1",
            "description": "Sub-agent tools",
            "tools": []
        },
        {
            "type": "web_search",
            "external_web_access": True
        },
        {
            "type": "image_generation",
            "output_format": "png"
        }
    ],
    "tool_choice": "auto",
    "parallel_tool_calls": False,
    "stream": False,
    "max_output_tokens": 8,
}
|
||||
|
||||
# Minimal probe request body for /v1/responses: empty string input and a
# single output token, non-streaming.
PROBE_EMPTY_INPUT_PAYLOAD = {
    "model": "test",
    "input": "",
    "stream": False,
    "max_output_tokens": 1,
}
|
||||
|
||||
# Request body for /v1/responses carrying a previous_response_id value
# ("invalid-id-12345"); used by the scenario that expects an error response.
INVALID_PREVIOUS_RESPONSE_PAYLOAD = {
    "model": "test",
    "input": [
        {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "test"}]
        }
    ],
    "previous_response_id": "invalid-id-12345",
    "stream": False,
}
|
||||
|
||||
|
||||
@step("an OAI compatible responses request with mixed Codex tool types")
@async_run_until_complete
async def step_oai_responses_mixed_tools(context):
    """
    Send a Responses API request with mixed tool types from Codex CLI:
    - function (should be converted)
    - namespace, web_search, image_generation (should be skipped, not rejected)

    POSTs CODEX_MIXED_TOOLS_PAYLOAD to ``{context.base_url}/v1/responses`` and
    stores the HTTP status and the raw body text on
    ``context.responses_status`` / ``context.responses_text`` so that a later
    step can assert on them.
    """
    if context.debug:
        print("Submitting Responses API request with mixed Codex tool types...")

    # A shallow copy is sufficient: only the top-level "model" key is replaced.
    payload = CODEX_MIXED_TOOLS_PAYLOAD.copy()
    if hasattr(context, "model") and context.model:
        payload["model"] = context.model

    # Use a placeholder token when no user API key is available (attribute
    # missing, None, or empty) so the header is never rendered as "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/responses"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        async with session.post(url, json=payload, headers=headers) as response:
            context.responses_status = response.status
            context.responses_text = await response.text()
|
||||
|
||||
|
||||
@step("the Codex model catalog is requested")
@async_run_until_complete
async def step_codex_model_catalog_requested(context):
    """Fetch /v1/models and keep the raw response for Codex catalog assertions.

    Stores the HTTP status on ``context.codex_models_status`` and the raw body
    text on ``context.codex_models_text``.
    """
    # Use a placeholder token when no user API key is available (attribute
    # missing, None, or empty) so the header is never rendered as "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/models"
        headers = {
            "Authorization": f"Bearer {api_key}",
        }

        async with session.get(url, headers=headers) as response:
            context.codex_models_status = response.status
            context.codex_models_text = await response.text()
|
||||
|
||||
|
||||
@step("a probe responses request with empty input and max_output_tokens=1")
@async_run_until_complete
async def step_probe_empty_input(context):
    """Send a probe request with empty input and minimal tokens.

    POSTs PROBE_EMPTY_INPUT_PAYLOAD to ``{context.base_url}/v1/responses`` and
    stores the HTTP status and raw body text on ``context.probe_status`` /
    ``context.probe_text``.
    """
    if context.debug:
        print("Submitting probe Responses API request with empty input...")

    # A shallow copy is sufficient: only the top-level "model" key is replaced.
    payload = PROBE_EMPTY_INPUT_PAYLOAD.copy()
    if hasattr(context, "model") and context.model:
        payload["model"] = context.model

    # Use a placeholder token when no user API key is available (attribute
    # missing, None, or empty) so the header is never rendered as "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/responses"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        async with session.post(url, json=payload, headers=headers) as response:
            context.probe_status = response.status
            context.probe_text = await response.text()
|
||||
|
||||
|
||||
@step("a responses request with invalid previous_response_id")
@async_run_until_complete
async def step_invalid_previous_response_id(context):
    """Send a request with an invalid previous_response_id to trigger an error.

    POSTs INVALID_PREVIOUS_RESPONSE_PAYLOAD to
    ``{context.base_url}/v1/responses`` and stores the HTTP status and raw body
    text on ``context.prev_resp_status`` / ``context.prev_resp_text``.
    """
    if context.debug:
        print("Submitting Responses API request with invalid previous_response_id...")

    # A shallow copy is sufficient: only the top-level "model" key is replaced.
    payload = INVALID_PREVIOUS_RESPONSE_PAYLOAD.copy()
    if hasattr(context, "model") and context.model:
        payload["model"] = context.model

    # Use a placeholder token when no user API key is available (attribute
    # missing, None, or empty) so the header is never rendered as "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/responses"
        headers = {
            "Content-Type": "application/json",
            "Authorization": f"Bearer {api_key}",
        }

        async with session.post(url, json=payload, headers=headers) as response:
            context.prev_resp_status = response.status
            context.prev_resp_text = await response.text()
|
||||
|
||||
|
||||
@step("the mixed Codex tools response succeeds")
def step_mixed_tools_response_succeeds(context):
    """Assert HTTP 200, valid JSON, and required Responses fields.

    Reads ``context.responses_status`` and ``context.responses_text`` (both
    default to None when the request step did not run) and raises
    AssertionError when the status is not 200, the body is missing or not
    valid JSON, or the decoded body lacks the "id" or "output" keys.
    """
    status = getattr(context, "responses_status", None)
    text = getattr(context, "responses_text", None)

    # The failure message includes at most the first 200 characters of the body.
    assert status == 200, f"Mixed tools request failed with status {status}. Expected 200. Response: {text[:200] if text else '(empty)'}"
    assert text is not None, "No response body received"

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        raise AssertionError(f"Invalid JSON response: {e}") from e

    assert "id" in data, "Expected 'id' in Responses response"
    assert "output" in data, "Expected 'output' in Responses response"
|
||||
|
||||
|
||||
@step("the Codex model catalog is compatible")
def step_codex_model_catalog_compatible(context):
    """Assert /v1/models preserves OpenAI shape and includes Codex ModelInfo.

    Reads ``context.codex_models_status`` and ``context.codex_models_text``.
    Checks that the body is JSON with ``object == "list"``, a ``data`` list and
    a non-empty ``models`` list, that the first ``models`` entry carries the
    expected field names, and that a few of its values match the scenario
    (slug "tinyllama-2", context window 256).
    """
    status = getattr(context, "codex_models_status", None)
    text = getattr(context, "codex_models_text", None)

    # The failure message includes at most the first 200 characters of the body.
    assert status == 200, f"Models request failed with status {status}. Response: {text[:200] if text else '(empty)'}"
    assert text is not None, "No models response body"

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        raise AssertionError(f"Invalid JSON models response: {e}") from e

    assert data.get("object") == "list", "Expected OpenAI-compatible object=list"
    assert isinstance(data.get("data"), list), "Expected OpenAI-compatible data list"
    assert isinstance(data.get("models"), list), "Expected Codex-compatible models list"
    assert data["models"], "Expected at least one Codex model entry"

    model = data["models"][0]
    for field in [
        "slug",
        "display_name",
        "supported_reasoning_levels",
        "shell_type",
        "visibility",
        "supported_in_api",
        "base_instructions",
        "truncation_policy",
        "context_window",
        "input_modalities",
    ]:
        assert field in model, f"Expected Codex model field '{field}'"

    assert model["slug"] == "tinyllama-2"
    assert model["visibility"] == "list"
    assert model["supported_in_api"] is True
    assert model["truncation_policy"]["mode"] == "tokens"
    assert model["context_window"] == 256
|
||||
|
||||
|
||||
@step("the probe response is accepted")
def step_probe_response_accepted(context):
    """Assert probe request succeeds with HTTP 200, valid JSON, id, and output.

    Reads ``context.probe_status`` and ``context.probe_text`` and raises
    AssertionError when either is missing, the status is not 200, the body is
    not valid JSON, or the decoded body lacks the "id" or "output" keys.
    """
    status = getattr(context, "probe_status", None)
    text = getattr(context, "probe_text", None)

    assert status is not None, "No probe response status"
    assert text is not None, "No probe response body"

    # The failure message includes at most the first 200 characters of the body.
    assert status == 200, f"Probe request failed with status {status}. Expected 200. Response: {text[:200]}"

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        raise AssertionError(f"Invalid JSON probe response: {e}") from e

    assert "id" in data, "Expected 'id' in probe response"
    assert "output" in data, "Expected 'output' in probe response"
|
||||
|
||||
|
||||
@step("the previous_response_id request returns an error")
def step_previous_response_id_returns_error(context):
    """Assert that invalid previous_response_id returns an error response (4xx or 5xx).

    Reads ``context.prev_resp_status`` and ``context.prev_resp_text`` and
    raises AssertionError when either is missing, the status is below 400, the
    body is not valid JSON, the body has no "error" field, or the error message
    does not mention "previous_response_id".
    """
    status = getattr(context, "prev_resp_status", None)
    text = getattr(context, "prev_resp_text", None)

    assert status is not None, "No previous_response_id response status"
    assert text is not None, "No previous_response_id response body"

    assert status >= 400, f"Expected error status for invalid previous_response_id, got {status}"

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        raise AssertionError(f"Invalid JSON error response: {e}") from e

    assert "error" in data, f"Expected 'error' field in error response. Got: {list(data.keys())}"
    # "error" may be an object with a "message" key or a bare value; handle both.
    error_msg = data["error"].get("message", "") if isinstance(data["error"], dict) else str(data["error"])
    assert "previous_response_id" in error_msg, f"Expected 'previous_response_id' in error message. Got: {error_msg}"
|
||||
@ -72,6 +72,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
|
||||
context.response_format = None
|
||||
context.temperature = None
|
||||
context.lora_file = None
|
||||
context.jinja_enabled = False
|
||||
|
||||
context.tasks_result = []
|
||||
context.concurrent_tasks = []
|
||||
@ -176,6 +177,11 @@ def step_server_metrics(context):
|
||||
context.server_metrics = True
|
||||
|
||||
|
||||
@step('Jinja templating enabled')
def step_enable_jinja(context):
    """Set ``context.jinja_enabled`` to True for the current scenario."""
    context.jinja_enabled = True
|
||||
|
||||
|
||||
@step("the server is starting")
|
||||
def step_start_server(context):
|
||||
start_server_background(context)
|
||||
@ -1347,6 +1353,8 @@ def start_server_background(context):
|
||||
server_args.append('--verbose')
|
||||
if context.lora_file:
|
||||
server_args.extend(['--lora', context.lora_file])
|
||||
if context.jinja_enabled:
|
||||
server_args.append('--jinja')
|
||||
if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
|
||||
server_args.extend(['--log-format', "text"])
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user