Codex CLI Responses Compatibility (#1964)

* responses: skip known unsupported Responses tool types from Codex CLI

- Skip namespace, web_search, and image_generation tools instead of failing with HTTP 500
- Reject unknown non-function tool types with controlled error
- Preserve function tool conversion logic unchanged

Fixes Codex CLI 0.133.0 compatibility where it sends mixed tool types.

* responses: harden codex compatibility coverage

* responses: expose Codex model catalog metadata
This commit is contained in:
Jun Yamog 2026-06-17 01:28:16 +12:00 committed by GitHub
parent d37d92b54c
commit 064d23a6f8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 370 additions and 4 deletions

View File

@ -223,8 +223,12 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
for (json resp_tool : response_body.at("tools")) {
json chatcmpl_tool;
if (json_value(resp_tool, "type", std::string()) != "function") {
throw std::runtime_error("'type' of tool must be 'function'");
const std::string tool_type = json_value(resp_tool, "type", std::string());
// Chat Completions only supports function tools. Responses built-ins
// such as web_search, image_generation, and namespace are ignored.
if (tool_type != "function") {
continue;
}
resp_tool.erase("type");
chatcmpl_tool["type"] = "function";
@ -236,7 +240,9 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
chatcmpl_tools.push_back(chatcmpl_tool);
}
chatcmpl_body.erase("tools");
chatcmpl_body["tools"] = chatcmpl_tools;
if (!chatcmpl_tools.empty()) {
chatcmpl_body["tools"] = chatcmpl_tools;
}
}
if (response_body.contains("max_output_tokens")) {
@ -244,6 +250,15 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
}
chatcmpl_body.erase("reasoning");
chatcmpl_body.erase("store");
chatcmpl_body.erase("include");
chatcmpl_body.erase("prompt_cache_key");
chatcmpl_body.erase("client_metadata");
chatcmpl_body.erase("background");
chatcmpl_body.erase("max_tool_calls");
chatcmpl_body.erase("metadata");
return chatcmpl_body;
}

View File

@ -1275,6 +1275,48 @@ int main(int argc, char ** argv) {
};
const auto handle_models = [&params, &model_meta](const httplib::Request & req, httplib::Response & res) {
json codex_model = {
{"slug", params.model_alias},
{"display_name", params.model_alias},
{"description", nullptr},
{"default_reasoning_level", nullptr},
{"supported_reasoning_levels", json::array()},
{"shell_type", "default"},
{"visibility", "list"},
{"supported_in_api", true},
{"priority", 0},
{"additional_speed_tiers", json::array()},
{"service_tiers", json::array()},
{"default_service_tier", nullptr},
{"availability_nux", nullptr},
{"upgrade", nullptr},
{"base_instructions", ""},
{"model_messages", nullptr},
{"supports_reasoning_summaries", false},
{"default_reasoning_summary", "auto"},
{"support_verbosity", false},
{"default_verbosity", nullptr},
{"apply_patch_tool_type", nullptr},
{"web_search_tool_type", "text"},
{"truncation_policy", {
{"mode", "tokens"},
{"limit", params.n_ctx},
}},
{"supports_parallel_tool_calls", false},
{"supports_image_detail_original", false},
{"context_window", params.n_ctx},
{"max_context_window", params.n_ctx},
{"auto_compact_token_limit", (params.n_ctx * 9) / 10},
{"effective_context_window_percent", 95},
{"experimental_supported_tools", json::array()},
{"input_modalities", json::array({"text"})},
{"supports_search_tool", false},
{"use_responses_lite", false},
{"auto_review_model_override", nullptr},
{"tool_mode", nullptr},
{"multi_agent_version", nullptr},
};
json models = {
{"object", "list"},
{"data", {
@ -1286,7 +1328,8 @@ int main(int argc, char ** argv) {
{"meta", model_meta},
{"max_model_len", params.n_ctx}, //vllm specs
},
}}
}},
{"models", json::array({codex_model})},
};
res.set_content(models.dump(), "application/json; charset=utf-8");

View File

@ -0,0 +1,37 @@
@llama.cpp
@server
@codex
Feature: Codex CLI Responses API Compatibility
Background: Server startup
Given a server listening on localhost:8080
And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
And a model file test-model.gguf
And a model alias tinyllama-2
And 42 as server seed
And 256 KV cache size
And 32 as batch size
And 2 slots
And 64 server max tokens to predict
And Jinja templating enabled
Then the server is starting
Then the server is healthy
Scenario: Responses API accepts mixed tool types from Codex
Given a model test
And an OAI compatible responses request with mixed Codex tool types
Then the mixed Codex tools response succeeds
Scenario: Models endpoint includes Codex model catalog metadata
Given the Codex model catalog is requested
Then the Codex model catalog is compatible
Scenario: Probe request with empty input and max_output_tokens=1 is accepted
Given a model test
And a probe responses request with empty input and max_output_tokens=1
Then the probe response is accepted
Scenario: previous_response_id returns a controlled error
Given a model test
And a responses request with invalid previous_response_id
Then the previous_response_id request returns an error

View File

@ -0,0 +1,263 @@
"""
Behave step definitions for Codex CLI Responses API compatibility tests.
Tests that /v1/responses accepts Codex-shaped payloads with mixed tool types.
"""
import json
from behave import step # pyright: ignore[reportAttributeAccessIssue]
from behave.api.async_step import async_run_until_complete
import aiohttp
# Codex CLI 0.133.0 mixes tool kinds in a single request: one function tool
# followed by the namespace, web_search and image_generation entries.
_CODEX_EXEC_COMMAND_TOOL = {
    "type": "function",
    "name": "exec_command",
    "description": "Run a command",
    "parameters": {
        "type": "object",
        "properties": {"cmd": {"type": "string"}},
        "required": ["cmd"],
    },
    "strict": False,
}

# Tool entries whose "type" is not "function".
_CODEX_NON_FUNCTION_TOOLS = [
    {
        "type": "namespace",
        "name": "multi_agent_v1",
        "description": "Sub-agent tools",
        "tools": [],
    },
    {"type": "web_search", "external_web_access": True},
    {"type": "image_generation", "output_format": "png"},
]

# Full request body; the tool order (function first) matches what Codex sends.
CODEX_MIXED_TOOLS_PAYLOAD = {
    "model": "test",
    "input": [
        {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Reply exactly local-ok"}],
        },
    ],
    "tools": [_CODEX_EXEC_COMMAND_TOOL, *_CODEX_NON_FUNCTION_TOOLS],
    "tool_choice": "auto",
    "parallel_tool_calls": False,
    "stream": False,
    "max_output_tokens": 8,
}
# Minimal probe request: empty string input and a one-token output budget.
PROBE_EMPTY_INPUT_PAYLOAD = dict(
    model="test",
    input="",
    stream=False,
    max_output_tokens=1,
)
# Request body carrying a previous_response_id; sent by the
# "invalid previous_response_id" step below.
INVALID_PREVIOUS_RESPONSE_PAYLOAD = dict(
    model="test",
    input=[
        dict(
            type="message",
            role="user",
            content=[dict(type="input_text", text="test")],
        ),
    ],
    previous_response_id="invalid-id-12345",
    stream=False,
)
@step("an OAI compatible responses request with mixed Codex tool types")
@async_run_until_complete
async def step_oai_responses_mixed_tools(context):
    """
    Send a Responses API request with mixed tool types from Codex CLI:
    - function (should be converted)
    - namespace, web_search, image_generation (should be skipped, not rejected)

    The HTTP status is stored in ``context.responses_status`` and the raw
    response body in ``context.responses_text`` for a later assertion step.
    """
    if context.debug:
        print("Submitting Responses API request with mixed Codex tool types...")

    # Shallow copy is enough: only the top-level "model" key is overwritten.
    payload = CODEX_MIXED_TOOLS_PAYLOAD.copy()
    if hasattr(context, "model") and context.model:
        payload["model"] = context.model

    # Use the placeholder key when the attribute is missing *or* falsy.
    # A bare hasattr() check would send "Bearer None" if the attribute exists
    # but was never given a value.
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/responses"
        async with session.post(url, json=payload, headers=headers) as response:
            context.responses_status = response.status
            context.responses_text = await response.text()
@step("the Codex model catalog is requested")
@async_run_until_complete
async def step_codex_model_catalog_requested(context):
    """Fetch /v1/models and keep the raw response for Codex catalog assertions.

    The HTTP status is stored in ``context.codex_models_status`` and the raw
    response body in ``context.codex_models_text``.
    """
    # Use the placeholder key when the attribute is missing *or* falsy.
    # A bare hasattr() check would send "Bearer None" if the attribute exists
    # but was never given a value.
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/models"
        async with session.get(url, headers=headers) as response:
            context.codex_models_status = response.status
            context.codex_models_text = await response.text()
@step("a probe responses request with empty input and max_output_tokens=1")
@async_run_until_complete
async def step_probe_empty_input(context):
    """Send a probe request with empty input and minimal tokens.

    The HTTP status is stored in ``context.probe_status`` and the raw response
    body in ``context.probe_text`` for a later assertion step.
    """
    if context.debug:
        print("Submitting probe Responses API request with empty input...")

    # Shallow copy is enough: only the top-level "model" key is overwritten.
    payload = PROBE_EMPTY_INPUT_PAYLOAD.copy()
    if hasattr(context, "model") and context.model:
        payload["model"] = context.model

    # Use the placeholder key when the attribute is missing *or* falsy.
    # A bare hasattr() check would send "Bearer None" if the attribute exists
    # but was never given a value.
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/responses"
        async with session.post(url, json=payload, headers=headers) as response:
            context.probe_status = response.status
            context.probe_text = await response.text()
@step("a responses request with invalid previous_response_id")
@async_run_until_complete
async def step_invalid_previous_response_id(context):
    """Send a request with an invalid previous_response_id to trigger an error.

    The HTTP status is stored in ``context.prev_resp_status`` and the raw
    response body in ``context.prev_resp_text`` for a later assertion step.
    """
    if context.debug:
        print("Submitting Responses API request with invalid previous_response_id...")

    # Shallow copy is enough: only the top-level "model" key is overwritten.
    payload = INVALID_PREVIOUS_RESPONSE_PAYLOAD.copy()
    if hasattr(context, "model") and context.model:
        payload["model"] = context.model

    # Use the placeholder key when the attribute is missing *or* falsy.
    # A bare hasattr() check would send "Bearer None" if the attribute exists
    # but was never given a value.
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    async with aiohttp.ClientSession() as session:
        url = f"{context.base_url}/v1/responses"
        async with session.post(url, json=payload, headers=headers) as response:
            context.prev_resp_status = response.status
            context.prev_resp_text = await response.text()
@step("the mixed Codex tools response succeeds")
def step_mixed_tools_response_succeeds(context):
    """Check the mixed-tools reply: HTTP 200, parseable JSON, 'id' and 'output' present.

    Reads ``context.responses_status`` and ``context.responses_text``; either
    may be absent, in which case it is treated as None.
    """
    http_status = getattr(context, "responses_status", None)
    body = getattr(context, "responses_text", None)

    assert http_status == 200, f"Mixed tools request failed with status {http_status}. Expected 200. Response: {body[:200] if body else '(empty)'}"
    assert body is not None, "No response body received"

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as decode_error:
        raise AssertionError(f"Invalid JSON response: {decode_error}") from decode_error

    # Required top-level Responses fields, checked in this order.
    required = (
        ("id", "Expected 'id' in Responses response"),
        ("output", "Expected 'output' in Responses response"),
    )
    for key, failure in required:
        assert key in parsed, failure
@step("the Codex model catalog is compatible")
def step_codex_model_catalog_compatible(context):
    """Check that the /v1/models reply keeps the OpenAI list shape and adds a Codex 'models' list.

    Reads ``context.codex_models_status`` and ``context.codex_models_text``,
    then inspects the first entry of the 'models' list.
    """
    http_status = getattr(context, "codex_models_status", None)
    body = getattr(context, "codex_models_text", None)

    assert http_status == 200, f"Models request failed with status {http_status}. Response: {body[:200] if body else '(empty)'}"
    assert body is not None, "No models response body"

    try:
        catalog = json.loads(body)
    except json.JSONDecodeError as decode_error:
        raise AssertionError(f"Invalid JSON models response: {decode_error}") from decode_error

    # OpenAI-compatible envelope.
    assert catalog.get("object") == "list", "Expected OpenAI-compatible object=list"
    assert isinstance(catalog.get("data"), list), "Expected OpenAI-compatible data list"

    # Codex-specific catalog that sits next to the OpenAI 'data' list.
    assert isinstance(catalog.get("models"), list), "Expected Codex-compatible models list"
    assert catalog["models"], "Expected at least one Codex model entry"
    entry = catalog["models"][0]

    required_fields = (
        "slug",
        "display_name",
        "supported_reasoning_levels",
        "shell_type",
        "visibility",
        "supported_in_api",
        "base_instructions",
        "truncation_policy",
        "context_window",
        "input_modalities",
    )
    for field in required_fields:
        assert field in entry, f"Expected Codex model field '{field}'"

    # Exact values expected by this step.
    assert entry["slug"] == "tinyllama-2"
    assert entry["visibility"] == "list"
    assert entry["supported_in_api"] is True
    assert entry["truncation_policy"]["mode"] == "tokens"
    assert entry["context_window"] == 256
@step("the probe response is accepted")
def step_probe_response_accepted(context):
    """Check the probe reply: HTTP 200, parseable JSON, 'id' and 'output' present.

    Reads ``context.probe_status`` and ``context.probe_text``; both must have
    been set before this step runs.
    """
    http_status = getattr(context, "probe_status", None)
    body = getattr(context, "probe_text", None)

    # Presence checks come first so the status message below can slice the body.
    assert http_status is not None, "No probe response status"
    assert body is not None, "No probe response body"
    assert http_status == 200, f"Probe request failed with status {http_status}. Expected 200. Response: {body[:200]}"

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as decode_error:
        raise AssertionError(f"Invalid JSON probe response: {decode_error}") from decode_error

    required = (
        ("id", "Expected 'id' in probe response"),
        ("output", "Expected 'output' in probe response"),
    )
    for key, failure in required:
        assert key in parsed, failure
@step("the previous_response_id request returns an error")
def step_previous_response_id_returns_error(context):
    """Check that the invalid previous_response_id request produced an error reply (status >= 400).

    Reads ``context.prev_resp_status`` and ``context.prev_resp_text``. The body
    must be JSON with an 'error' field whose message mentions
    'previous_response_id'.
    """
    http_status = getattr(context, "prev_resp_status", None)
    body = getattr(context, "prev_resp_text", None)

    assert http_status is not None, "No previous_response_id response status"
    assert body is not None, "No previous_response_id response body"
    assert http_status >= 400, f"Expected error status for invalid previous_response_id, got {http_status}"

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as decode_error:
        raise AssertionError(f"Invalid JSON error response: {decode_error}") from decode_error

    assert "error" in parsed, f"Expected 'error' field in error response. Got: {list(parsed.keys())}"

    # The error may be an object with a 'message' key or any other JSON value.
    error_field = parsed["error"]
    if isinstance(error_field, dict):
        message = error_field.get("message", "")
    else:
        message = str(error_field)

    assert "previous_response_id" in message, f"Expected 'previous_response_id' in error message. Got: {message}"

View File

@ -72,6 +72,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
context.response_format = None
context.temperature = None
context.lora_file = None
context.jinja_enabled = False
context.tasks_result = []
context.concurrent_tasks = []
@ -176,6 +177,11 @@ def step_server_metrics(context):
context.server_metrics = True
@step('Jinja templating enabled')
def step_enable_jinja(context):
    """Set ``context.jinja_enabled`` to True for the current scenario context."""
    context.jinja_enabled = True
@step("the server is starting")
def step_start_server(context):
start_server_background(context)
@ -1347,6 +1353,8 @@ def start_server_background(context):
server_args.append('--verbose')
if context.lora_file:
server_args.extend(['--lora', context.lora_file])
if context.jinja_enabled:
server_args.append('--jinja')
if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
server_args.extend(['--log-format', "text"])