Codex CLI Responses Compatibility (#1964)

* responses: skip known unsupported Responses tool types from Codex CLI
  - Skip namespace, web_search, and image_generation tools instead of returning HTTP 500
  - Reject unknown non-function tool types with a controlled error
  - Preserve the function tool conversion logic unchanged

  Fixes compatibility with Codex CLI 0.133.0, which sends mixed tool types.
* responses: harden Codex compatibility coverage
* responses: expose Codex model catalog metadata
2026-06-28 04:30:15 -05:00 · 2026-06-17 01:28:16 +12:00 · 2026-06-17 01:28:16 +12:00 · 064d23a6f8
commit 064d23a6f8
parent d37d92b54c
5 changed files with 370 additions and 4 deletions
--- a/examples/server/server-chat.cpp
+++ b/examples/server/server-chat.cpp
@ -223,8 +223,12 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
        for (json resp_tool : response_body.at("tools")) {
            json chatcmpl_tool;

-            if (json_value(resp_tool, "type", std::string()) != "function") {
-                throw std::runtime_error("'type' of tool must be 'function'");
+            const std::string tool_type = json_value(resp_tool, "type", std::string());
+
+            // Chat Completions only supports function tools, so every other tool type
+            // (web_search, image_generation, namespace, or anything unknown) is skipped.
+            if (tool_type != "function") {
+                continue;
            }
            resp_tool.erase("type");
            chatcmpl_tool["type"] = "function";
@ -236,7 +240,9 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
            chatcmpl_tools.push_back(chatcmpl_tool);
        }
        chatcmpl_body.erase("tools");
-        chatcmpl_body["tools"] = chatcmpl_tools;
+        if (!chatcmpl_tools.empty()) {
+            chatcmpl_body["tools"] = chatcmpl_tools;
+        }
    }

    if (response_body.contains("max_output_tokens")) {
@ -244,6 +250,15 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) {
        chatcmpl_body["max_tokens"] = response_body["max_output_tokens"];
    }

+    chatcmpl_body.erase("reasoning");
+    chatcmpl_body.erase("store");
+    chatcmpl_body.erase("include");
+    chatcmpl_body.erase("prompt_cache_key");
+    chatcmpl_body.erase("client_metadata");
+    chatcmpl_body.erase("background");
+    chatcmpl_body.erase("max_tool_calls");
+    chatcmpl_body.erase("metadata");
+
    return chatcmpl_body;
 }

--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@ -1275,6 +1275,48 @@ int main(int argc, char ** argv) {
    };

    const auto handle_models = [&params, &model_meta](const httplib::Request & req, httplib::Response & res) {
+        json codex_model = {
+            {"slug", params.model_alias},
+            {"display_name", params.model_alias},
+            {"description", nullptr},
+            {"default_reasoning_level", nullptr},
+            {"supported_reasoning_levels", json::array()},
+            {"shell_type", "default"},
+            {"visibility", "list"},
+            {"supported_in_api", true},
+            {"priority", 0},
+            {"additional_speed_tiers", json::array()},
+            {"service_tiers", json::array()},
+            {"default_service_tier", nullptr},
+            {"availability_nux", nullptr},
+            {"upgrade", nullptr},
+            {"base_instructions", ""},
+            {"model_messages", nullptr},
+            {"supports_reasoning_summaries", false},
+            {"default_reasoning_summary", "auto"},
+            {"support_verbosity", false},
+            {"default_verbosity", nullptr},
+            {"apply_patch_tool_type", nullptr},
+            {"web_search_tool_type", "text"},
+            {"truncation_policy", {
+                {"mode", "tokens"},
+                {"limit", params.n_ctx},
+            }},
+            {"supports_parallel_tool_calls", false},
+            {"supports_image_detail_original", false},
+            {"context_window", params.n_ctx},
+            {"max_context_window", params.n_ctx},
+            {"auto_compact_token_limit", (params.n_ctx * 9) / 10},
+            {"effective_context_window_percent", 95},
+            {"experimental_supported_tools", json::array()},
+            {"input_modalities", json::array({"text"})},
+            {"supports_search_tool", false},
+            {"use_responses_lite", false},
+            {"auto_review_model_override", nullptr},
+            {"tool_mode", nullptr},
+            {"multi_agent_version", nullptr},
+        };
+
        json models = {
            {"object", "list"},
            {"data", {
@ -1286,7 +1328,8 @@ int main(int argc, char ** argv) {
                     {"meta",     model_meta},
                     {"max_model_len", params.n_ctx}, //vllm specs
                 },
-             }}
+             }},
+            {"models", json::array({codex_model})},
        };

        res.set_content(models.dump(), "application/json; charset=utf-8");
--- a/examples/server/tests/features/codex_responses.feature
+++ b/examples/server/tests/features/codex_responses.feature
@ -0,0 +1,37 @@
+@llama.cpp
+@server
+@codex
+Feature: Codex CLI Responses API Compatibility
+
+  Background: Server startup
+    Given a server listening on localhost:8080
+    And   a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models
+    And   a model file test-model.gguf
+    And   a model alias tinyllama-2
+    And   42 as server seed
+    And   256 KV cache size
+    And   32 as batch size
+    And   2 slots
+    And   64 server max tokens to predict
+    And   Jinja templating enabled
+    Then  the server is starting
+    Then  the server is healthy
+
+  Scenario: Responses API accepts mixed tool types from Codex
+    Given a model test
+    And   an OAI compatible responses request with mixed Codex tool types
+    Then  the mixed Codex tools response succeeds
+
+  Scenario: Models endpoint includes Codex model catalog metadata
+    Given the Codex model catalog is requested
+    Then  the Codex model catalog is compatible
+
+  Scenario: Probe request with empty input and max_output_tokens=1 is accepted
+    Given a model test
+    And   a probe responses request with empty input and max_output_tokens=1
+    Then  the probe response is accepted
+
+  Scenario: previous_response_id returns a controlled error
+    Given a model test
+    And   a responses request with invalid previous_response_id
+    Then  the previous_response_id request returns an error
--- a/examples/server/tests/features/steps/codex_responses_steps.py
+++ b/examples/server/tests/features/steps/codex_responses_steps.py
@ -0,0 +1,263 @@
+"""
+Behave step definitions for Codex CLI Responses API compatibility tests.
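+Covers /v1/responses with Codex-shaped payloads (mixed tool types, an empty-input probe,
+and an invalid previous_response_id) and the Codex model catalog returned by /v1/models.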
+"""
+
+import json
+
+from behave import step  # pyright: ignore[reportAttributeAccessIssue]
+from behave.api.async_step import async_run_until_complete
+
+import aiohttp
+
+
+# Codex CLI 0.133.0 sends mixed tool types: function + namespace + web_search + image_generation
+CODEX_MIXED_TOOLS_PAYLOAD = {
+    "model": "test",
+    "input": [
+        {
+            "type": "message",
+            "role": "user",
+            "content": [{"type": "input_text", "text": "Reply exactly local-ok"}]
+        }
+    ],
+    "tools": [
+        {
+            "type": "function",
+            "name": "exec_command",
+            "description": "Run a command",
+            "parameters": {
+                "type": "object",
+                "properties": {"cmd": {"type": "string"}},
+                "required": ["cmd"]
+            },
+            "strict": False
+        },
+        {
+            "type": "namespace",
+            "name": "multi_agent_v1",
+            "description": "Sub-agent tools",
+            "tools": []
+        },
+        {
+            "type": "web_search",
+            "external_web_access": True
+        },
+        {
+            "type": "image_generation",
+            "output_format": "png"
+        }
+    ],
+    "tool_choice": "auto",
+    "parallel_tool_calls": False,
+    "stream": False,
+    "max_output_tokens": 8,
+}
+
+PROBE_EMPTY_INPUT_PAYLOAD = {
+    "model": "test",
+    "input": "",
+    "stream": False,
+    "max_output_tokens": 1,
+}
+
+INVALID_PREVIOUS_RESPONSE_PAYLOAD = {
+    "model": "test",
+    "input": [
+        {
+            "type": "message",
+            "role": "user",
+            "content": [{"type": "input_text", "text": "test"}]
+        }
+    ],
+    "previous_response_id": "invalid-id-12345",
+    "stream": False,
+}
+
+
+@step("an OAI compatible responses request with mixed Codex tool types")
+@async_run_until_complete
+async def step_oai_responses_mixed_tools(context):
+    """
+    Send a Responses API request with mixed tool types from Codex CLI:
+    - function (should be converted)
+    - namespace, web_search, image_generation (should be skipped, not rejected)
+    """
+    if context.debug:
+        print("Submitting Responses API request with mixed Codex tool types...")
+
+    payload = CODEX_MIXED_TOOLS_PAYLOAD.copy()
+    if hasattr(context, "model") and context.model:
+        payload["model"] = context.model
+
+    async with aiohttp.ClientSession() as session:
+        url = f"{context.base_url}/v1/responses"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}",
+        }
+
+        async with session.post(url, json=payload, headers=headers) as response:
+            context.responses_status = response.status
+            context.responses_text = await response.text()
+
+
+@step("the Codex model catalog is requested")
+@async_run_until_complete
+async def step_codex_model_catalog_requested(context):
+    """Fetch /v1/models and keep the raw response for Codex catalog assertions."""
+    async with aiohttp.ClientSession() as session:
+        url = f"{context.base_url}/v1/models"
+        headers = {
+            "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}",
+        }
+
+        async with session.get(url, headers=headers) as response:
+            context.codex_models_status = response.status
+            context.codex_models_text = await response.text()
+
+
+@step("a probe responses request with empty input and max_output_tokens=1")
+@async_run_until_complete
+async def step_probe_empty_input(context):
+    """Send a probe request with empty input and minimal tokens."""
+    if context.debug:
+        print("Submitting probe Responses API request with empty input...")
+
+    payload = PROBE_EMPTY_INPUT_PAYLOAD.copy()
+    if hasattr(context, "model") and context.model:
+        payload["model"] = context.model
+
+    async with aiohttp.ClientSession() as session:
+        url = f"{context.base_url}/v1/responses"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}",
+        }
+
+        async with session.post(url, json=payload, headers=headers) as response:
+            context.probe_status = response.status
+            context.probe_text = await response.text()
+
+
+@step("a responses request with invalid previous_response_id")
+@async_run_until_complete
+async def step_invalid_previous_response_id(context):
+    """Send a request with an invalid previous_response_id to trigger an error."""
+    if context.debug:
+        print("Submitting Responses API request with invalid previous_response_id...")
+
+    payload = INVALID_PREVIOUS_RESPONSE_PAYLOAD.copy()
+    if hasattr(context, "model") and context.model:
+        payload["model"] = context.model
+
+    async with aiohttp.ClientSession() as session:
+        url = f"{context.base_url}/v1/responses"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}",
+        }
+
+        async with session.post(url, json=payload, headers=headers) as response:
+            context.prev_resp_status = response.status
+            context.prev_resp_text = await response.text()
+
+
+@step("the mixed Codex tools response succeeds")
+def step_mixed_tools_response_succeeds(context):
+    """Assert HTTP 200, valid JSON, and required Responses fields."""
+    status = getattr(context, "responses_status", None)
+    text = getattr(context, "responses_text", None)
+
+    assert status == 200, f"Mixed tools request failed with status {status}. Expected 200. Response: {text[:200] if text else '(empty)'}"
+    assert text is not None, "No response body received"
+
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError as e:
+        raise AssertionError(f"Invalid JSON response: {e}") from e
+
+    assert "id" in data, "Expected 'id' in Responses response"
+    assert "output" in data, "Expected 'output' in Responses response"
+
+
+@step("the Codex model catalog is compatible")
+def step_codex_model_catalog_compatible(context):
+    """Assert /v1/models preserves OpenAI shape and includes Codex ModelInfo."""
+    status = getattr(context, "codex_models_status", None)
+    text = getattr(context, "codex_models_text", None)
+
+    assert status == 200, f"Models request failed with status {status}. Response: {text[:200] if text else '(empty)'}"
+    assert text is not None, "No models response body"
+
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError as e:
+        raise AssertionError(f"Invalid JSON models response: {e}") from e
+
+    assert data.get("object") == "list", "Expected OpenAI-compatible object=list"
+    assert isinstance(data.get("data"), list), "Expected OpenAI-compatible data list"
+    assert isinstance(data.get("models"), list), "Expected Codex-compatible models list"
+    assert data["models"], "Expected at least one Codex model entry"
+
+    model = data["models"][0]
+    for field in [
+        "slug",
+        "display_name",
+        "supported_reasoning_levels",
+        "shell_type",
+        "visibility",
+        "supported_in_api",
+        "base_instructions",
+        "truncation_policy",
+        "context_window",
+        "input_modalities",
+    ]:
+        assert field in model, f"Expected Codex model field '{field}'"
+
+    assert model["slug"] == "tinyllama-2"
+    assert model["visibility"] == "list"
+    assert model["supported_in_api"] is True
+    assert model["truncation_policy"]["mode"] == "tokens"
+    assert model["context_window"] == 256
+
+
+@step("the probe response is accepted")
+def step_probe_response_accepted(context):
+    """Assert probe request succeeds with HTTP 200, valid JSON, id, and output."""
+    status = getattr(context, "probe_status", None)
+    text = getattr(context, "probe_text", None)
+
+    assert status is not None, "No probe response status"
+    assert text is not None, "No probe response body"
+
+    assert status == 200, f"Probe request failed with status {status}. Expected 200. Response: {text[:200]}"
+
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError as e:
+        raise AssertionError(f"Invalid JSON probe response: {e}") from e
+
+    assert "id" in data, "Expected 'id' in probe response"
+    assert "output" in data, "Expected 'output' in probe response"
+
+
+@step("the previous_response_id request returns an error")
+def step_previous_response_id_returns_error(context):
+    """Assert an invalid previous_response_id yields a status >= 400 and a JSON 'error' whose message names previous_response_id."""
+    status = getattr(context, "prev_resp_status", None)
+    text = getattr(context, "prev_resp_text", None)
+
+    assert status is not None, "No previous_response_id response status"
+    assert text is not None, "No previous_response_id response body"
+
+    assert status >= 400, f"Expected error status for invalid previous_response_id, got {status}"
+
+    try:
+        data = json.loads(text)
+    except json.JSONDecodeError as e:
+        raise AssertionError(f"Invalid JSON error response: {e}") from e
+
+    assert "error" in data, f"Expected 'error' field in error response. Got: {list(data.keys())}"
+    error_msg = data["error"].get("message", "") if isinstance(data["error"], dict) else str(data["error"])
+    assert "previous_response_id" in error_msg, f"Expected 'previous_response_id' in error message. Got: {error_msg}"
--- a/examples/server/tests/features/steps/steps.py
+++ b/examples/server/tests/features/steps/steps.py
@ -72,6 +72,7 @@ def step_server_config(context, server_fqdn: str, server_port: str):
    context.response_format = None
    context.temperature = None
    context.lora_file = None
+    context.jinja_enabled = False

    context.tasks_result = []
    context.concurrent_tasks = []
@ -176,6 +177,11 @@ def step_server_metrics(context):
    context.server_metrics = True


+@step('Jinja templating enabled')
+def step_enable_jinja(context):
+    context.jinja_enabled = True
+
+
@step("the server is starting")
 def step_start_server(context):
    start_server_background(context)
@ -1347,6 +1353,8 @@ def start_server_background(context):
        server_args.append('--verbose')
    if context.lora_file:
        server_args.extend(['--lora', context.lora_file])
+    if context.jinja_enabled:
+        server_args.append('--jinja')
    if 'SERVER_LOG_FORMAT_JSON' not in os.environ:
        server_args.extend(['--log-format', "text"])