diff --git a/examples/server/server-chat.cpp b/examples/server/server-chat.cpp index 9afb9499..db3cfc6f 100644 --- a/examples/server/server-chat.cpp +++ b/examples/server/server-chat.cpp @@ -223,8 +223,12 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) { for (json resp_tool : response_body.at("tools")) { json chatcmpl_tool; - if (json_value(resp_tool, "type", std::string()) != "function") { - throw std::runtime_error("'type' of tool must be 'function'"); + const std::string tool_type = json_value(resp_tool, "type", std::string()); + + // Chat Completions only supports function tools. Responses built-ins + // such as web_search, image_generation, and namespace are ignored. + if (tool_type != "function") { + continue; } resp_tool.erase("type"); chatcmpl_tool["type"] = "function"; @@ -236,7 +240,9 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) { chatcmpl_tools.push_back(chatcmpl_tool); } chatcmpl_body.erase("tools"); - chatcmpl_body["tools"] = chatcmpl_tools; + if (!chatcmpl_tools.empty()) { + chatcmpl_body["tools"] = chatcmpl_tools; + } } if (response_body.contains("max_output_tokens")) { @@ -244,6 +250,15 @@ json server_chat_convert_responses_to_chatcmpl(const json& response_body) { chatcmpl_body["max_tokens"] = response_body["max_output_tokens"]; } + chatcmpl_body.erase("reasoning"); + chatcmpl_body.erase("store"); + chatcmpl_body.erase("include"); + chatcmpl_body.erase("prompt_cache_key"); + chatcmpl_body.erase("client_metadata"); + chatcmpl_body.erase("background"); + chatcmpl_body.erase("max_tool_calls"); + chatcmpl_body.erase("metadata"); + return chatcmpl_body; } diff --git a/examples/server/server.cpp b/examples/server/server.cpp index b51f8e4f..26739170 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1275,6 +1275,48 @@ int main(int argc, char ** argv) { }; const auto handle_models = [&params, &model_meta](const httplib::Request & req, httplib::Response & res) { + json 
codex_model = { + {"slug", params.model_alias}, + {"display_name", params.model_alias}, + {"description", nullptr}, + {"default_reasoning_level", nullptr}, + {"supported_reasoning_levels", json::array()}, + {"shell_type", "default"}, + {"visibility", "list"}, + {"supported_in_api", true}, + {"priority", 0}, + {"additional_speed_tiers", json::array()}, + {"service_tiers", json::array()}, + {"default_service_tier", nullptr}, + {"availability_nux", nullptr}, + {"upgrade", nullptr}, + {"base_instructions", ""}, + {"model_messages", nullptr}, + {"supports_reasoning_summaries", false}, + {"default_reasoning_summary", "auto"}, + {"support_verbosity", false}, + {"default_verbosity", nullptr}, + {"apply_patch_tool_type", nullptr}, + {"web_search_tool_type", "text"}, + {"truncation_policy", { + {"mode", "tokens"}, + {"limit", params.n_ctx}, + }}, + {"supports_parallel_tool_calls", false}, + {"supports_image_detail_original", false}, + {"context_window", params.n_ctx}, + {"max_context_window", params.n_ctx}, + {"auto_compact_token_limit", (params.n_ctx * 9) / 10}, + {"effective_context_window_percent", 95}, + {"experimental_supported_tools", json::array()}, + {"input_modalities", json::array({"text"})}, + {"supports_search_tool", false}, + {"use_responses_lite", false}, + {"auto_review_model_override", nullptr}, + {"tool_mode", nullptr}, + {"multi_agent_version", nullptr}, + }; + json models = { {"object", "list"}, {"data", { @@ -1286,7 +1328,8 @@ int main(int argc, char ** argv) { {"meta", model_meta}, {"max_model_len", params.n_ctx}, //vllm specs }, - }} + }}, + {"models", json::array({codex_model})}, }; res.set_content(models.dump(), "application/json; charset=utf-8"); diff --git a/examples/server/tests/features/codex_responses.feature b/examples/server/tests/features/codex_responses.feature new file mode 100644 index 00000000..96dd00a6 --- /dev/null +++ b/examples/server/tests/features/codex_responses.feature @@ -0,0 +1,37 @@ +@llama.cpp +@server +@codex +Feature: Codex 
CLI Responses API Compatibility + + Background: Server startup + Given a server listening on localhost:8080 + And a model file tinyllamas/stories260K.gguf from HF repo ggml-org/models + And a model file test-model.gguf + And a model alias tinyllama-2 + And 42 as server seed + And 256 KV cache size + And 32 as batch size + And 2 slots + And 64 server max tokens to predict + And Jinja templating enabled + Then the server is starting + Then the server is healthy + + Scenario: Responses API accepts mixed tool types from Codex + Given a model test + And an OAI compatible responses request with mixed Codex tool types + Then the mixed Codex tools response succeeds + + Scenario: Models endpoint includes Codex model catalog metadata + Given the Codex model catalog is requested + Then the Codex model catalog is compatible + + Scenario: Probe request with empty input and max_output_tokens=1 is accepted + Given a model test + And a probe responses request with empty input and max_output_tokens=1 + Then the probe response is accepted + + Scenario: previous_response_id returns a controlled error + Given a model test + And a responses request with invalid previous_response_id + Then the previous_response_id request returns an error diff --git a/examples/server/tests/features/steps/codex_responses_steps.py b/examples/server/tests/features/steps/codex_responses_steps.py new file mode 100644 index 00000000..a11f6860 --- /dev/null +++ b/examples/server/tests/features/steps/codex_responses_steps.py @@ -0,0 +1,263 @@ +""" +Behave step definitions for Codex CLI Responses API compatibility tests. +Tests that /v1/responses accepts Codex-shaped payloads with mixed tool types. 
+""" + +import json + +from behave import step # pyright: ignore[reportAttributeAccessIssue] +from behave.api.async_step import async_run_until_complete + +import aiohttp + + +# Codex CLI 0.133.0 sends mixed tool types: function + namespace + web_search + image_generation +CODEX_MIXED_TOOLS_PAYLOAD = { + "model": "test", + "input": [ + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": "Reply exactly local-ok"}] + } + ], + "tools": [ + { + "type": "function", + "name": "exec_command", + "description": "Run a command", + "parameters": { + "type": "object", + "properties": {"cmd": {"type": "string"}}, + "required": ["cmd"] + }, + "strict": False + }, + { + "type": "namespace", + "name": "multi_agent_v1", + "description": "Sub-agent tools", + "tools": [] + }, + { + "type": "web_search", + "external_web_access": True + }, + { + "type": "image_generation", + "output_format": "png" + } + ], + "tool_choice": "auto", + "parallel_tool_calls": False, + "stream": False, + "max_output_tokens": 8, +} + +PROBE_EMPTY_INPUT_PAYLOAD = { + "model": "test", + "input": "", + "stream": False, + "max_output_tokens": 1, +} + +INVALID_PREVIOUS_RESPONSE_PAYLOAD = { + "model": "test", + "input": [ + { + "type": "message", + "role": "user", + "content": [{"type": "input_text", "text": "test"}] + } + ], + "previous_response_id": "invalid-id-12345", + "stream": False, +} + + +@step("an OAI compatible responses request with mixed Codex tool types") +@async_run_until_complete +async def step_oai_responses_mixed_tools(context): + """ + Send a Responses API request with mixed tool types from Codex CLI: + - function (should be converted) + - namespace, web_search, image_generation (should be skipped, not rejected) + """ + if context.debug: + print("Submitting Responses API request with mixed Codex tool types...") + + payload = CODEX_MIXED_TOOLS_PAYLOAD.copy() + if hasattr(context, "model") and context.model: + payload["model"] = context.model + + async with 
aiohttp.ClientSession() as session: + url = f"{context.base_url}/v1/responses" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}", + } + + async with session.post(url, json=payload, headers=headers) as response: + context.responses_status = response.status + context.responses_text = await response.text() + + +@step("the Codex model catalog is requested") +@async_run_until_complete +async def step_codex_model_catalog_requested(context): + """Fetch /v1/models and keep the raw response for Codex catalog assertions.""" + async with aiohttp.ClientSession() as session: + url = f"{context.base_url}/v1/models" + headers = { + "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}", + } + + async with session.get(url, headers=headers) as response: + context.codex_models_status = response.status + context.codex_models_text = await response.text() + + +@step("a probe responses request with empty input and max_output_tokens=1") +@async_run_until_complete +async def step_probe_empty_input(context): + """Send a probe request with empty input and minimal tokens.""" + if context.debug: + print("Submitting probe Responses API request with empty input...") + + payload = PROBE_EMPTY_INPUT_PAYLOAD.copy() + if hasattr(context, "model") and context.model: + payload["model"] = context.model + + async with aiohttp.ClientSession() as session: + url = f"{context.base_url}/v1/responses" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}", + } + + async with session.post(url, json=payload, headers=headers) as response: + context.probe_status = response.status + context.probe_text = await response.text() + + +@step("a responses request with invalid previous_response_id") +@async_run_until_complete +async def step_invalid_previous_response_id(context): + 
"""Send a request with an invalid previous_response_id to trigger an error.""" + if context.debug: + print("Submitting Responses API request with invalid previous_response_id...") + + payload = INVALID_PREVIOUS_RESPONSE_PAYLOAD.copy() + if hasattr(context, "model") and context.model: + payload["model"] = context.model + + async with aiohttp.ClientSession() as session: + url = f"{context.base_url}/v1/responses" + headers = { + "Content-Type": "application/json", + "Authorization": f"Bearer {context.user_api_key if hasattr(context, 'user_api_key') else 'test'}", + } + + async with session.post(url, json=payload, headers=headers) as response: + context.prev_resp_status = response.status + context.prev_resp_text = await response.text() + + +@step("the mixed Codex tools response succeeds") +def step_mixed_tools_response_succeeds(context): + """Assert HTTP 200, valid JSON, and required Responses fields.""" + status = getattr(context, "responses_status", None) + text = getattr(context, "responses_text", None) + + assert status == 200, f"Mixed tools request failed with status {status}. Expected 200. Response: {text[:200] if text else '(empty)'}" + assert text is not None, "No response body received" + + try: + data = json.loads(text) + except json.JSONDecodeError as e: + raise AssertionError(f"Invalid JSON response: {e}") from e + + assert "id" in data, "Expected 'id' in Responses response" + assert "output" in data, "Expected 'output' in Responses response" + + +@step("the Codex model catalog is compatible") +def step_codex_model_catalog_compatible(context): + """Assert /v1/models preserves OpenAI shape and includes Codex ModelInfo.""" + status = getattr(context, "codex_models_status", None) + text = getattr(context, "codex_models_text", None) + + assert status == 200, f"Models request failed with status {status}. 
Response: {text[:200] if text else '(empty)'}" + assert text is not None, "No models response body" + + try: + data = json.loads(text) + except json.JSONDecodeError as e: + raise AssertionError(f"Invalid JSON models response: {e}") from e + + assert data.get("object") == "list", "Expected OpenAI-compatible object=list" + assert isinstance(data.get("data"), list), "Expected OpenAI-compatible data list" + assert isinstance(data.get("models"), list), "Expected Codex-compatible models list" + assert data["models"], "Expected at least one Codex model entry" + + model = data["models"][0] + for field in [ + "slug", + "display_name", + "supported_reasoning_levels", + "shell_type", + "visibility", + "supported_in_api", + "base_instructions", + "truncation_policy", + "context_window", + "input_modalities", + ]: + assert field in model, f"Expected Codex model field '{field}'" + + assert model["slug"] == "tinyllama-2" + assert model["visibility"] == "list" + assert model["supported_in_api"] is True + assert model["truncation_policy"]["mode"] == "tokens" + assert model["context_window"] == 256 + + +@step("the probe response is accepted") +def step_probe_response_accepted(context): + """Assert probe request succeeds with HTTP 200, valid JSON, id, and output.""" + status = getattr(context, "probe_status", None) + text = getattr(context, "probe_text", None) + + assert status is not None, "No probe response status" + assert text is not None, "No probe response body" + + assert status == 200, f"Probe request failed with status {status}. Expected 200. 
Response: {text[:200]}" + + try: + data = json.loads(text) + except json.JSONDecodeError as e: + raise AssertionError(f"Invalid JSON probe response: {e}") from e + + assert "id" in data, "Expected 'id' in probe response" + assert "output" in data, "Expected 'output' in probe response" + + +@step("the previous_response_id request returns an error") +def step_previous_response_id_returns_error(context): + """Assert that invalid previous_response_id returns an error response (4xx or 5xx).""" + status = getattr(context, "prev_resp_status", None) + text = getattr(context, "prev_resp_text", None) + + assert status is not None, "No previous_response_id response status" + assert text is not None, "No previous_response_id response body" + + assert status >= 400, f"Expected error status for invalid previous_response_id, got {status}" + + try: + data = json.loads(text) + except json.JSONDecodeError as e: + raise AssertionError(f"Invalid JSON error response: {e}") from e + + assert "error" in data, f"Expected 'error' field in error response. Got: {list(data.keys())}" + error_msg = data["error"].get("message", "") if isinstance(data["error"], dict) else str(data["error"]) + assert "previous_response_id" in error_msg, f"Expected 'previous_response_id' in error message. 
Got: {error_msg}" diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 6705a34f..4353e00a 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -72,6 +72,7 @@ def step_server_config(context, server_fqdn: str, server_port: str): context.response_format = None context.temperature = None context.lora_file = None + context.jinja_enabled = False context.tasks_result = [] context.concurrent_tasks = [] @@ -176,6 +177,11 @@ def step_server_metrics(context): context.server_metrics = True +@step('Jinja templating enabled') +def step_enable_jinja(context): + context.jinja_enabled = True + + @step("the server is starting") def step_start_server(context): start_server_background(context) @@ -1347,6 +1353,8 @@ def start_server_background(context): server_args.append('--verbose') if context.lora_file: server_args.extend(['--lora', context.lora_file]) + if context.jinja_enabled: + server_args.append('--jinja') if 'SERVER_LOG_FORMAT_JSON' not in os.environ: server_args.extend(['--log-format', "text"])