ik_llama.cpp/examples/server/tests/features/steps/codex_responses_steps.py
Jun Yamog 064d23a6f8
Codex CLI Responses Compatibility (#1964)
* responses: skip known unsupported Responses tool types from Codex CLI

- Skip namespace, web_search, image_generation tools instead of HTTP 500
- Reject unknown non-function tool types with controlled error
- Preserve function tool conversion logic unchanged

Fixes Codex CLI 0.133.0 compatibility where it sends mixed tool types.

* responses: harden codex compatibility coverage

* responses: expose Codex model catalog metadata
2026-06-16 15:28:16 +02:00

264 lines
9.4 KiB
Python

"""
Behave step definitions for Codex CLI Responses API compatibility tests.
Tests that /v1/responses accepts Codex-shaped payloads with mixed tool types.
"""
import json
from behave import step # pyright: ignore[reportAttributeAccessIssue]
from behave.api.async_step import async_run_until_complete
import aiohttp
# Request body shaped like what Codex CLI 0.133.0 sends: a single function tool
# mixed with namespace, web_search and image_generation tool entries.
CODEX_MIXED_TOOLS_PAYLOAD = {
    "model": "test",
    "input": [
        {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "Reply exactly local-ok"}],
        },
    ],
    "tools": [
        # The only "function"-typed tool in the request.
        {
            "type": "function",
            "name": "exec_command",
            "description": "Run a command",
            "parameters": {
                "type": "object",
                "properties": {"cmd": {"type": "string"}},
                "required": ["cmd"],
            },
            "strict": False,
        },
        # Non-function tool types that accompany it.
        {
            "type": "namespace",
            "name": "multi_agent_v1",
            "description": "Sub-agent tools",
            "tools": [],
        },
        {"type": "web_search", "external_web_access": True},
        {"type": "image_generation", "output_format": "png"},
    ],
    "tool_choice": "auto",
    "parallel_tool_calls": False,
    "stream": False,
    # Keep generation short; the tests only inspect the response envelope.
    "max_output_tokens": 8,
}
# Minimal probe body: empty string input, non-streaming, one output token.
PROBE_EMPTY_INPUT_PAYLOAD = {
    "model": "test",
    "input": "",  # deliberately empty
    "stream": False,
    "max_output_tokens": 1,
}
# Non-streaming request whose previous_response_id is a made-up value, used to
# provoke an error response.
INVALID_PREVIOUS_RESPONSE_PAYLOAD = {
    "model": "test",
    "input": [
        {
            "type": "message",
            "role": "user",
            "content": [{"type": "input_text", "text": "test"}],
        },
    ],
    "previous_response_id": "invalid-id-12345",
    "stream": False,
}
@step("an OAI compatible responses request with mixed Codex tool types")
@async_run_until_complete
async def step_oai_responses_mixed_tools(context):
    """
    Send a Responses API request with the mixed tool types Codex CLI emits.

    The payload carries:
    - a ``function`` tool (should be converted)
    - ``namespace``, ``web_search`` and ``image_generation`` tools
      (should be skipped, not rejected)

    Reads ``context.debug`` and ``context.base_url``, plus ``context.model``
    and ``context.user_api_key`` when they are set. Stores the HTTP status in
    ``context.responses_status`` and the raw body in
    ``context.responses_text``.
    """
    if context.debug:
        print("Submitting Responses API request with mixed Codex tool types...")

    # A shallow copy is enough: only the top-level "model" key is overridden.
    payload = CODEX_MIXED_TOOLS_PAYLOAD.copy()
    model = getattr(context, "model", None)
    if model:
        payload["model"] = model

    # Fall back to the dummy key when the attribute is missing *or* unset
    # (None / empty); a bare hasattr() check would send "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    url = f"{context.base_url}/v1/responses"

    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as response:
            context.responses_status = response.status
            context.responses_text = await response.text()
@step("the Codex model catalog is requested")
@async_run_until_complete
async def step_codex_model_catalog_requested(context):
    """
    Fetch /v1/models and keep the raw response for Codex catalog assertions.

    Reads ``context.base_url`` and, when set, ``context.user_api_key``.
    Stores the HTTP status in ``context.codex_models_status`` and the raw body
    in ``context.codex_models_text``.
    """
    # Fall back to the dummy key when the attribute is missing *or* unset
    # (None / empty); a bare hasattr() check would send "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Authorization": f"Bearer {api_key}",
    }
    url = f"{context.base_url}/v1/models"

    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            context.codex_models_status = response.status
            context.codex_models_text = await response.text()
@step("a probe responses request with empty input and max_output_tokens=1")
@async_run_until_complete
async def step_probe_empty_input(context):
    """
    Send a probe request with empty input and minimal tokens.

    Reads ``context.debug`` and ``context.base_url``, plus ``context.model``
    and ``context.user_api_key`` when they are set. Stores the HTTP status in
    ``context.probe_status`` and the raw body in ``context.probe_text``.
    """
    if context.debug:
        print("Submitting probe Responses API request with empty input...")

    # A shallow copy is enough: only the top-level "model" key is overridden.
    payload = PROBE_EMPTY_INPUT_PAYLOAD.copy()
    model = getattr(context, "model", None)
    if model:
        payload["model"] = model

    # Fall back to the dummy key when the attribute is missing *or* unset
    # (None / empty); a bare hasattr() check would send "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    url = f"{context.base_url}/v1/responses"

    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as response:
            context.probe_status = response.status
            context.probe_text = await response.text()
@step("a responses request with invalid previous_response_id")
@async_run_until_complete
async def step_invalid_previous_response_id(context):
    """
    Send a request with an invalid previous_response_id to trigger an error.

    Reads ``context.debug`` and ``context.base_url``, plus ``context.model``
    and ``context.user_api_key`` when they are set. Stores the HTTP status in
    ``context.prev_resp_status`` and the raw body in
    ``context.prev_resp_text``.
    """
    if context.debug:
        print("Submitting Responses API request with invalid previous_response_id...")

    # A shallow copy is enough: only the top-level "model" key is overridden.
    payload = INVALID_PREVIOUS_RESPONSE_PAYLOAD.copy()
    model = getattr(context, "model", None)
    if model:
        payload["model"] = model

    # Fall back to the dummy key when the attribute is missing *or* unset
    # (None / empty); a bare hasattr() check would send "Bearer None".
    api_key = getattr(context, "user_api_key", None) or "test"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    url = f"{context.base_url}/v1/responses"

    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as response:
            context.prev_resp_status = response.status
            context.prev_resp_text = await response.text()
@step("the mixed Codex tools response succeeds")
def step_mixed_tools_response_succeeds(context):
    """
    Assert HTTP 200, valid JSON, and required Responses fields.

    Reads ``context.responses_status`` and ``context.responses_text``.

    Raises:
        AssertionError: if no response was recorded, the status is not 200,
            the body is not valid JSON, or ``id`` / ``output`` is missing.
    """
    status = getattr(context, "responses_status", None)
    text = getattr(context, "responses_text", None)

    # Report "request step never ran" separately from a real HTTP failure,
    # matching the other response assertion steps in this file.
    assert status is not None, "No mixed tools response status"
    assert text is not None, "No response body received"
    assert status == 200, (
        f"Mixed tools request failed with status {status}. Expected 200. "
        f"Response: {text[:200] if text else '(empty)'}"
    )

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        raise AssertionError(f"Invalid JSON response: {e}") from e

    assert "id" in data, "Expected 'id' in Responses response"
    assert "output" in data, "Expected 'output' in Responses response"
@step("the Codex model catalog is compatible")
def step_codex_model_catalog_compatible(context):
    """
    Assert /v1/models preserves OpenAI shape and includes Codex ModelInfo.

    Reads ``context.codex_models_status`` and ``context.codex_models_text``.
    Checks the OpenAI-style ``object`` / ``data`` keys, then the first entry
    of the ``models`` list for the required fields and expected values.

    Raises:
        AssertionError: if the status is not 200, the body is not a JSON
            object of the expected shape, a required field is missing, or a
            checked value differs.
    """
    status = getattr(context, "codex_models_status", None)
    text = getattr(context, "codex_models_text", None)
    assert status == 200, f"Models request failed with status {status}. Response: {text[:200] if text else '(empty)'}"
    assert text is not None, "No models response body"

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        raise AssertionError(f"Invalid JSON models response: {e}") from e

    # Guard the container types so a malformed body fails as an assertion
    # rather than as AttributeError/TypeError from .get() or indexing.
    assert isinstance(data, dict), f"Expected JSON object for models response, got {type(data).__name__}"
    assert data.get("object") == "list", "Expected OpenAI-compatible object=list"
    assert isinstance(data.get("data"), list), "Expected OpenAI-compatible data list"
    assert isinstance(data.get("models"), list), "Expected Codex-compatible models list"
    assert data["models"], "Expected at least one Codex model entry"

    model = data["models"][0]
    assert isinstance(model, dict), f"Expected Codex model entry to be an object, got {type(model).__name__}"
    for field in [
        "slug",
        "display_name",
        "supported_reasoning_levels",
        "shell_type",
        "visibility",
        "supported_in_api",
        "base_instructions",
        "truncation_policy",
        "context_window",
        "input_modalities",
    ]:
        assert field in model, f"Expected Codex model field '{field}'"

    # NOTE(review): the slug and context window below presumably mirror the
    # server configuration used by the feature file - confirm if it changes.
    assert model["slug"] == "tinyllama-2", f"Expected slug 'tinyllama-2', got {model['slug']!r}"
    assert model["visibility"] == "list", f"Expected visibility 'list', got {model['visibility']!r}"
    assert model["supported_in_api"] is True, f"Expected supported_in_api True, got {model['supported_in_api']!r}"

    truncation_policy = model["truncation_policy"]
    assert isinstance(truncation_policy, dict), (
        f"Expected truncation_policy to be an object, got {type(truncation_policy).__name__}"
    )
    assert truncation_policy.get("mode") == "tokens", (
        f"Expected truncation_policy mode 'tokens', got {truncation_policy.get('mode')!r}"
    )
    assert model["context_window"] == 256, f"Expected context_window 256, got {model['context_window']!r}"
@step("the probe response is accepted")
def step_probe_response_accepted(context):
    """
    Check that the probe request was answered with HTTP 200 and a JSON body
    containing both ``id`` and ``output``.

    Reads ``context.probe_status`` and ``context.probe_text``; every failed
    check surfaces as an AssertionError.
    """
    http_status = getattr(context, "probe_status", None)
    body = getattr(context, "probe_text", None)

    # Nothing recorded means the request step did not run.
    assert http_status is not None, "No probe response status"
    assert body is not None, "No probe response body"
    assert http_status == 200, f"Probe request failed with status {http_status}. Expected 200. Response: {body[:200]}"

    try:
        parsed = json.loads(body)
    except json.JSONDecodeError as exc:
        raise AssertionError(f"Invalid JSON probe response: {exc}") from exc

    # Checked in this order so the first missing key is the one reported.
    for required_key in ("id", "output"):
        assert required_key in parsed, f"Expected '{required_key}' in probe response"
@step("the previous_response_id request returns an error")
def step_previous_response_id_returns_error(context):
    """
    Assert that invalid previous_response_id returns an error response (4xx or 5xx).

    Reads ``context.prev_resp_status`` and ``context.prev_resp_text``. The
    body must be a JSON object with an ``error`` entry whose message (or, for
    a non-object ``error``, its string form) mentions ``previous_response_id``.

    Raises:
        AssertionError: if no response was recorded, the status is below 400,
            the body is not a JSON object, ``error`` is missing, or the error
            message does not mention ``previous_response_id``.
    """
    status = getattr(context, "prev_resp_status", None)
    text = getattr(context, "prev_resp_text", None)
    assert status is not None, "No previous_response_id response status"
    assert text is not None, "No previous_response_id response body"
    assert status >= 400, f"Expected error status for invalid previous_response_id, got {status}"

    try:
        data = json.loads(text)
    except json.JSONDecodeError as e:
        raise AssertionError(f"Invalid JSON error response: {e}") from e

    # Without this guard a JSON array/string body would crash on data.keys()
    # or data["error"] instead of failing with a readable assertion.
    assert isinstance(data, dict), f"Expected JSON object in error response. Got: {type(data).__name__}"
    assert "error" in data, f"Expected 'error' field in error response. Got: {list(data.keys())}"

    error = data["error"]
    # str() keeps the substring check below valid even when "message" is
    # null or otherwise not a string.
    error_msg = str(error.get("message", "")) if isinstance(error, dict) else str(error)
    assert "previous_response_id" in error_msg, f"Expected 'previous_response_id' in error message. Got: {error_msg}"