From 8686ea708b68cd26746181a329fa0ba542e43c63 Mon Sep 17 00:00:00 2001 From: Joel Farthing Date: Wed, 24 Jun 2026 02:04:41 -0500 Subject: [PATCH] chat: Cohere2MoE/North Code: parse unopened thinking under --reasoning off (follow-up to #1968) (#2012) * Handle Cohere2MoE unopened thinking before tools * Cohere2MoE: route unopened thinking to reasoning_content; test in active target Follow-up to #1968. Gate extract_reasoning on reasoning_format only (drop the "&& enable_thinking" addition) so the unopened-thinking handling does not also change where an opened thinking block is routed. Under --reasoning off (enable_thinking=false, reasoning_format defaults to DEEPSEEK) an orphaned thinking block is now quarantined in reasoning_content with clean content and a native tool call, instead of leaking the thinking prose into the user-facing answer. Move the Cohere2MoE end-to-end parser cases into tests/test-chat-auto-parser.cpp, which CMake actually builds. tests/test-chat.cpp has been disabled in tests/CMakeLists.txt since #723, so cohere coverage added there never ran in CI; revert the local band-aids to that file. * Cohere2MoE: harden parser from NMC eval findings --------- Co-authored-by: Joel Farthing <262452229+joelfarthing@users.noreply.github.com> --- common/chat.cpp | 34 +++++-- tests/test-chat-auto-parser.cpp | 158 ++++++++++++++++++++++++++++++++ 2 files changed, 185 insertions(+), 7 deletions(-) diff --git a/common/chat.cpp b/common/chat.cpp index 60ef46b8..2dfbe238 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -2144,22 +2144,42 @@ static common_chat_params common_chat_params_init_cohere2moe(const common_chat_t }; auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); + // Surface reasoning as reasoning_content whenever the output format requests it + // (Cohere2MoE has a non-empty THINK_START, so this matches the narrow condition + // from the reasoning-delimiter work). Under --reasoning off the model may still + // emit an (un)opened thinking block; keeping it in reasoning_content quarantines + // it from the user-facing content rather than leaking it into the answer. auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) { auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START); - auto end = p.optional(p.literal(TURN_END)) + p.end(); + // Cohere2MoE can emit a stray text terminator after an action envelope. + auto end = p.optional(p.literal(TEXT_END)) + p.optional(p.literal(TURN_END)) + p.end(); + + auto thinking_body = [&]() { + return p.until_one_of({ THINK_END, TEXT_START, ACTION_START }); + }; common_peg_parser reasoning = p.eps(); if (extract_reasoning) { - reasoning = p.optional(p.literal(THINK_START) + - p.reasoning(p.until_one_of({ THINK_END, TEXT_START, ACTION_START })) + - p.optional(p.literal(THINK_END))); + auto opened = p.literal(THINK_START) + + p.reasoning(thinking_body()) + + p.optional(p.literal(THINK_END)); + auto unopened = p.reasoning(thinking_body()) + p.literal(THINK_END); + reasoning = p.optional(p.choice({ opened, unopened })); + } else if (inputs.enable_thinking) { + auto opened = p.content(p.literal(THINK_START) + + thinking_body() + + p.optional(p.literal(THINK_END))); + auto unopened = p.content(thinking_body() + p.literal(THINK_END)); + reasoning = p.optional(p.choice({ opened, unopened })); } else { - reasoning = p.optional(p.content(p.literal(THINK_START) + - p.until_one_of({ THINK_END, TEXT_START, ACTION_START }) + - p.optional(p.literal(THINK_END)))); + auto opened = p.literal(THINK_START) + + p.content(thinking_body()) + + p.optional(p.literal(THINK_END)); + auto unopened = p.content(thinking_body()) + p.literal(THINK_END); + reasoning = p.optional(p.choice({ opened, unopened })); } auto text_content = p.literal(TEXT_START) + p.content(p.until(TEXT_END)) + p.optional(p.literal(TEXT_END)); diff --git a/tests/test-chat-auto-parser.cpp b/tests/test-chat-auto-parser.cpp index 1d96de71..1826658b 100644 --- a/tests/test-chat-auto-parser.cpp +++ b/tests/test-chat-auto-parser.cpp @@ -62,6 +62,9 @@ static void test_nemotron_tool_format(testing & t); static void test_cohere_reasoning_detection(testing & t); static void test_cohere_analysis(testing & t); +// End-to-end Cohere2MoE (North Code) dedicated PEG parser coverage. +static void test_cohere2moe_parser(testing & t); + // SmolLM3 template analysis tests static void test_smollm3_analysis(testing & t); @@ -98,6 +101,7 @@ int main(int argc, char * argv[]) { t.test("segments", test_marker_separation); t.test("seed_oss_diffs", test_seed_oss_tool_analysis); t.test("cohere", test_cohere_analysis); + t.test("cohere2moe_parser", test_cohere2moe_parser); t.test("nemotron", test_nemotron_analysis); t.test("smollm3", test_smollm3_analysis); t.test("standard_json_tools", test_standard_json_tools_formats); @@ -1967,3 +1971,157 @@ static void test_tagged_args_with_embedded_quotes(testing & t) { } } +// End-to-end coverage for the dedicated Cohere2MoE (North Code) parser: +// template apply -> PEG parse -> assert message. Exercises the reasoning-mode +// matrix, including the unopened-thinking-under---reasoning-off case (#1968 +// follow-up). Routing rule: reasoning surfaces to reasoning_content whenever the +// output format != NONE (DEEPSEEK), and folds into content under NONE. +static void test_cohere2moe_parser(testing & t) { + std::ifstream fin("models/templates/Cohere2MoE.jinja", std::ios::binary); + std::ostringstream buf; buf << fin.rdbuf(); + std::string src = buf.str(); + t.assert_true("Cohere2MoE template loaded", src.length() > 0); + if (src.empty()) { + return; + } + + common_chat_templates_ptr tmpls(common_chat_templates_init(/* model = */ nullptr, src)); + + common_chat_tool special_function{ + /* .name = */ "special_function", + /* .description = */ "I'm special", + /* .parameters = */ R"({"type":"object","properties":{"arg1":{"type":"integer"}},"required":["arg1"]})", + }; + common_chat_tool python{ + /* .name = */ "python", + /* .description = */ "Run Python code", + /* .parameters = */ R"({"type":"object","properties":{"code":{"type":"string"}},"required":["code"]})", + }; + + common_chat_msg user; + user.role = "user"; + user.content = "Hey"; + + const std::string act = + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>"; + const std::string act_numeric_id = + "<|START_ACTION|>[\n" + " {\"tool_call_id\": 0, \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n" + "]<|END_ACTION|>"; + const std::string text_resp = "<|START_TEXT|>Hello, world!<|END_TEXT|>"; + + struct cohere_case { + const char * name; + std::string input; + common_reasoning_format reasoning_format; + bool enable_thinking; + common_chat_tool_choice tool_choice; + std::string exp_content; + std::string exp_reasoning; + size_t exp_tool_calls; + }; + + const std::vector cases = { + // #1968 follow-up fix: orphaned thinking (no <|START_THINKING|>) under --reasoning off. + { "unopened/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 }, + { "unopened/NONE -> content", "I'm\nthinking<|END_THINKING|>" + act, + COMMON_REASONING_FORMAT_NONE, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "I'm\nthinking", "", 1 }, + { "unopened/DEEPSEEK/required -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_REQUIRED, "", "I'm\nthinking", 1 }, + { "unopened text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "Hello, world!", "I'm\nthinking", 0 }, + { "tool-choice-none text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_NONE, "Hello, world!", "I'm\nthinking", 0 }, + // Regression: reasoning enabled still routes thinking to reasoning_content. + { "thinking-on/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act, + COMMON_REASONING_FORMAT_DEEPSEEK, true, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 }, + { "thinking-on/NONE -> tagged content", "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + act, + COMMON_REASONING_FORMAT_NONE, true, COMMON_CHAT_TOOL_CHOICE_AUTO, + "<|START_THINKING|>I'm\nthinking<|END_THINKING|>", "", 1 }, + // Regression: existing #1968 shapes still parse to clean native tool calls. + { "bare-end/DEEPSEEK -> clean call", "<|END_THINKING|>" + act, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 }, + { "bare-end/trailing-end-text/DEEPSEEK -> clean call", "<|END_THINKING|>" + act + "<|END_TEXT|>", + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 }, + { "bare-end/numeric-id/DEEPSEEK -> clean call", "<|END_THINKING|>" + act_numeric_id, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 }, + { "empty-block/DEEPSEEK -> clean call", "<|START_THINKING|><|END_THINKING|>" + act, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 }, + { "no-thinking/DEEPSEEK -> clean call", act, + COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 }, + }; + + for (const auto & c : cases) { + common_chat_templates_inputs inputs; + inputs.messages = { user }; + inputs.tools = { special_function }; + inputs.tool_choice = c.tool_choice; + inputs.reasoning_format = c.reasoning_format; + inputs.enable_thinking = c.enable_thinking; + + auto params = common_chat_templates_apply(tmpls.get(), inputs); + auto pos = params.generation_prompt.rfind("<|START_THINKING|>"); + + common_peg_arena arena; + arena.load(params.parser); + + common_chat_parser_params pp(params); + if (pos != std::string::npos) { + pp.generation_prompt = params.generation_prompt.substr(0, pos); + } + + auto msg = common_chat_peg_parse(arena, c.input, /* is_partial = */ false, pp); + + t.assert_equal(std::string(c.name) + " : content", c.exp_content, msg.content); + t.assert_equal(std::string(c.name) + " : reasoning", c.exp_reasoning, msg.reasoning_content); + t.assert_equal(std::string(c.name) + " : tool calls", c.exp_tool_calls, msg.tool_calls.size()); + if (c.exp_tool_calls == 1 && msg.tool_calls.size() == 1) { + t.assert_equal(std::string(c.name) + " : tool name", std::string("special_function"), msg.tool_calls[0].name); + t.assert_equal(std::string(c.name) + " : tool id", std::string("0"), msg.tool_calls[0].id); + } + } + + const std::string parallel_act = + "<|START_ACTION|>[\n" + " {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}},\n" + " {\"tool_call_id\": \"1\", \"tool_name\": \"python\", \"parameters\": {\"code\": \"print('hey')\"}}\n" + "]<|END_ACTION|>"; + + common_chat_templates_inputs parallel_inputs; + parallel_inputs.messages = { user }; + parallel_inputs.tools = { special_function, python }; + parallel_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO; + parallel_inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK; + parallel_inputs.enable_thinking = false; + parallel_inputs.parallel_tool_calls = true; + + auto parallel_params = common_chat_templates_apply(tmpls.get(), parallel_inputs); + auto parallel_pos = parallel_params.generation_prompt.rfind("<|START_THINKING|>"); + + common_peg_arena parallel_arena; + parallel_arena.load(parallel_params.parser); + + common_chat_parser_params parallel_pp(parallel_params); + if (parallel_pos != std::string::npos) { + parallel_pp.generation_prompt = parallel_params.generation_prompt.substr(0, parallel_pos); + } + + auto parallel_msg = common_chat_peg_parse( + parallel_arena, + "I'm\nthinking<|END_THINKING|>" + parallel_act, + /* is_partial = */ false, + parallel_pp); + + t.assert_equal("parallel : content", std::string(""), parallel_msg.content); + t.assert_equal("parallel : reasoning", std::string("I'm\nthinking"), parallel_msg.reasoning_content); + t.assert_equal("parallel : tool calls", 2u, parallel_msg.tool_calls.size()); + if (parallel_msg.tool_calls.size() == 2) { + t.assert_equal("parallel : tool 0 name", std::string("special_function"), parallel_msg.tool_calls[0].name); + t.assert_equal("parallel : tool 0 id", std::string("0"), parallel_msg.tool_calls[0].id); + t.assert_equal("parallel : tool 1 name", std::string("python"), parallel_msg.tool_calls[1].name); + t.assert_equal("parallel : tool 1 id", std::string("1"), parallel_msg.tool_calls[1].id); + } +}