chat: Cohere2MoE/North Code: parse unopened thinking under --reasoning off (follow-up to #1968) (#2012)

* Handle Cohere2MoE unopened thinking before tools

* Cohere2MoE: route unopened thinking to reasoning_content; test in active target

Follow-up to #1968. Gate extract_reasoning on reasoning_format only (drop the
"&& enable_thinking" addition) so the unopened-thinking handling does not also
change where an opened thinking block is routed. Under --reasoning off
(enable_thinking=false, reasoning_format defaults to DEEPSEEK) an orphaned
thinking block is now quarantined in reasoning_content with clean content and a
native tool call, instead of leaking the thinking prose into the user-facing
answer.

Move the Cohere2MoE end-to-end parser cases into tests/test-chat-auto-parser.cpp,
which CMake actually builds. tests/test-chat.cpp has been disabled in
tests/CMakeLists.txt since #723, so cohere coverage added there never ran in CI;
revert the local band-aids to that file.

* Cohere2MoE: harden parser from NMC eval findings

---------

Co-authored-by: Joel Farthing <262452229+joelfarthing@users.noreply.github.com>
This commit is contained in:
Joel Farthing 2026-06-24 02:04:41 -05:00 committed by GitHub
parent 5a4fa17947
commit 8686ea708b
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 185 additions and 7 deletions

View File

@ -2144,22 +2144,42 @@ static common_chat_params common_chat_params_init_cohere2moe(const common_chat_t
}; };
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty(); auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
// Surface reasoning as reasoning_content whenever the output format requests it
// (Cohere2MoE has a non-empty THINK_START, so this matches the narrow condition
// from the reasoning-delimiter work). Under --reasoning off the model may still
// emit an (un)opened thinking block; keeping it in reasoning_content quarantines
// it from the user-facing content rather than leaking it into the answer.
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE; auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE; auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) { auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START); auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
auto end = p.optional(p.literal(TURN_END)) + p.end(); // Cohere2MoE can emit a stray text terminator after an action envelope.
auto end = p.optional(p.literal(TEXT_END)) + p.optional(p.literal(TURN_END)) + p.end();
auto thinking_body = [&]() {
return p.until_one_of({ THINK_END, TEXT_START, ACTION_START });
};
common_peg_parser reasoning = p.eps(); common_peg_parser reasoning = p.eps();
if (extract_reasoning) { if (extract_reasoning) {
reasoning = p.optional(p.literal(THINK_START) + auto opened = p.literal(THINK_START) +
p.reasoning(p.until_one_of({ THINK_END, TEXT_START, ACTION_START })) + p.reasoning(thinking_body()) +
p.optional(p.literal(THINK_END))); p.optional(p.literal(THINK_END));
auto unopened = p.reasoning(thinking_body()) + p.literal(THINK_END);
reasoning = p.optional(p.choice({ opened, unopened }));
} else if (inputs.enable_thinking) {
auto opened = p.content(p.literal(THINK_START) +
thinking_body() +
p.optional(p.literal(THINK_END)));
auto unopened = p.content(thinking_body() + p.literal(THINK_END));
reasoning = p.optional(p.choice({ opened, unopened }));
} else { } else {
reasoning = p.optional(p.content(p.literal(THINK_START) + auto opened = p.literal(THINK_START) +
p.until_one_of({ THINK_END, TEXT_START, ACTION_START }) + p.content(thinking_body()) +
p.optional(p.literal(THINK_END)))); p.optional(p.literal(THINK_END));
auto unopened = p.content(thinking_body()) + p.literal(THINK_END);
reasoning = p.optional(p.choice({ opened, unopened }));
} }
auto text_content = p.literal(TEXT_START) + p.content(p.until(TEXT_END)) + p.optional(p.literal(TEXT_END)); auto text_content = p.literal(TEXT_START) + p.content(p.until(TEXT_END)) + p.optional(p.literal(TEXT_END));

View File

@ -62,6 +62,9 @@ static void test_nemotron_tool_format(testing & t);
static void test_cohere_reasoning_detection(testing & t); static void test_cohere_reasoning_detection(testing & t);
static void test_cohere_analysis(testing & t); static void test_cohere_analysis(testing & t);
// End-to-end Cohere2MoE (North Code) dedicated PEG parser coverage.
static void test_cohere2moe_parser(testing & t);
// SmolLM3 template analysis tests // SmolLM3 template analysis tests
static void test_smollm3_analysis(testing & t); static void test_smollm3_analysis(testing & t);
@ -98,6 +101,7 @@ int main(int argc, char * argv[]) {
t.test("segments", test_marker_separation); t.test("segments", test_marker_separation);
t.test("seed_oss_diffs", test_seed_oss_tool_analysis); t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
t.test("cohere", test_cohere_analysis); t.test("cohere", test_cohere_analysis);
t.test("cohere2moe_parser", test_cohere2moe_parser);
t.test("nemotron", test_nemotron_analysis); t.test("nemotron", test_nemotron_analysis);
t.test("smollm3", test_smollm3_analysis); t.test("smollm3", test_smollm3_analysis);
t.test("standard_json_tools", test_standard_json_tools_formats); t.test("standard_json_tools", test_standard_json_tools_formats);
@ -1967,3 +1971,157 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
} }
} }
// End-to-end coverage for the dedicated Cohere2MoE (North Code) parser:
// template apply -> PEG parse -> assert message. Exercises the reasoning-mode
// matrix, including the unopened-thinking-under---reasoning-off case (#1968
// follow-up). Routing rule: reasoning surfaces to reasoning_content whenever the
// output format != NONE (DEEPSEEK), and folds into content under NONE.
static void test_cohere2moe_parser(testing & t) {
std::ifstream fin("models/templates/Cohere2MoE.jinja", std::ios::binary);
std::ostringstream buf; buf << fin.rdbuf();
std::string src = buf.str();
t.assert_true("Cohere2MoE template loaded", src.length() > 0);
if (src.empty()) {
return;
}
common_chat_templates_ptr tmpls(common_chat_templates_init(/* model = */ nullptr, src));
common_chat_tool special_function{
/* .name = */ "special_function",
/* .description = */ "I'm special",
/* .parameters = */ R"({"type":"object","properties":{"arg1":{"type":"integer"}},"required":["arg1"]})",
};
common_chat_tool python{
/* .name = */ "python",
/* .description = */ "Run Python code",
/* .parameters = */ R"({"type":"object","properties":{"code":{"type":"string"}},"required":["code"]})",
};
common_chat_msg user;
user.role = "user";
user.content = "Hey";
const std::string act =
"<|START_ACTION|>[\n"
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
"]<|END_ACTION|>";
const std::string act_numeric_id =
"<|START_ACTION|>[\n"
" {\"tool_call_id\": 0, \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
"]<|END_ACTION|>";
const std::string text_resp = "<|START_TEXT|>Hello, world!<|END_TEXT|>";
struct cohere_case {
const char * name;
std::string input;
common_reasoning_format reasoning_format;
bool enable_thinking;
common_chat_tool_choice tool_choice;
std::string exp_content;
std::string exp_reasoning;
size_t exp_tool_calls;
};
const std::vector<cohere_case> cases = {
// #1968 follow-up fix: orphaned thinking (no <|START_THINKING|>) under --reasoning off.
{ "unopened/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 },
{ "unopened/NONE -> content", "I'm\nthinking<|END_THINKING|>" + act,
COMMON_REASONING_FORMAT_NONE, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "I'm\nthinking", "", 1 },
{ "unopened/DEEPSEEK/required -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_REQUIRED, "", "I'm\nthinking", 1 },
{ "unopened text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "Hello, world!", "I'm\nthinking", 0 },
{ "tool-choice-none text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_NONE, "Hello, world!", "I'm\nthinking", 0 },
// Regression: reasoning enabled still routes thinking to reasoning_content.
{ "thinking-on/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
COMMON_REASONING_FORMAT_DEEPSEEK, true, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 },
{ "thinking-on/NONE -> tagged content", "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + act,
COMMON_REASONING_FORMAT_NONE, true, COMMON_CHAT_TOOL_CHOICE_AUTO,
"<|START_THINKING|>I'm\nthinking<|END_THINKING|>", "", 1 },
// Regression: existing #1968 shapes still parse to clean native tool calls.
{ "bare-end/DEEPSEEK -> clean call", "<|END_THINKING|>" + act,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
{ "bare-end/trailing-end-text/DEEPSEEK -> clean call", "<|END_THINKING|>" + act + "<|END_TEXT|>",
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
{ "bare-end/numeric-id/DEEPSEEK -> clean call", "<|END_THINKING|>" + act_numeric_id,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
{ "empty-block/DEEPSEEK -> clean call", "<|START_THINKING|><|END_THINKING|>" + act,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
{ "no-thinking/DEEPSEEK -> clean call", act,
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
};
for (const auto & c : cases) {
common_chat_templates_inputs inputs;
inputs.messages = { user };
inputs.tools = { special_function };
inputs.tool_choice = c.tool_choice;
inputs.reasoning_format = c.reasoning_format;
inputs.enable_thinking = c.enable_thinking;
auto params = common_chat_templates_apply(tmpls.get(), inputs);
auto pos = params.generation_prompt.rfind("<|START_THINKING|>");
common_peg_arena arena;
arena.load(params.parser);
common_chat_parser_params pp(params);
if (pos != std::string::npos) {
pp.generation_prompt = params.generation_prompt.substr(0, pos);
}
auto msg = common_chat_peg_parse(arena, c.input, /* is_partial = */ false, pp);
t.assert_equal(std::string(c.name) + " : content", c.exp_content, msg.content);
t.assert_equal(std::string(c.name) + " : reasoning", c.exp_reasoning, msg.reasoning_content);
t.assert_equal(std::string(c.name) + " : tool calls", c.exp_tool_calls, msg.tool_calls.size());
if (c.exp_tool_calls == 1 && msg.tool_calls.size() == 1) {
t.assert_equal(std::string(c.name) + " : tool name", std::string("special_function"), msg.tool_calls[0].name);
t.assert_equal(std::string(c.name) + " : tool id", std::string("0"), msg.tool_calls[0].id);
}
}
const std::string parallel_act =
"<|START_ACTION|>[\n"
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}},\n"
" {\"tool_call_id\": \"1\", \"tool_name\": \"python\", \"parameters\": {\"code\": \"print('hey')\"}}\n"
"]<|END_ACTION|>";
common_chat_templates_inputs parallel_inputs;
parallel_inputs.messages = { user };
parallel_inputs.tools = { special_function, python };
parallel_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
parallel_inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
parallel_inputs.enable_thinking = false;
parallel_inputs.parallel_tool_calls = true;
auto parallel_params = common_chat_templates_apply(tmpls.get(), parallel_inputs);
auto parallel_pos = parallel_params.generation_prompt.rfind("<|START_THINKING|>");
common_peg_arena parallel_arena;
parallel_arena.load(parallel_params.parser);
common_chat_parser_params parallel_pp(parallel_params);
if (parallel_pos != std::string::npos) {
parallel_pp.generation_prompt = parallel_params.generation_prompt.substr(0, parallel_pos);
}
auto parallel_msg = common_chat_peg_parse(
parallel_arena,
"I'm\nthinking<|END_THINKING|>" + parallel_act,
/* is_partial = */ false,
parallel_pp);
t.assert_equal("parallel : content", std::string(""), parallel_msg.content);
t.assert_equal("parallel : reasoning", std::string("I'm\nthinking"), parallel_msg.reasoning_content);
t.assert_equal("parallel : tool calls", 2u, parallel_msg.tool_calls.size());
if (parallel_msg.tool_calls.size() == 2) {
t.assert_equal("parallel : tool 0 name", std::string("special_function"), parallel_msg.tool_calls[0].name);
t.assert_equal("parallel : tool 0 id", std::string("0"), parallel_msg.tool_calls[0].id);
t.assert_equal("parallel : tool 1 name", std::string("python"), parallel_msg.tool_calls[1].name);
t.assert_equal("parallel : tool 1 id", std::string("1"), parallel_msg.tool_calls[1].id);
}
}