mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
chat: Cohere2MoE/North Code: parse unopened thinking under --reasoning off (follow-up to #1968) (#2012)
* Handle Cohere2MoE unopened thinking before tools * Cohere2MoE: route unopened thinking to reasoning_content; test in active target Follow-up to #1968. Gate extract_reasoning on reasoning_format only (drop the "&& enable_thinking" addition) so the unopened-thinking handling does not also change where an opened thinking block is routed. Under --reasoning off (enable_thinking=false, reasoning_format defaults to DEEPSEEK) an orphaned thinking block is now quarantined in reasoning_content with clean content and a native tool call, instead of leaking the thinking prose into the user-facing answer. Move the Cohere2MoE end-to-end parser cases into tests/test-chat-auto-parser.cpp, which CMake actually builds. tests/test-chat.cpp has been disabled in tests/CMakeLists.txt since #723, so cohere coverage added there never ran in CI; revert the local band-aids to that file. * Cohere2MoE: harden parser from NMC eval findings --------- Co-authored-by: Joel Farthing <262452229+joelfarthing@users.noreply.github.com>
This commit is contained in:
parent
5a4fa17947
commit
8686ea708b
@ -2144,22 +2144,42 @@ static common_chat_params common_chat_params_init_cohere2moe(const common_chat_t
|
||||
};
|
||||
|
||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
||||
// Surface reasoning as reasoning_content whenever the output format requests it
|
||||
// (Cohere2MoE has a non-empty THINK_START, so this matches the narrow condition
|
||||
// from the reasoning-delimiter work). Under --reasoning off the model may still
|
||||
// emit an (un)opened thinking block; keeping it in reasoning_content quarantines
|
||||
// it from the user-facing content rather than leaking it into the answer.
|
||||
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
|
||||
|
||||
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
||||
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
|
||||
auto end = p.optional(p.literal(TURN_END)) + p.end();
|
||||
// Cohere2MoE can emit a stray text terminator after an action envelope.
|
||||
auto end = p.optional(p.literal(TEXT_END)) + p.optional(p.literal(TURN_END)) + p.end();
|
||||
|
||||
auto thinking_body = [&]() {
|
||||
return p.until_one_of({ THINK_END, TEXT_START, ACTION_START });
|
||||
};
|
||||
|
||||
common_peg_parser reasoning = p.eps();
|
||||
if (extract_reasoning) {
|
||||
reasoning = p.optional(p.literal(THINK_START) +
|
||||
p.reasoning(p.until_one_of({ THINK_END, TEXT_START, ACTION_START })) +
|
||||
p.optional(p.literal(THINK_END)));
|
||||
auto opened = p.literal(THINK_START) +
|
||||
p.reasoning(thinking_body()) +
|
||||
p.optional(p.literal(THINK_END));
|
||||
auto unopened = p.reasoning(thinking_body()) + p.literal(THINK_END);
|
||||
reasoning = p.optional(p.choice({ opened, unopened }));
|
||||
} else if (inputs.enable_thinking) {
|
||||
auto opened = p.content(p.literal(THINK_START) +
|
||||
thinking_body() +
|
||||
p.optional(p.literal(THINK_END)));
|
||||
auto unopened = p.content(thinking_body() + p.literal(THINK_END));
|
||||
reasoning = p.optional(p.choice({ opened, unopened }));
|
||||
} else {
|
||||
reasoning = p.optional(p.content(p.literal(THINK_START) +
|
||||
p.until_one_of({ THINK_END, TEXT_START, ACTION_START }) +
|
||||
p.optional(p.literal(THINK_END))));
|
||||
auto opened = p.literal(THINK_START) +
|
||||
p.content(thinking_body()) +
|
||||
p.optional(p.literal(THINK_END));
|
||||
auto unopened = p.content(thinking_body()) + p.literal(THINK_END);
|
||||
reasoning = p.optional(p.choice({ opened, unopened }));
|
||||
}
|
||||
|
||||
auto text_content = p.literal(TEXT_START) + p.content(p.until(TEXT_END)) + p.optional(p.literal(TEXT_END));
|
||||
|
||||
@ -62,6 +62,9 @@ static void test_nemotron_tool_format(testing & t);
|
||||
static void test_cohere_reasoning_detection(testing & t);
|
||||
static void test_cohere_analysis(testing & t);
|
||||
|
||||
// End-to-end Cohere2MoE (North Code) dedicated PEG parser coverage.
|
||||
static void test_cohere2moe_parser(testing & t);
|
||||
|
||||
// SmolLM3 template analysis tests
|
||||
static void test_smollm3_analysis(testing & t);
|
||||
|
||||
@ -98,6 +101,7 @@ int main(int argc, char * argv[]) {
|
||||
t.test("segments", test_marker_separation);
|
||||
t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
|
||||
t.test("cohere", test_cohere_analysis);
|
||||
t.test("cohere2moe_parser", test_cohere2moe_parser);
|
||||
t.test("nemotron", test_nemotron_analysis);
|
||||
t.test("smollm3", test_smollm3_analysis);
|
||||
t.test("standard_json_tools", test_standard_json_tools_formats);
|
||||
@ -1967,3 +1971,157 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
|
||||
}
|
||||
}
|
||||
|
||||
// End-to-end coverage for the dedicated Cohere2MoE (North Code) parser:
|
||||
// template apply -> PEG parse -> assert message. Exercises the reasoning-mode
|
||||
// matrix, including the unopened-thinking-under---reasoning-off case (#1968
|
||||
// follow-up). Routing rule: reasoning surfaces to reasoning_content whenever the
|
||||
// output format != NONE (DEEPSEEK), and folds into content under NONE.
|
||||
static void test_cohere2moe_parser(testing & t) {
|
||||
std::ifstream fin("models/templates/Cohere2MoE.jinja", std::ios::binary);
|
||||
std::ostringstream buf; buf << fin.rdbuf();
|
||||
std::string src = buf.str();
|
||||
t.assert_true("Cohere2MoE template loaded", src.length() > 0);
|
||||
if (src.empty()) {
|
||||
return;
|
||||
}
|
||||
|
||||
common_chat_templates_ptr tmpls(common_chat_templates_init(/* model = */ nullptr, src));
|
||||
|
||||
common_chat_tool special_function{
|
||||
/* .name = */ "special_function",
|
||||
/* .description = */ "I'm special",
|
||||
/* .parameters = */ R"({"type":"object","properties":{"arg1":{"type":"integer"}},"required":["arg1"]})",
|
||||
};
|
||||
common_chat_tool python{
|
||||
/* .name = */ "python",
|
||||
/* .description = */ "Run Python code",
|
||||
/* .parameters = */ R"({"type":"object","properties":{"code":{"type":"string"}},"required":["code"]})",
|
||||
};
|
||||
|
||||
common_chat_msg user;
|
||||
user.role = "user";
|
||||
user.content = "Hey";
|
||||
|
||||
const std::string act =
|
||||
"<|START_ACTION|>[\n"
|
||||
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
|
||||
"]<|END_ACTION|>";
|
||||
const std::string act_numeric_id =
|
||||
"<|START_ACTION|>[\n"
|
||||
" {\"tool_call_id\": 0, \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
|
||||
"]<|END_ACTION|>";
|
||||
const std::string text_resp = "<|START_TEXT|>Hello, world!<|END_TEXT|>";
|
||||
|
||||
struct cohere_case {
|
||||
const char * name;
|
||||
std::string input;
|
||||
common_reasoning_format reasoning_format;
|
||||
bool enable_thinking;
|
||||
common_chat_tool_choice tool_choice;
|
||||
std::string exp_content;
|
||||
std::string exp_reasoning;
|
||||
size_t exp_tool_calls;
|
||||
};
|
||||
|
||||
const std::vector<cohere_case> cases = {
|
||||
// #1968 follow-up fix: orphaned thinking (no <|START_THINKING|>) under --reasoning off.
|
||||
{ "unopened/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 },
|
||||
{ "unopened/NONE -> content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||
COMMON_REASONING_FORMAT_NONE, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "I'm\nthinking", "", 1 },
|
||||
{ "unopened/DEEPSEEK/required -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_REQUIRED, "", "I'm\nthinking", 1 },
|
||||
{ "unopened text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "Hello, world!", "I'm\nthinking", 0 },
|
||||
{ "tool-choice-none text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_NONE, "Hello, world!", "I'm\nthinking", 0 },
|
||||
// Regression: reasoning enabled still routes thinking to reasoning_content.
|
||||
{ "thinking-on/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, true, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 },
|
||||
{ "thinking-on/NONE -> tagged content", "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + act,
|
||||
COMMON_REASONING_FORMAT_NONE, true, COMMON_CHAT_TOOL_CHOICE_AUTO,
|
||||
"<|START_THINKING|>I'm\nthinking<|END_THINKING|>", "", 1 },
|
||||
// Regression: existing #1968 shapes still parse to clean native tool calls.
|
||||
{ "bare-end/DEEPSEEK -> clean call", "<|END_THINKING|>" + act,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||
{ "bare-end/trailing-end-text/DEEPSEEK -> clean call", "<|END_THINKING|>" + act + "<|END_TEXT|>",
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||
{ "bare-end/numeric-id/DEEPSEEK -> clean call", "<|END_THINKING|>" + act_numeric_id,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||
{ "empty-block/DEEPSEEK -> clean call", "<|START_THINKING|><|END_THINKING|>" + act,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||
{ "no-thinking/DEEPSEEK -> clean call", act,
|
||||
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||
};
|
||||
|
||||
for (const auto & c : cases) {
|
||||
common_chat_templates_inputs inputs;
|
||||
inputs.messages = { user };
|
||||
inputs.tools = { special_function };
|
||||
inputs.tool_choice = c.tool_choice;
|
||||
inputs.reasoning_format = c.reasoning_format;
|
||||
inputs.enable_thinking = c.enable_thinking;
|
||||
|
||||
auto params = common_chat_templates_apply(tmpls.get(), inputs);
|
||||
auto pos = params.generation_prompt.rfind("<|START_THINKING|>");
|
||||
|
||||
common_peg_arena arena;
|
||||
arena.load(params.parser);
|
||||
|
||||
common_chat_parser_params pp(params);
|
||||
if (pos != std::string::npos) {
|
||||
pp.generation_prompt = params.generation_prompt.substr(0, pos);
|
||||
}
|
||||
|
||||
auto msg = common_chat_peg_parse(arena, c.input, /* is_partial = */ false, pp);
|
||||
|
||||
t.assert_equal(std::string(c.name) + " : content", c.exp_content, msg.content);
|
||||
t.assert_equal(std::string(c.name) + " : reasoning", c.exp_reasoning, msg.reasoning_content);
|
||||
t.assert_equal(std::string(c.name) + " : tool calls", c.exp_tool_calls, msg.tool_calls.size());
|
||||
if (c.exp_tool_calls == 1 && msg.tool_calls.size() == 1) {
|
||||
t.assert_equal(std::string(c.name) + " : tool name", std::string("special_function"), msg.tool_calls[0].name);
|
||||
t.assert_equal(std::string(c.name) + " : tool id", std::string("0"), msg.tool_calls[0].id);
|
||||
}
|
||||
}
|
||||
|
||||
const std::string parallel_act =
|
||||
"<|START_ACTION|>[\n"
|
||||
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}},\n"
|
||||
" {\"tool_call_id\": \"1\", \"tool_name\": \"python\", \"parameters\": {\"code\": \"print('hey')\"}}\n"
|
||||
"]<|END_ACTION|>";
|
||||
|
||||
common_chat_templates_inputs parallel_inputs;
|
||||
parallel_inputs.messages = { user };
|
||||
parallel_inputs.tools = { special_function, python };
|
||||
parallel_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
||||
parallel_inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||
parallel_inputs.enable_thinking = false;
|
||||
parallel_inputs.parallel_tool_calls = true;
|
||||
|
||||
auto parallel_params = common_chat_templates_apply(tmpls.get(), parallel_inputs);
|
||||
auto parallel_pos = parallel_params.generation_prompt.rfind("<|START_THINKING|>");
|
||||
|
||||
common_peg_arena parallel_arena;
|
||||
parallel_arena.load(parallel_params.parser);
|
||||
|
||||
common_chat_parser_params parallel_pp(parallel_params);
|
||||
if (parallel_pos != std::string::npos) {
|
||||
parallel_pp.generation_prompt = parallel_params.generation_prompt.substr(0, parallel_pos);
|
||||
}
|
||||
|
||||
auto parallel_msg = common_chat_peg_parse(
|
||||
parallel_arena,
|
||||
"I'm\nthinking<|END_THINKING|>" + parallel_act,
|
||||
/* is_partial = */ false,
|
||||
parallel_pp);
|
||||
|
||||
t.assert_equal("parallel : content", std::string(""), parallel_msg.content);
|
||||
t.assert_equal("parallel : reasoning", std::string("I'm\nthinking"), parallel_msg.reasoning_content);
|
||||
t.assert_equal("parallel : tool calls", 2u, parallel_msg.tool_calls.size());
|
||||
if (parallel_msg.tool_calls.size() == 2) {
|
||||
t.assert_equal("parallel : tool 0 name", std::string("special_function"), parallel_msg.tool_calls[0].name);
|
||||
t.assert_equal("parallel : tool 0 id", std::string("0"), parallel_msg.tool_calls[0].id);
|
||||
t.assert_equal("parallel : tool 1 name", std::string("python"), parallel_msg.tool_calls[1].name);
|
||||
t.assert_equal("parallel : tool 1 id", std::string("1"), parallel_msg.tool_calls[1].id);
|
||||
}
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user