mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
chat: Cohere2MoE/North Code: parse unopened thinking under --reasoning off (follow-up to #1968) (#2012)
* Handle Cohere2MoE unopened thinking before tools * Cohere2MoE: route unopened thinking to reasoning_content; test in active target Follow-up to #1968. Gate extract_reasoning on reasoning_format only (drop the "&& enable_thinking" addition) so the unopened-thinking handling does not also change where an opened thinking block is routed. Under --reasoning off (enable_thinking=false, reasoning_format defaults to DEEPSEEK) an orphaned thinking block is now quarantined in reasoning_content with clean content and a native tool call, instead of leaking the thinking prose into the user-facing answer. Move the Cohere2MoE end-to-end parser cases into tests/test-chat-auto-parser.cpp, which CMake actually builds. tests/test-chat.cpp has been disabled in tests/CMakeLists.txt since #723, so cohere coverage added there never ran in CI; revert the local band-aids to that file. * Cohere2MoE: harden parser from NMC eval findings --------- Co-authored-by: Joel Farthing <262452229+joelfarthing@users.noreply.github.com>
This commit is contained in:
parent
5a4fa17947
commit
8686ea708b
@ -2144,22 +2144,42 @@ static common_chat_params common_chat_params_init_cohere2moe(const common_chat_t
|
|||||||
};
|
};
|
||||||
|
|
||||||
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
auto has_tools = inputs.tools.is_array() && !inputs.tools.empty();
|
||||||
|
// Surface reasoning as reasoning_content whenever the output format requests it
|
||||||
|
// (Cohere2MoE has a non-empty THINK_START, so this matches the narrow condition
|
||||||
|
// from the reasoning-delimiter work). Under --reasoning off the model may still
|
||||||
|
// emit an (un)opened thinking block; keeping it in reasoning_content quarantines
|
||||||
|
// it from the user-facing content rather than leaking it into the answer.
|
||||||
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
auto extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||||
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
|
auto include_grammar = has_tools && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE;
|
||||||
|
|
||||||
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
auto parser = build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
||||||
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
|
auto generation_prompt = p.prefix(inputs.generation_prompt, THINK_START);
|
||||||
auto end = p.optional(p.literal(TURN_END)) + p.end();
|
// Cohere2MoE can emit a stray text terminator after an action envelope.
|
||||||
|
auto end = p.optional(p.literal(TEXT_END)) + p.optional(p.literal(TURN_END)) + p.end();
|
||||||
|
|
||||||
|
auto thinking_body = [&]() {
|
||||||
|
return p.until_one_of({ THINK_END, TEXT_START, ACTION_START });
|
||||||
|
};
|
||||||
|
|
||||||
common_peg_parser reasoning = p.eps();
|
common_peg_parser reasoning = p.eps();
|
||||||
if (extract_reasoning) {
|
if (extract_reasoning) {
|
||||||
reasoning = p.optional(p.literal(THINK_START) +
|
auto opened = p.literal(THINK_START) +
|
||||||
p.reasoning(p.until_one_of({ THINK_END, TEXT_START, ACTION_START })) +
|
p.reasoning(thinking_body()) +
|
||||||
p.optional(p.literal(THINK_END)));
|
p.optional(p.literal(THINK_END));
|
||||||
|
auto unopened = p.reasoning(thinking_body()) + p.literal(THINK_END);
|
||||||
|
reasoning = p.optional(p.choice({ opened, unopened }));
|
||||||
|
} else if (inputs.enable_thinking) {
|
||||||
|
auto opened = p.content(p.literal(THINK_START) +
|
||||||
|
thinking_body() +
|
||||||
|
p.optional(p.literal(THINK_END)));
|
||||||
|
auto unopened = p.content(thinking_body() + p.literal(THINK_END));
|
||||||
|
reasoning = p.optional(p.choice({ opened, unopened }));
|
||||||
} else {
|
} else {
|
||||||
reasoning = p.optional(p.content(p.literal(THINK_START) +
|
auto opened = p.literal(THINK_START) +
|
||||||
p.until_one_of({ THINK_END, TEXT_START, ACTION_START }) +
|
p.content(thinking_body()) +
|
||||||
p.optional(p.literal(THINK_END))));
|
p.optional(p.literal(THINK_END));
|
||||||
|
auto unopened = p.content(thinking_body()) + p.literal(THINK_END);
|
||||||
|
reasoning = p.optional(p.choice({ opened, unopened }));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto text_content = p.literal(TEXT_START) + p.content(p.until(TEXT_END)) + p.optional(p.literal(TEXT_END));
|
auto text_content = p.literal(TEXT_START) + p.content(p.until(TEXT_END)) + p.optional(p.literal(TEXT_END));
|
||||||
|
|||||||
@ -62,6 +62,9 @@ static void test_nemotron_tool_format(testing & t);
|
|||||||
static void test_cohere_reasoning_detection(testing & t);
|
static void test_cohere_reasoning_detection(testing & t);
|
||||||
static void test_cohere_analysis(testing & t);
|
static void test_cohere_analysis(testing & t);
|
||||||
|
|
||||||
|
// End-to-end Cohere2MoE (North Code) dedicated PEG parser coverage.
|
||||||
|
static void test_cohere2moe_parser(testing & t);
|
||||||
|
|
||||||
// SmolLM3 template analysis tests
|
// SmolLM3 template analysis tests
|
||||||
static void test_smollm3_analysis(testing & t);
|
static void test_smollm3_analysis(testing & t);
|
||||||
|
|
||||||
@ -98,6 +101,7 @@ int main(int argc, char * argv[]) {
|
|||||||
t.test("segments", test_marker_separation);
|
t.test("segments", test_marker_separation);
|
||||||
t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
|
t.test("seed_oss_diffs", test_seed_oss_tool_analysis);
|
||||||
t.test("cohere", test_cohere_analysis);
|
t.test("cohere", test_cohere_analysis);
|
||||||
|
t.test("cohere2moe_parser", test_cohere2moe_parser);
|
||||||
t.test("nemotron", test_nemotron_analysis);
|
t.test("nemotron", test_nemotron_analysis);
|
||||||
t.test("smollm3", test_smollm3_analysis);
|
t.test("smollm3", test_smollm3_analysis);
|
||||||
t.test("standard_json_tools", test_standard_json_tools_formats);
|
t.test("standard_json_tools", test_standard_json_tools_formats);
|
||||||
@ -1967,3 +1971,157 @@ static void test_tagged_args_with_embedded_quotes(testing & t) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// End-to-end coverage for the dedicated Cohere2MoE (North Code) parser:
|
||||||
|
// template apply -> PEG parse -> assert message. Exercises the reasoning-mode
|
||||||
|
// matrix, including the unopened-thinking-under---reasoning-off case (#1968
|
||||||
|
// follow-up). Routing rule: reasoning surfaces to reasoning_content whenever the
|
||||||
|
// output format != NONE (DEEPSEEK), and folds into content under NONE.
|
||||||
|
static void test_cohere2moe_parser(testing & t) {
|
||||||
|
std::ifstream fin("models/templates/Cohere2MoE.jinja", std::ios::binary);
|
||||||
|
std::ostringstream buf; buf << fin.rdbuf();
|
||||||
|
std::string src = buf.str();
|
||||||
|
t.assert_true("Cohere2MoE template loaded", src.length() > 0);
|
||||||
|
if (src.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
common_chat_templates_ptr tmpls(common_chat_templates_init(/* model = */ nullptr, src));
|
||||||
|
|
||||||
|
common_chat_tool special_function{
|
||||||
|
/* .name = */ "special_function",
|
||||||
|
/* .description = */ "I'm special",
|
||||||
|
/* .parameters = */ R"({"type":"object","properties":{"arg1":{"type":"integer"}},"required":["arg1"]})",
|
||||||
|
};
|
||||||
|
common_chat_tool python{
|
||||||
|
/* .name = */ "python",
|
||||||
|
/* .description = */ "Run Python code",
|
||||||
|
/* .parameters = */ R"({"type":"object","properties":{"code":{"type":"string"}},"required":["code"]})",
|
||||||
|
};
|
||||||
|
|
||||||
|
common_chat_msg user;
|
||||||
|
user.role = "user";
|
||||||
|
user.content = "Hey";
|
||||||
|
|
||||||
|
const std::string act =
|
||||||
|
"<|START_ACTION|>[\n"
|
||||||
|
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
|
||||||
|
"]<|END_ACTION|>";
|
||||||
|
const std::string act_numeric_id =
|
||||||
|
"<|START_ACTION|>[\n"
|
||||||
|
" {\"tool_call_id\": 0, \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}}\n"
|
||||||
|
"]<|END_ACTION|>";
|
||||||
|
const std::string text_resp = "<|START_TEXT|>Hello, world!<|END_TEXT|>";
|
||||||
|
|
||||||
|
struct cohere_case {
|
||||||
|
const char * name;
|
||||||
|
std::string input;
|
||||||
|
common_reasoning_format reasoning_format;
|
||||||
|
bool enable_thinking;
|
||||||
|
common_chat_tool_choice tool_choice;
|
||||||
|
std::string exp_content;
|
||||||
|
std::string exp_reasoning;
|
||||||
|
size_t exp_tool_calls;
|
||||||
|
};
|
||||||
|
|
||||||
|
const std::vector<cohere_case> cases = {
|
||||||
|
// #1968 follow-up fix: orphaned thinking (no <|START_THINKING|>) under --reasoning off.
|
||||||
|
{ "unopened/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 },
|
||||||
|
{ "unopened/NONE -> content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||||
|
COMMON_REASONING_FORMAT_NONE, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "I'm\nthinking", "", 1 },
|
||||||
|
{ "unopened/DEEPSEEK/required -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_REQUIRED, "", "I'm\nthinking", 1 },
|
||||||
|
{ "unopened text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "Hello, world!", "I'm\nthinking", 0 },
|
||||||
|
{ "tool-choice-none text/DEEPSEEK -> reasoning_content + content", "I'm\nthinking<|END_THINKING|>" + text_resp,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_NONE, "Hello, world!", "I'm\nthinking", 0 },
|
||||||
|
// Regression: reasoning enabled still routes thinking to reasoning_content.
|
||||||
|
{ "thinking-on/DEEPSEEK -> reasoning_content", "I'm\nthinking<|END_THINKING|>" + act,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, true, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "I'm\nthinking", 1 },
|
||||||
|
{ "thinking-on/NONE -> tagged content", "<|START_THINKING|>I'm\nthinking<|END_THINKING|>" + act,
|
||||||
|
COMMON_REASONING_FORMAT_NONE, true, COMMON_CHAT_TOOL_CHOICE_AUTO,
|
||||||
|
"<|START_THINKING|>I'm\nthinking<|END_THINKING|>", "", 1 },
|
||||||
|
// Regression: existing #1968 shapes still parse to clean native tool calls.
|
||||||
|
{ "bare-end/DEEPSEEK -> clean call", "<|END_THINKING|>" + act,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||||
|
{ "bare-end/trailing-end-text/DEEPSEEK -> clean call", "<|END_THINKING|>" + act + "<|END_TEXT|>",
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||||
|
{ "bare-end/numeric-id/DEEPSEEK -> clean call", "<|END_THINKING|>" + act_numeric_id,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||||
|
{ "empty-block/DEEPSEEK -> clean call", "<|START_THINKING|><|END_THINKING|>" + act,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||||
|
{ "no-thinking/DEEPSEEK -> clean call", act,
|
||||||
|
COMMON_REASONING_FORMAT_DEEPSEEK, false, COMMON_CHAT_TOOL_CHOICE_AUTO, "", "", 1 },
|
||||||
|
};
|
||||||
|
|
||||||
|
for (const auto & c : cases) {
|
||||||
|
common_chat_templates_inputs inputs;
|
||||||
|
inputs.messages = { user };
|
||||||
|
inputs.tools = { special_function };
|
||||||
|
inputs.tool_choice = c.tool_choice;
|
||||||
|
inputs.reasoning_format = c.reasoning_format;
|
||||||
|
inputs.enable_thinking = c.enable_thinking;
|
||||||
|
|
||||||
|
auto params = common_chat_templates_apply(tmpls.get(), inputs);
|
||||||
|
auto pos = params.generation_prompt.rfind("<|START_THINKING|>");
|
||||||
|
|
||||||
|
common_peg_arena arena;
|
||||||
|
arena.load(params.parser);
|
||||||
|
|
||||||
|
common_chat_parser_params pp(params);
|
||||||
|
if (pos != std::string::npos) {
|
||||||
|
pp.generation_prompt = params.generation_prompt.substr(0, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto msg = common_chat_peg_parse(arena, c.input, /* is_partial = */ false, pp);
|
||||||
|
|
||||||
|
t.assert_equal(std::string(c.name) + " : content", c.exp_content, msg.content);
|
||||||
|
t.assert_equal(std::string(c.name) + " : reasoning", c.exp_reasoning, msg.reasoning_content);
|
||||||
|
t.assert_equal(std::string(c.name) + " : tool calls", c.exp_tool_calls, msg.tool_calls.size());
|
||||||
|
if (c.exp_tool_calls == 1 && msg.tool_calls.size() == 1) {
|
||||||
|
t.assert_equal(std::string(c.name) + " : tool name", std::string("special_function"), msg.tool_calls[0].name);
|
||||||
|
t.assert_equal(std::string(c.name) + " : tool id", std::string("0"), msg.tool_calls[0].id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::string parallel_act =
|
||||||
|
"<|START_ACTION|>[\n"
|
||||||
|
" {\"tool_call_id\": \"0\", \"tool_name\": \"special_function\", \"parameters\": {\"arg1\": 1}},\n"
|
||||||
|
" {\"tool_call_id\": \"1\", \"tool_name\": \"python\", \"parameters\": {\"code\": \"print('hey')\"}}\n"
|
||||||
|
"]<|END_ACTION|>";
|
||||||
|
|
||||||
|
common_chat_templates_inputs parallel_inputs;
|
||||||
|
parallel_inputs.messages = { user };
|
||||||
|
parallel_inputs.tools = { special_function, python };
|
||||||
|
parallel_inputs.tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
|
||||||
|
parallel_inputs.reasoning_format = COMMON_REASONING_FORMAT_DEEPSEEK;
|
||||||
|
parallel_inputs.enable_thinking = false;
|
||||||
|
parallel_inputs.parallel_tool_calls = true;
|
||||||
|
|
||||||
|
auto parallel_params = common_chat_templates_apply(tmpls.get(), parallel_inputs);
|
||||||
|
auto parallel_pos = parallel_params.generation_prompt.rfind("<|START_THINKING|>");
|
||||||
|
|
||||||
|
common_peg_arena parallel_arena;
|
||||||
|
parallel_arena.load(parallel_params.parser);
|
||||||
|
|
||||||
|
common_chat_parser_params parallel_pp(parallel_params);
|
||||||
|
if (parallel_pos != std::string::npos) {
|
||||||
|
parallel_pp.generation_prompt = parallel_params.generation_prompt.substr(0, parallel_pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto parallel_msg = common_chat_peg_parse(
|
||||||
|
parallel_arena,
|
||||||
|
"I'm\nthinking<|END_THINKING|>" + parallel_act,
|
||||||
|
/* is_partial = */ false,
|
||||||
|
parallel_pp);
|
||||||
|
|
||||||
|
t.assert_equal("parallel : content", std::string(""), parallel_msg.content);
|
||||||
|
t.assert_equal("parallel : reasoning", std::string("I'm\nthinking"), parallel_msg.reasoning_content);
|
||||||
|
t.assert_equal("parallel : tool calls", 2u, parallel_msg.tool_calls.size());
|
||||||
|
if (parallel_msg.tool_calls.size() == 2) {
|
||||||
|
t.assert_equal("parallel : tool 0 name", std::string("special_function"), parallel_msg.tool_calls[0].name);
|
||||||
|
t.assert_equal("parallel : tool 0 id", std::string("0"), parallel_msg.tool_calls[0].id);
|
||||||
|
t.assert_equal("parallel : tool 1 name", std::string("python"), parallel_msg.tool_calls[1].name);
|
||||||
|
t.assert_equal("parallel : tool 1 id", std::string("1"), parallel_msg.tool_calls[1].id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user