mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
common: handle Laguna chat delimiters (#1943)
* common: handle Laguna chat delimiters * common: limit tool parser changes to end-delimited content --------- Co-authored-by: Joel Farthing <262452229+joelfarthing@users.noreply.github.com>
This commit is contained in:
parent
366e478cb6
commit
71d5aa21f7
@ -93,7 +93,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
|
||||
}
|
||||
return build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
||||
parser_build_context ctx(p, inputs);
|
||||
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
||||
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE && inputs.enable_thinking;
|
||||
|
||||
ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
|
||||
ctx.content = &content;
|
||||
@ -155,6 +155,16 @@ common_peg_parser analyze_content::build_parser(parser_build_context & ctx) cons
|
||||
}
|
||||
return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
|
||||
}
|
||||
if (is_end_delimited()) {
|
||||
auto content = p.choice({
|
||||
p.content(p.until(end)) + p.optspace(end),
|
||||
p.content(p.rest()),
|
||||
});
|
||||
if (ctx.extracting_reasoning) {
|
||||
return ctx.reasoning_parser + p.space() + content + p.end();
|
||||
}
|
||||
return content + p.end();
|
||||
}
|
||||
return ctx.reasoning_parser + p.content(p.rest()) + p.end();
|
||||
}
|
||||
|
||||
@ -216,7 +226,6 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
|
||||
auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
|
||||
return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
|
||||
}
|
||||
|
||||
std::string tool_start = "{";
|
||||
if (!format.section_start.empty()) {
|
||||
tool_start = format.section_start;
|
||||
@ -224,7 +233,12 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
|
||||
tool_start = format.per_call_start;
|
||||
}
|
||||
|
||||
return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
|
||||
if (!ctx.content || !ctx.content->is_end_delimited()) {
|
||||
return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
|
||||
}
|
||||
|
||||
auto content_end = p.optional(p.optspace(ctx.content->end));
|
||||
return ctx.reasoning_parser + p.space() + p.optional(p.content(p.until(tool_start))) + tools_parser + content_end + p.end();
|
||||
}
|
||||
|
||||
common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p, const std::string & name,
|
||||
@ -333,7 +347,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context
|
||||
|
||||
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
|
||||
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
|
||||
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
||||
|
||||
if (!ctx.content || !ctx.content->is_end_delimited()) {
|
||||
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
||||
}
|
||||
|
||||
auto content_end = p.optional(p.optspace(ctx.content->end));
|
||||
return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end();
|
||||
}
|
||||
|
||||
common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
|
||||
@ -464,7 +484,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
||||
|
||||
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
|
||||
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
|
||||
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
||||
|
||||
if (!ctx.content || !ctx.content->is_end_delimited()) {
|
||||
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
||||
}
|
||||
|
||||
auto content_end = p.optional(p.optspace(ctx.content->end));
|
||||
return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end();
|
||||
}
|
||||
|
||||
} // namespace autoparser
|
||||
|
||||
@ -101,6 +101,7 @@ enum class content_mode {
|
||||
PLAIN, // No content markers
|
||||
ALWAYS_WRAPPED, // Content always wrapped with markers
|
||||
WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present
|
||||
END_DELIMITED, // Content is terminated by a marker but has no start marker
|
||||
};
|
||||
|
||||
inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
|
||||
@ -111,6 +112,8 @@ inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
|
||||
return os << "ALWAYS_WRAPPED";
|
||||
case content_mode::WRAPPED_WITH_REASONING:
|
||||
return os << "WRAPPED_WITH_REASONING";
|
||||
case content_mode::END_DELIMITED:
|
||||
return os << "END_DELIMITED";
|
||||
default:
|
||||
return os << "UNKNOWN";
|
||||
}
|
||||
@ -286,6 +289,7 @@ struct analyze_content : analyze_base {
|
||||
common_peg_parser build_parser(parser_build_context & ctx) const override;
|
||||
|
||||
bool is_always_wrapped() const;
|
||||
bool is_end_delimited() const;
|
||||
common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
|
||||
};
|
||||
|
||||
|
||||
@ -45,6 +45,28 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
|
||||
LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
|
||||
}
|
||||
},
|
||||
// Poolside Laguna templates prefill <think> in the generation prompt, so generated
|
||||
// reasoning starts immediately and is delimited only by </think>.
|
||||
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
|
||||
if (tmpl.src.find("laguna_glm_thinking") != std::string::npos &&
|
||||
tmpl.src.find("{{- \"<assistant>\\n\" -}}") != std::string::npos &&
|
||||
tmpl.src.find("{{- '<think>' -}}") != std::string::npos) {
|
||||
analysis.reasoning.mode = reasoning_mode::TAG_BASED;
|
||||
analysis.reasoning.start = "";
|
||||
analysis.reasoning.end = "</think>";
|
||||
analysis.content.mode = content_mode::END_DELIMITED;
|
||||
analysis.content.end = "</assistant>";
|
||||
if (std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "</think>") ==
|
||||
analysis.preserved_tokens.end()) {
|
||||
analysis.preserved_tokens.push_back("</think>");
|
||||
}
|
||||
if (std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "</assistant>") ==
|
||||
analysis.preserved_tokens.end()) {
|
||||
analysis.preserved_tokens.push_back("</assistant>");
|
||||
}
|
||||
LOG_DBG(ANSI_ORANGE "[Patch: Poolside Laguna thinking template]\n" ANSI_RESET);
|
||||
}
|
||||
},
|
||||
// Granite 3.3, with separate reasoning and content markers
|
||||
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
|
||||
if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
|
||||
@ -552,6 +574,10 @@ bool analyze_content::is_always_wrapped() const {
|
||||
return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty();
|
||||
}
|
||||
|
||||
bool analyze_content::is_end_delimited() const {
|
||||
return mode == content_mode::END_DELIMITED && !end.empty();
|
||||
}
|
||||
|
||||
analyze_tools::analyze_tools(const common_chat_template & tmpl,
|
||||
const jinja::caps & caps,
|
||||
const analyze_reasoning & reasoning)
|
||||
|
||||
@ -69,6 +69,7 @@ Three outcomes for reasoning-prefill handling (in `generate_parser()`):
|
||||
| `PLAIN` | No content markers |
|
||||
| `ALWAYS_WRAPPED` | Content always wrapped: `<response>...</response>` |
|
||||
| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present |
|
||||
| `END_DELIMITED` | Content has no start marker but ends at a marker |
|
||||
|
||||
**`tool_format`**: Classification of tool call structure.
|
||||
|
||||
@ -357,6 +358,7 @@ A workaround array in `common/chat-diff-analyzer.cpp` applies post-hoc patches a
|
||||
3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set
|
||||
4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers
|
||||
5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters
|
||||
6. **Poolside Laguna** — source contains `laguna_glm_thinking` and the Laguna generation prompt pattern: sets delimiter-style reasoning ending at `</think>` and `END_DELIMITED` content ending at `</assistant>`
|
||||
|
||||
### Parser Building
|
||||
|
||||
@ -380,6 +382,7 @@ Note: The start marker may be empty either because the analyzer detected delimit
|
||||
| Tools present | Dispatches to `analyze_tools::build_parser()` |
|
||||
| `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` |
|
||||
| `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` |
|
||||
| `END_DELIMITED` | `reasoning + content(until(end) or rest()) + optional end marker + end()` |
|
||||
| Default (PLAIN) | `reasoning + content(rest()) + end()` |
|
||||
|
||||
#### Tool Parsers (`analyze_tools::build_parser`)
|
||||
@ -392,7 +395,7 @@ Dispatches by `format.mode`:
|
||||
- `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}`
|
||||
- `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}`
|
||||
|
||||
Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`.
|
||||
Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`. If content is `END_DELIMITED`, the content end marker is also accepted after parsed tool calls.
|
||||
|
||||
**`build_tool_parser_tag_json()`**: For each tool function:
|
||||
|
||||
@ -417,7 +420,7 @@ For closing: uses `function.close` if present; otherwise uses `peek(per_call_end
|
||||
All three tool parsers return:
|
||||
|
||||
```text
|
||||
reasoning + optional(content(until(trigger_marker))) + tool_calls + end()
|
||||
reasoning + optional(content(until(trigger_marker))) + tool_calls + optional(content_end) + end()
|
||||
```
|
||||
|
||||
Each returned parser is wrapped by `wrap_for_generation_prompt()`, which prepends a literal for any boilerplate prefix of the generation prompt (the portion before the reasoning start marker).
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user