mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
common: handle Laguna chat delimiters (#1943)
* common: handle Laguna chat delimiters * common: limit tool parser changes to end-delimited content --------- Co-authored-by: Joel Farthing <262452229+joelfarthing@users.noreply.github.com>
This commit is contained in:
parent
366e478cb6
commit
71d5aa21f7
@ -93,7 +93,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
|
|||||||
}
|
}
|
||||||
return build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
return build_chat_peg_parser([&](common_chat_peg_builder & p) {
|
||||||
parser_build_context ctx(p, inputs);
|
parser_build_context ctx(p, inputs);
|
||||||
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
|
bool extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE && inputs.enable_thinking;
|
||||||
|
|
||||||
ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
|
ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
|
||||||
ctx.content = &content;
|
ctx.content = &content;
|
||||||
@ -155,6 +155,16 @@ common_peg_parser analyze_content::build_parser(parser_build_context & ctx) cons
|
|||||||
}
|
}
|
||||||
return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
|
return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
|
||||||
}
|
}
|
||||||
|
if (is_end_delimited()) {
|
||||||
|
auto content = p.choice({
|
||||||
|
p.content(p.until(end)) + p.optspace(end),
|
||||||
|
p.content(p.rest()),
|
||||||
|
});
|
||||||
|
if (ctx.extracting_reasoning) {
|
||||||
|
return ctx.reasoning_parser + p.space() + content + p.end();
|
||||||
|
}
|
||||||
|
return content + p.end();
|
||||||
|
}
|
||||||
return ctx.reasoning_parser + p.content(p.rest()) + p.end();
|
return ctx.reasoning_parser + p.content(p.rest()) + p.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,7 +226,6 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
|
|||||||
auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
|
auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
|
||||||
return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
|
return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string tool_start = "{";
|
std::string tool_start = "{";
|
||||||
if (!format.section_start.empty()) {
|
if (!format.section_start.empty()) {
|
||||||
tool_start = format.section_start;
|
tool_start = format.section_start;
|
||||||
@ -224,7 +233,12 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
|
|||||||
tool_start = format.per_call_start;
|
tool_start = format.per_call_start;
|
||||||
}
|
}
|
||||||
|
|
||||||
return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
|
if (!ctx.content || !ctx.content->is_end_delimited()) {
|
||||||
|
return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto content_end = p.optional(p.optspace(ctx.content->end));
|
||||||
|
return ctx.reasoning_parser + p.space() + p.optional(p.content(p.until(tool_start))) + tools_parser + content_end + p.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p, const std::string & name,
|
common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p, const std::string & name,
|
||||||
@ -333,7 +347,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context
|
|||||||
|
|
||||||
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
|
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
|
||||||
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
|
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
|
||||||
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
|
||||||
|
if (!ctx.content || !ctx.content->is_end_delimited()) {
|
||||||
|
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto content_end = p.optional(p.optspace(ctx.content->end));
|
||||||
|
return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
|
common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
|
||||||
@ -464,7 +484,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
|
|||||||
|
|
||||||
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
|
std::string trigger_marker = !format.section_start.empty() ? format.section_start : format.per_call_start;
|
||||||
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
|
auto content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
|
||||||
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
|
||||||
|
if (!ctx.content || !ctx.content->is_end_delimited()) {
|
||||||
|
return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto content_end = p.optional(p.optspace(ctx.content->end));
|
||||||
|
return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end();
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace autoparser
|
} // namespace autoparser
|
||||||
|
|||||||
@ -101,6 +101,7 @@ enum class content_mode {
|
|||||||
PLAIN, // No content markers
|
PLAIN, // No content markers
|
||||||
ALWAYS_WRAPPED, // Content always wrapped with markers
|
ALWAYS_WRAPPED, // Content always wrapped with markers
|
||||||
WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present
|
WRAPPED_WITH_REASONING, // Content wrapped only when reasoning present
|
||||||
|
END_DELIMITED, // Content is terminated by a marker but has no start marker
|
||||||
};
|
};
|
||||||
|
|
||||||
inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
|
inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
|
||||||
@ -111,6 +112,8 @@ inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
|
|||||||
return os << "ALWAYS_WRAPPED";
|
return os << "ALWAYS_WRAPPED";
|
||||||
case content_mode::WRAPPED_WITH_REASONING:
|
case content_mode::WRAPPED_WITH_REASONING:
|
||||||
return os << "WRAPPED_WITH_REASONING";
|
return os << "WRAPPED_WITH_REASONING";
|
||||||
|
case content_mode::END_DELIMITED:
|
||||||
|
return os << "END_DELIMITED";
|
||||||
default:
|
default:
|
||||||
return os << "UNKNOWN";
|
return os << "UNKNOWN";
|
||||||
}
|
}
|
||||||
@ -286,6 +289,7 @@ struct analyze_content : analyze_base {
|
|||||||
common_peg_parser build_parser(parser_build_context & ctx) const override;
|
common_peg_parser build_parser(parser_build_context & ctx) const override;
|
||||||
|
|
||||||
bool is_always_wrapped() const;
|
bool is_always_wrapped() const;
|
||||||
|
bool is_end_delimited() const;
|
||||||
common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
|
common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
@ -45,6 +45,28 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
|
|||||||
LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
|
LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
// Poolside Laguna templates prefill <think> in the generation prompt, so generated
|
||||||
|
// reasoning starts immediately and is delimited only by </think>.
|
||||||
|
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
|
||||||
|
if (tmpl.src.find("laguna_glm_thinking") != std::string::npos &&
|
||||||
|
tmpl.src.find("{{- \"<assistant>\\n\" -}}") != std::string::npos &&
|
||||||
|
tmpl.src.find("{{- '<think>' -}}") != std::string::npos) {
|
||||||
|
analysis.reasoning.mode = reasoning_mode::TAG_BASED;
|
||||||
|
analysis.reasoning.start = "";
|
||||||
|
analysis.reasoning.end = "</think>";
|
||||||
|
analysis.content.mode = content_mode::END_DELIMITED;
|
||||||
|
analysis.content.end = "</assistant>";
|
||||||
|
if (std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "</think>") ==
|
||||||
|
analysis.preserved_tokens.end()) {
|
||||||
|
analysis.preserved_tokens.push_back("</think>");
|
||||||
|
}
|
||||||
|
if (std::find(analysis.preserved_tokens.begin(), analysis.preserved_tokens.end(), "</assistant>") ==
|
||||||
|
analysis.preserved_tokens.end()) {
|
||||||
|
analysis.preserved_tokens.push_back("</assistant>");
|
||||||
|
}
|
||||||
|
LOG_DBG(ANSI_ORANGE "[Patch: Poolside Laguna thinking template]\n" ANSI_RESET);
|
||||||
|
}
|
||||||
|
},
|
||||||
// Granite 3.3, with separate reasoning and content markers
|
// Granite 3.3, with separate reasoning and content markers
|
||||||
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
|
[](const common_chat_template & tmpl, autoparser & analysis) -> void {
|
||||||
if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
|
if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
|
||||||
@ -552,6 +574,10 @@ bool analyze_content::is_always_wrapped() const {
|
|||||||
return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty();
|
return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool analyze_content::is_end_delimited() const {
|
||||||
|
return mode == content_mode::END_DELIMITED && !end.empty();
|
||||||
|
}
|
||||||
|
|
||||||
analyze_tools::analyze_tools(const common_chat_template & tmpl,
|
analyze_tools::analyze_tools(const common_chat_template & tmpl,
|
||||||
const jinja::caps & caps,
|
const jinja::caps & caps,
|
||||||
const analyze_reasoning & reasoning)
|
const analyze_reasoning & reasoning)
|
||||||
|
|||||||
@ -69,6 +69,7 @@ Three outcomes for reasoning-prefill handling (in `generate_parser()`):
|
|||||||
| `PLAIN` | No content markers |
|
| `PLAIN` | No content markers |
|
||||||
| `ALWAYS_WRAPPED` | Content always wrapped: `<response>...</response>` |
|
| `ALWAYS_WRAPPED` | Content always wrapped: `<response>...</response>` |
|
||||||
| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present |
|
| `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present |
|
||||||
|
| `END_DELIMITED` | Content has no start marker but ends at a marker |
|
||||||
|
|
||||||
**`tool_format`**: Classification of tool call structure.
|
**`tool_format`**: Classification of tool call structure.
|
||||||
|
|
||||||
@ -357,6 +358,7 @@ A workaround array in `common/chat-diff-analyzer.cpp` applies post-hoc patches a
|
|||||||
3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set
|
3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set
|
||||||
4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers
|
4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers
|
||||||
5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters
|
5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters
|
||||||
|
6. **Poolside Laguna** — source contains `laguna_glm_thinking` and the Laguna generation prompt pattern: sets delimiter-style reasoning ending at `</think>` and `END_DELIMITED` content ending at `</assistant>`
|
||||||
|
|
||||||
### Parser Building
|
### Parser Building
|
||||||
|
|
||||||
@ -380,6 +382,7 @@ Note: The start marker may be empty either because the analyzer detected delimit
|
|||||||
| Tools present | Dispatches to `analyze_tools::build_parser()` |
|
| Tools present | Dispatches to `analyze_tools::build_parser()` |
|
||||||
| `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` |
|
| `ALWAYS_WRAPPED` with reasoning | `reasoning + start + content(until(end)) + end + end()` |
|
||||||
| `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` |
|
| `ALWAYS_WRAPPED` without reasoning | `content(until(start)) + start + content(until(end)) + end + end()` |
|
||||||
|
| `END_DELIMITED` | `reasoning + content(until(end) or rest()) + optional end marker + end()` |
|
||||||
| Default (PLAIN) | `reasoning + content(rest()) + end()` |
|
| Default (PLAIN) | `reasoning + content(rest()) + end()` |
|
||||||
|
|
||||||
#### Tool Parsers (`analyze_tools::build_parser`)
|
#### Tool Parsers (`analyze_tools::build_parser`)
|
||||||
@ -392,7 +395,7 @@ Dispatches by `format.mode`:
|
|||||||
- `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}`
|
- `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}`
|
||||||
- `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}`
|
- `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}`
|
||||||
|
|
||||||
Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`.
|
Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`. If content is `END_DELIMITED`, the content end marker is also accepted after parsed tool calls.
|
||||||
|
|
||||||
**`build_tool_parser_tag_json()`**: For each tool function:
|
**`build_tool_parser_tag_json()`**: For each tool function:
|
||||||
|
|
||||||
@ -417,7 +420,7 @@ For closing: uses `function.close` if present; otherwise uses `peek(per_call_end
|
|||||||
All three tool parsers return:
|
All three tool parsers return:
|
||||||
|
|
||||||
```text
|
```text
|
||||||
reasoning + optional(content(until(trigger_marker))) + tool_calls + end()
|
reasoning + optional(content(until(trigger_marker))) + tool_calls + optional(content_end) + end()
|
||||||
```
|
```
|
||||||
|
|
||||||
Each returned parser is wrapped by `wrap_for_generation_prompt()`, which prepends a literal for any boilerplate prefix of the generation prompt (the portion before the reasoning start marker).
|
Each returned parser is wrapped by `wrap_for_generation_prompt()`, which prepends a literal for any boilerplate prefix of the generation prompt (the portion before the reasoning start marker).
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user