common: handle Laguna chat delimiters (#1943)

* common: handle Laguna chat delimiters

* common: limit tool parser changes to end-delimited content

---------

Co-authored-by: Joel Farthing <262452229+joelfarthing@users.noreply.github.com>
2026-06-28 04:30:15 -05:00 · 2026-06-10 00:46:19 -05:00 · 2026-06-10 00:46:19 -05:00 · 71d5aa21f7
commit 71d5aa21f7
parent 366e478cb6
4 changed files with 66 additions and 7 deletions
--- a/common/chat-auto-parser-generator.cpp
+++ b/common/chat-auto-parser-generator.cpp
@ -93,7 +93,7 @@ common_peg_arena autoparser::build_parser(const generation_params & inputs) cons
    }
    return build_chat_peg_parser([&](common_chat_peg_builder & p) {
        parser_build_context ctx(p, inputs);
-        bool                 extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE;
+        bool                 extract_reasoning = inputs.reasoning_format != COMMON_REASONING_FORMAT_NONE && inputs.enable_thinking;
        ctx.extracting_reasoning = extract_reasoning && reasoning.mode != reasoning_mode::NONE;
        ctx.content              = &content;
@ -155,6 +155,16 @@ common_peg_parser analyze_content::build_parser(parser_build_context & ctx) cons
        }
        return p.content(p.until(start)) + start + p.content(p.until(end)) + end + p.end();
    }
    if (is_end_delimited()) {
        auto content = p.choice({
            p.content(p.until(end)) + p.optspace(end),
            p.content(p.rest()),
        });
        if (ctx.extracting_reasoning) {
            return ctx.reasoning_parser + p.space() + content + p.end();
        }
        return content + p.end();
    }
    return ctx.reasoning_parser + p.content(p.rest()) + p.end();
 }
@ -216,7 +226,6 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
        auto wrapped_content = ctx.content->build_optional_wrapped(ctx);
        return ctx.reasoning_parser + wrapped_content + tools_parser + p.end();
    }
    std::string tool_start = "{";
    if (!format.section_start.empty()) {
        tool_start = format.section_start;
@ -224,7 +233,12 @@ common_peg_parser analyze_tools::build_tool_parser_json_native(parser_build_cont
        tool_start = format.per_call_start;
    }
-    return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
+    if (!ctx.content || !ctx.content->is_end_delimited()) {
        return ctx.reasoning_parser + p.optional(p.content(p.until(tool_start))) + tools_parser + p.end();
    }
    auto content_end = p.optional(p.optspace(ctx.content->end));
    return ctx.reasoning_parser + p.space() + p.optional(p.content(p.until(tool_start))) + tools_parser + content_end + p.end();
 }
 common_peg_parser analyze_tools::build_func_parser(common_chat_peg_builder & p, const std::string & name,
@ -333,7 +347,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_json(parser_build_context
    std::string trigger_marker       = !format.section_start.empty() ? format.section_start : format.per_call_start;
    auto        content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
-    return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
+
    if (!ctx.content || !ctx.content->is_end_delimited()) {
        return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
    }
    auto content_end = p.optional(p.optspace(ctx.content->end));
    return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end();
 }
 common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_context & ctx) const {
@ -464,7 +484,13 @@ common_peg_parser analyze_tools::build_tool_parser_tag_tagged(parser_build_conte
    std::string trigger_marker       = !format.section_start.empty() ? format.section_start : format.per_call_start;
    auto        content_before_tools = trigger_marker.empty() ? p.eps() : p.until(trigger_marker);
-    return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
+
    if (!ctx.content || !ctx.content->is_end_delimited()) {
        return ctx.reasoning_parser + p.optional(p.content(content_before_tools)) + tool_calls + p.end();
    }
    auto content_end = p.optional(p.optspace(ctx.content->end));
    return ctx.reasoning_parser + p.space() + p.optional(p.content(content_before_tools)) + tool_calls + content_end + p.end();
 }
 }  // namespace autoparser
--- a/common/chat-auto-parser.h
+++ b/common/chat-auto-parser.h
@ -101,6 +101,7 @@ enum class content_mode {
    PLAIN,                   // No content markers
    ALWAYS_WRAPPED,          // Content always wrapped with markers
    WRAPPED_WITH_REASONING,  // Content wrapped only when reasoning present
    END_DELIMITED,           // Content is terminated by a marker but has no start marker
 };
 inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
@ -111,6 +112,8 @@ inline std::ostream & operator<<(std::ostream & os, const content_mode & mode) {
            return os << "ALWAYS_WRAPPED";
        case content_mode::WRAPPED_WITH_REASONING:
            return os << "WRAPPED_WITH_REASONING";
        case content_mode::END_DELIMITED:
            return os << "END_DELIMITED";
        default:
            return os << "UNKNOWN";
    }
@ -286,6 +289,7 @@ struct analyze_content : analyze_base {
    common_peg_parser build_parser(parser_build_context & ctx) const override;
    bool is_always_wrapped() const;
    bool is_end_delimited() const;
    common_peg_parser build_optional_wrapped(parser_build_context & ctx) const;
 };
--- a/common/chat-diff-analyzer.cpp
+++ b/common/chat-diff-analyzer.cpp
@ -45,6 +45,28 @@ static std::vector<std::function<void(const common_chat_template & tmpl, autopar
              LOG_DBG(ANSI_ORANGE "[Patch: old Qwen/Deepseek thinking template]\n" ANSI_RESET);
          }
      },
      // Poolside Laguna: the generation prompt already emits the opening <think>, so the
      // model output begins inside the reasoning section and only the closing </think> is
      // generated. Content has no start marker and is terminated by </assistant>.
      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
          const auto & src = tmpl.src;

          // All three fingerprints must be present for the template to count as Laguna.
          const bool is_laguna = src.find("laguna_glm_thinking") != std::string::npos &&
                                 src.find("{{- \"<assistant>\\n\" -}}") != std::string::npos &&
                                 src.find("{{- '<think>' -}}") != std::string::npos;
          if (!is_laguna) {
              return;
          }

          // Reasoning: tag-based with an empty start marker, closed by </think>.
          analysis.reasoning.mode  = reasoning_mode::TAG_BASED;
          analysis.reasoning.start = "";
          analysis.reasoning.end   = "</think>";

          // Content: no opening marker, closed by </assistant>.
          analysis.content.mode = content_mode::END_DELIMITED;
          analysis.content.end  = "</assistant>";

          // Append a marker to the preserved-token list unless it is already listed.
          auto &     tokens   = analysis.preserved_tokens;
          const auto preserve = [&tokens](const char * marker) {
              if (std::find(tokens.begin(), tokens.end(), marker) == tokens.end()) {
                  tokens.push_back(marker);
              }
          };
          preserve("</think>");
          preserve("</assistant>");

          LOG_DBG(ANSI_ORANGE "[Patch: Poolside Laguna thinking template]\n" ANSI_RESET);
      },
      // Granite 3.3, with separate reasoning and content markers
      [](const common_chat_template & tmpl, autoparser & analysis) -> void {
          if (tmpl.src.find("Write your thoughts between <think></think> and write your response between "
@ -552,6 +574,10 @@ bool analyze_content::is_always_wrapped() const {
    return mode == content_mode::ALWAYS_WRAPPED && !start.empty() && !end.empty();
 }
 // True when content is in END_DELIMITED mode and an end marker is actually set;
 // an empty end marker leaves nothing to delimit on, so it reports false.
 bool analyze_content::is_end_delimited() const {
    if (mode != content_mode::END_DELIMITED) {
        return false;
    }
    return !end.empty();
 }
 analyze_tools::analyze_tools(const common_chat_template & tmpl,
                             const jinja::caps &          caps,
                             const analyze_reasoning &    reasoning)
--- a/docs/autoparser.md
+++ b/docs/autoparser.md
@ -69,6 +69,7 @@ Three outcomes for reasoning-prefill handling (in `generate_parser()`):
 | `PLAIN`                  | No content markers                                             |
 | `ALWAYS_WRAPPED`         | Content always wrapped: `<response>...</response>`             |
 | `WRAPPED_WITH_REASONING` | Content wrapped only when reasoning is present                 |
 | `END_DELIMITED`          | Content has no start marker but ends at a marker               |
 **`tool_format`**: Classification of tool call structure.
@ -357,6 +358,7 @@ A workaround array in `common/chat-diff-analyzer.cpp` applies post-hoc patches a
 3. **Cohere Command R+** — source contains `<|CHATBOT_TOKEN|>`: sets `ALWAYS_WRAPPED` content mode if no content start is already set
 4. **Functionary 3.1** — source contains `set has_code_interpreter`: forces `PLAIN` content, specific `per_call_start/end`, clears preserved tokens to only keep Functionary-specific markers
 5. **DeepSeek-R1-Distill-Qwen** — source contains `tool▁calls▁begin` markers: overrides tool section/per-call markers with the correct Unicode block characters
 6. **Poolside Laguna** — source contains `laguna_glm_thinking` and the Laguna generation prompt pattern: sets delimiter-style reasoning ending at `</think>` and `END_DELIMITED` content ending at `</assistant>`
 ### Parser Building
@ -380,6 +382,7 @@ Note: The start marker may be empty either because the analyzer detected delimit
 | Tools present                          | Dispatches to `analyze_tools::build_parser()`                                   |
 | `ALWAYS_WRAPPED` with reasoning        | `reasoning + start + content(until(end)) + end + end()`                         |
 | `ALWAYS_WRAPPED` without reasoning     | `content(until(start)) + start + content(until(end)) + end + end()`             |
 | `END_DELIMITED`                        | `reasoning + content(until(end) or rest()) + optional end marker + end()`       |
 | Default (PLAIN)                        | `reasoning + content(rest()) + end()`                                           |
 #### Tool Parsers (`analyze_tools::build_parser`)
@ -392,7 +395,7 @@ Dispatches by `format.mode`:
 - `build_json_tools_nested_keys()` — nested: `{"function": {"name": "X", "arguments": {...}}}`
 - `build_json_tools_flat_keys()` — flat: `{"name": "X", "arguments": {...}}`
-Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`.
+Handles content wrappers, array wrapping (`tools_array_wrapped`), parallel calls, and `parameter_order`. If content is `END_DELIMITED`, the content end marker is also accepted after parsed tool calls.
 **`build_tool_parser_tag_json()`**: For each tool function:
@ -417,7 +420,7 @@ For closing: uses `function.close` if present; otherwise uses `peek(per_call_end
 All three tool parsers return:
 ```text
-reasoning + optional(content(until(trigger_marker))) + tool_calls + end()
+reasoning + optional(content(until(trigger_marker))) + tool_calls + optional(content_end) + end()
 ```
 Each returned parser is wrapped by `wrap_for_generation_prompt()`, which prepends a literal for any boilerplate prefix of the generation prompt (the portion before the reasoning start marker).