mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
chat: fix an "oldie but goodie" grammar generator bug that surfaced during the latest changes
This commit is contained in:
parent
9dbc6621ae
commit
6786edd14a
@ -1507,6 +1507,7 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
|
||||
auto pieces = matcher.collect_prefix_and_next();
|
||||
|
||||
std::string pattern;
|
||||
std::string trailing; // optional proper-prefix of a delimiter, allowed only at the very end
|
||||
for (size_t i = 0; i < pieces.size(); ++i) {
|
||||
if (i > 0) {
|
||||
pattern += " | ";
|
||||
@ -1522,13 +1523,32 @@ static std::string gbnf_excluding_pattern(const std::vector<std::string> & strin
|
||||
}
|
||||
|
||||
if (!pre.empty()) {
|
||||
pattern += gbnf_format_literal(common_unicode_cpts_to_utf8(pre)) + " [^" + cls + "]";
|
||||
std::string pre_literal = gbnf_format_literal(common_unicode_cpts_to_utf8(pre));
|
||||
pattern += pre_literal + " [^" + cls + "]";
|
||||
// Each interior alternative consumes a delimiter-prefix plus a disambiguating
|
||||
// char, so the repetition alone cannot match a value that *ends* on a proper
|
||||
// prefix of a delimiter (e.g. a trailing "\n" when the delimiter is
|
||||
// "\n</parameter>\n"). The runtime until() (greedy first-match) accepts such
|
||||
// values, so without this the grammar would reject input the parser accepts.
|
||||
// Allow the value to terminate on any proper prefix as an optional tail.
|
||||
// This makes the grammar a slight superset of the runtime language (a value
|
||||
// may end on the longest prefix, which greedy first-match would not itself
|
||||
// produce); harmless for constrained generation, which only needs to admit
|
||||
// every runtime-valid string.
|
||||
if (!trailing.empty()) {
|
||||
trailing += " | ";
|
||||
}
|
||||
trailing += pre_literal;
|
||||
} else {
|
||||
pattern += "[^" + cls + "]";
|
||||
}
|
||||
}
|
||||
|
||||
return "(" + pattern + ")*";
|
||||
std::string result = "(" + pattern + ")*";
|
||||
if (!trailing.empty()) {
|
||||
result += " (" + trailing + ")?";
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
static std::unordered_set<std::string> collect_reachable_rules(
|
||||
|
||||
@ -2024,6 +2024,61 @@ static void test_template_output_peg_parsers(bool detailed_debug) {
|
||||
})
|
||||
.run();
|
||||
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.c\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>\n"
|
||||
"#iclunde\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=newString>\n"
|
||||
"#include\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.enable_thinking(false)
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.tools({
|
||||
edit_tool
|
||||
})
|
||||
.expect_tool_calls({
|
||||
{ "edit", "{\"filename\": \"foo.c\", \"oldString\": \"#iclunde\", \"newString\": \"#include\"}", {} },
|
||||
})
|
||||
.run();
|
||||
|
||||
// a parameter value that itself ends in a newline (e.g. a source file with a
|
||||
// trailing newline). The structural delimiter is "\n</parameter>\n", so the value
|
||||
// "#include\n" renders as "...#include\n\n</parameter>\n". The trailing newline must
|
||||
// be preserved faithfully (no stripping), and the generated grammar must admit a
|
||||
// value ending on a delimiter prefix. Regression test for gbnf_excluding_pattern.
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
"<function=edit>\n"
|
||||
"<parameter=filename>\n"
|
||||
"foo.c\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=oldString>\n"
|
||||
"#iclunde\n"
|
||||
"</parameter>\n"
|
||||
"<parameter=newString>\n"
|
||||
"#include\n"
|
||||
"\n"
|
||||
"</parameter>\n"
|
||||
"</function>\n"
|
||||
"</tool_call>")
|
||||
.enable_thinking(false)
|
||||
.reasoning_format(COMMON_REASONING_FORMAT_AUTO)
|
||||
.tools({
|
||||
edit_tool
|
||||
})
|
||||
.expect_tool_calls({
|
||||
{ "edit", "{\"filename\": \"foo.c\", \"oldString\": \"#iclunde\", \"newString\": \"#include\\n\"}", {} },
|
||||
})
|
||||
.run();
|
||||
|
||||
|
||||
// test code that starts with indent
|
||||
tst.test(
|
||||
"<tool_call>\n"
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user