mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2026-06-27 23:50:20 -05:00
Throw on grammar parse failure so the server returns HTTP 400 instead of silently dropping the constraint. Add a regression test for the invalid-grammar response. Fixes #24144
This commit is contained in:
parent
552258c535
commit
10786217e9
@ -259,6 +259,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!grmr && !grammar_str.empty()) {
|
||||
throw std::runtime_error("failed to parse grammar");
|
||||
}
|
||||
|
||||
// Compute prefill tokens from the generation prompt
|
||||
std::vector<llama_token> prefill_tokens;
|
||||
|
||||
@ -307,6 +307,20 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
|
||||
assert match_regex(re_content, choice["message"]["content"]), choice["message"]["content"]
|
||||
|
||||
|
||||
def test_completion_with_invalid_grammar():
|
||||
global server
|
||||
server.start()
|
||||
res = server.make_request("POST", "/chat/completions", data={
|
||||
"max_tokens": 8,
|
||||
"messages": [
|
||||
{"role": "user", "content": "Does not matter what I say, does it?"},
|
||||
],
|
||||
"grammar": "root ::= this is (not valid GBNF",
|
||||
})
|
||||
assert res.status_code == 400, res.body
|
||||
assert "error" in res.body
|
||||
|
||||
|
||||
@pytest.mark.parametrize("messages", [
|
||||
None,
|
||||
"string",
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user