server : return HTTP 400 on invalid grammar (#24144) (#24154)

Throw on grammar parse failure so the server returns HTTP 400
instead of silently dropping the constraint.
Add a regression test for the invalid-grammar response.

Fixes #24144
This commit is contained in:
Anuj Attri 2026-06-18 06:49:14 -04:00 committed by GitHub
parent 552258c535
commit 10786217e9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 17 additions and 0 deletions

View File

@@ -259,6 +259,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
}
}
}
if (!grmr && !grammar_str.empty()) {
throw std::runtime_error("failed to parse grammar");
}
// Compute prefill tokens from the generation prompt
std::vector<llama_token> prefill_tokens;

View File

@@ -307,6 +307,20 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
assert match_regex(re_content, choice["message"]["content"]), choice["message"]["content"]
def test_completion_with_invalid_grammar():
    """Regression test: a malformed grammar must be rejected with HTTP 400.

    Sends a chat completion request whose "grammar" field is not valid GBNF
    and checks that the response carries status 400 and an "error" entry,
    rather than the request succeeding with the grammar silently ignored.
    """
    global server
    server.start()

    # The grammar string is intentionally broken (note the unclosed
    # parenthesis); the message content itself is irrelevant to the outcome.
    payload = {
        "max_tokens": 8,
        "messages": [
            {"role": "user", "content": "Does not matter what I say, does it?"},
        ],
        "grammar": "root ::= this is (not valid GBNF",
    }
    response = server.make_request("POST", "/chat/completions", data=payload)

    # On failure, surface the response body to make the mismatch diagnosable.
    assert response.status_code == 400, response.body
    assert "error" in response.body
@pytest.mark.parametrize("messages", [
None,
"string",