server : return HTTP 400 on invalid grammar (#24144) (#24154)

Throw on grammar parse failure so the server returns HTTP 400 instead of silently dropping the constraint. Add a regression test for the invalid-grammar response. Fixes #24144
2026-06-27 23:50:20 -05:00 · 2026-06-18 06:49:14 -04:00 · 2026-06-18 06:49:14 -04:00 · 10786217e9
commit 10786217e9
parent 552258c535
2 changed files with 17 additions and 0 deletions
--- a/common/sampling.cpp
+++ b/common/sampling.cpp
@@ -259,6 +259,9 @@ struct common_sampler * common_sampler_init(const struct llama_model * model, st
             }
        }
    }
+    if (!grmr && !grammar_str.empty()) {
+        throw std::runtime_error("failed to parse grammar");
+    }

    // Compute prefill tokens from the generation prompt
    std::vector<llama_token> prefill_tokens;
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -307,6 +307,20 @@ def test_completion_with_grammar(jinja: bool, grammar: str, n_predicted: int, re
    assert match_regex(re_content, choice["message"]["content"]), choice["message"]["content"]


+def test_completion_with_invalid_grammar():  # regression test: a malformed grammar must be rejected with HTTP 400
+    global server
+    server.start()
+    res = server.make_request("POST", "/chat/completions", data={
+        "max_tokens": 8,
+        "messages": [
+            {"role": "user", "content": "Does not matter what I say, does it?"},
+        ],
+        "grammar": "root ::= this is (not valid GBNF",  # deliberately malformed: the "(" is never closed
+    })
+    assert res.status_code == 400, res.body  # res.body is shown as the failure message
+    assert "error" in res.body  # the response body must mention "error"
+
+
@pytest.mark.parametrize("messages", [
    None,
    "string",