mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
* grammar: Fix grammar root symbol check (#19761) * grammar: fix bad check for root symbol, correct error logging * add tests to demonstrate root symbol check failure # Conflicts: # tests/test-grammar-integration.cpp * common/grammar: fix grammar parsing issues to prevent stack overflow and hangs (#18604) * grammar: add test case for nullable symbol loop Reproduce stack overflow (or OOM) with ( [x]* )* found while adding GBNF support to ripgrep-edit. llama-server reproducer: curl \ -X POST \ -d '{ "messages": [{ "role": "user", "content": "write yes" }], "grammar": "root ::= ( [x]* )*" }' \ -H "Content-Type: application/json" \ http://localhost:8811/v1/chat/completions * grammar: prevent stack overflow with nullable symbol loop Fix a potential stack overflow in llama_grammar_advance_stack that could occur when processing grammars with nullable symbols that lead to infinite derivations of empty strings. The fix introduces cycle detection by tracking visited stacks to prevent infinite recursion. rg-edit regexp: llama_grammar_advance_stack rg-edit extra-args: -A20 rg-edit directive: """Rewrite: fix the following segfault: [..] ⚫ Testing segfault. Grammar: root ::= ( [x]* )* root ::= ( [x]* )* Segmentation fault build/bin/test-grammar-integration""" gptel-context: (("~/llama.cpp/src/llama-grammar.cpp") ("~/llama.cpp/tests/test-grammar-integration.cpp") ("~/llama.cpp/grammars/./list.gbnf") ("~/llama.cpp/grammars/./json_arr.gbnf") ("~/llama.cpp/grammars/./json.gbnf") ("~/llama.cpp/grammars/./japanese.gbnf") ("~/llama.cpp/grammars/./english.gbnf") ("~/llama.cpp/grammars/./chess.gbnf") ("~/llama.cpp/grammars/./c.gbnf") ("~/llama.cpp/grammars/./arithmetic.gbnf") ("~/llama.cpp/grammars/./README.md")) * grammar: convert recursive llama_grammar_advance_stack to iterative This change converts the function to an iterative approach using explicit stacks, which prevents deep recursion and eliminates the risk of stack overflow. 
rg-edit regexp: llama_grammar_advance_stack rg-edit extra-args: -A30 rg-edit directive: """Rewrite: fix the following segfault: [..] ⚫ Testing segfault. Grammar: root ::= ( [x]* )* root ::= ( [x]* )* Segmentation fault build/bin/test-grammar-integration convert from recursive to interactive""" gptel-context: (("~/llama.cpp/src/llama-grammar.cpp") ("~/llama.cpp/tests/test-grammar-integration.cpp") ("~/llama.cpp/grammars/./list.gbnf") ("~/llama.cpp/grammars/./json_arr.gbnf") ("~/llama.cpp/grammars/./json.gbnf") ("~/llama.cpp/grammars/./japanese.gbnf") ("~/llama.cpp/grammars/./english.gbnf") ("~/llama.cpp/grammars/./chess.gbnf") ("~/llama.cpp/grammars/./c.gbnf") ("~/llama.cpp/grammars/./arithmetic.gbnf") ("~/llama.cpp/grammars/./README.md")) v2: Added a `std::set` to perform tree-based lookups with O(N log N) complexity. Testing with a parallel run of `test-grammar-integration` shows a double-digit percentage increase in runtime. An `unordered_set` with O(1) hashing was also evaluated, but the overhead of constructing hash keys from pointers made it significantly slower than the rbtree implementation that only requires an ordering operator. The performance regression in the test suite appears justified by the overall reduction in algorithmic complexity. Co-developed-by: Piotr Wilkin (ilintar) <piotr.wilkin@syndatis.com> * grammar: add test case for hang in repetition grammar processing This commit adds a new test case to the grammar integration tests that specifically targets a hang scenario in the repetition grammar parser found while adding GBNF support to ripgrep-edit. llama-server reproducer: curl \ -X POST \ -d '{ "messages": [{ "role": "user", "content": "write yes" }], "grammar": "root ::= (([^x]*){0,99}){0,99}" }' \ -H "Content-Type: application/json" \ http://localhost:8811/v1/chat/completions * grammar: add repetition threshold check The change introduces a maximum repetition threshold to avoid excessive rule expansion during grammar parsing. 
When parsing repetition patterns like {m,n}, the parser now calculates the potential number of rules that would be generated and throws an error if the product of previous rules and new rules exceeds the threshold. A test case was added to verify the threshold is properly enforced for deeply nested repetition patterns that would otherwise cause hangs. --------- Co-authored-by: Asbjørn Olling <asbjornolling@gmail.com> Co-authored-by: Andrea Arcangeli <aarcange@redhat.com>
1497 lines
42 KiB
C++
1497 lines
42 KiB
C++
#ifdef NDEBUG
|
|
#undef NDEBUG
|
|
#endif
|
|
|
|
#define LLAMA_API_INTERNAL
|
|
|
|
#include "ggml.h"
|
|
#include "llama.h"
|
|
#include "grammar-parser.h"
|
|
#include "json-schema-to-grammar.h"
|
|
#include "unicode.h"
|
|
#include <cassert>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
using json = nlohmann::ordered_json;
|
|
|
|
// Builds a grammar from GBNF text, using `grammar_root` as the name of the start rule.
// The result is whatever llama_grammar_init_impl() returns; the callers in this file
// treat nullptr as "the grammar could not be built".
static llama_grammar * build_grammar_with_root(const std::string & grammar_str, const char * grammar_root) {
    const char * const grammar_text = grammar_str.c_str();

    llama_grammar * const built =
        llama_grammar_init_impl(nullptr, grammar_text, grammar_root, false, nullptr, 0, nullptr, 0);

    return built;
}
|
|
|
|
// Convenience wrapper: builds a grammar whose start rule is named "root".
static llama_grammar * build_grammar(const std::string & grammar_str) {
    const char * const default_root = "root";
    return build_grammar_with_root(grammar_str, default_root);
}
|
|
|
|
static bool test_build_grammar_fails(const std::string & grammar_str) {
|
|
fprintf(stderr, "⚫ Testing failure for grammar: %s\n", grammar_str.c_str());
|
|
bool grammar_fails = false;
|
|
llama_grammar * grammar = build_grammar(grammar_str);
|
|
if (grammar != nullptr) {
|
|
fprintf(stderr, " ❌ Expected build failure, but succeeded\n");
|
|
} else {
|
|
grammar_fails = true;
|
|
fprintf(stdout, " ✅︎\n");
|
|
}
|
|
return grammar_fails;
|
|
}
|
|
|
|
struct token_and_piece {
|
|
llama_token token;
|
|
std::string piece;
|
|
};
|
|
|
|
// token() encodes a 32-bit ID as 5 bytes: a 0xff marker followed by the ID in big-endian order.
|
|
static std::string token(llama_token id) {
|
|
return std::string{
|
|
static_cast<char>(0xff),
|
|
static_cast<char>((id >> 24) & 0xff),
|
|
static_cast<char>((id >> 16) & 0xff),
|
|
static_cast<char>((id >> 8) & 0xff),
|
|
static_cast<char>(id & 0xff)
|
|
};
|
|
}
|
|
|
|
// parse_tokens() parses the token encodes above and UTF-8 text.
|
|
static std::vector<token_and_piece> parse_tokens(const std::string & input) {
|
|
std::vector<token_and_piece> result;
|
|
result.reserve(input.size());
|
|
size_t offset = 0;
|
|
while (offset < input.size()) {
|
|
try {
|
|
if (static_cast<unsigned char>(input[offset]) == 0xff) {
|
|
if (offset + 5 > input.size()) {
|
|
throw std::runtime_error("not enough bytes for token id");
|
|
}
|
|
uint32_t val =
|
|
(static_cast<unsigned char>(input[offset + 1]) << 24) |
|
|
(static_cast<unsigned char>(input[offset + 2]) << 16) |
|
|
(static_cast<unsigned char>(input[offset + 3]) << 8) |
|
|
(static_cast<unsigned char>(input[offset + 4]));
|
|
auto piece = "<[" + std::to_string(val) + "]>";
|
|
result.push_back({static_cast<llama_token>(val), piece});
|
|
offset += 5;
|
|
} else {
|
|
uint32_t cpt = unicode_cpt_from_utf8(input, offset);
|
|
result.push_back({0, unicode_cpt_to_utf8(cpt)});
|
|
}
|
|
} catch (const std::invalid_argument & /*ex*/) {
|
|
// Silently ignore invalid UTF-8 input to avoid leaking the exception beyond llama_tokenize
|
|
++offset;
|
|
result.push_back({0, unicode_cpt_to_utf8(0xFFFD)}); // replacement character
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
static bool match_string(const std::string & input, llama_grammar * grammar) {
|
|
const auto parsed = parse_tokens(input);
|
|
|
|
const auto & code_points = decoded.first;
|
|
|
|
for (const auto & in : parsed) {
|
|
try {
|
|
llama_grammar_accept_token(*grammar, in.token, in.piece);
|
|
} catch (const std::runtime_error & /*e*/) {
|
|
// normally this shouldn't get hit because of llama_grammar_apply
|
|
return false;
|
|
}
|
|
|
|
for (auto it = code_points.begin(), end = code_points.end() - 1; it != end; ++it) {
|
|
const llama_grammar_stacks prev_stacks = llama_grammar_get_stacks(grammar); // copy
|
|
|
|
llama_grammar_accept(rules, prev_stacks, *it, cur_stacks);
|
|
|
|
if (cur_stacks.empty()) {
|
|
// no stacks means that the grammar failed to match at this point
|
|
return false;
|
|
}
|
|
}
|
|
|
|
for (const auto & stack : cur_stacks) {
|
|
if (stack.empty()) {
|
|
// An empty stack means that the grammar has been completed
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void test(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
|
|
fprintf(stderr, "⚫ Testing %s\n%s\n", test_desc.c_str(), grammar_str.c_str());
|
|
fflush(stderr);
|
|
|
|
auto grammar = build_grammar(grammar_str);
|
|
|
|
// Save the original grammar stacks so that we can reset after every new string we want to test
|
|
const llama_grammar_stacks original_stacks = llama_grammar_get_stacks(grammar);
|
|
|
|
llama_grammar_stacks & cur_stacks = llama_grammar_get_stacks(grammar);
|
|
|
|
fprintf(stderr, " 🔵 Valid strings:\n");
|
|
|
|
// Passing strings
|
|
for (const auto & test_string : passing_strings) {
|
|
fprintf(stderr, " \"%s\" ", test_string.c_str());
|
|
fflush(stderr);
|
|
|
|
bool matched = match_string(test_string, grammar);
|
|
|
|
if (!matched) {
|
|
fprintf(stderr, "❌ (failed to match)\n");
|
|
|
|
// DEBUG: Write strings to files so that we can analyze more easily with gbnf-validator program to see exactly where things failed.
|
|
// DEBUG: Write the grammar_str to test-grammar-integration.grammar.gbnf
|
|
FILE* grammar_file = fopen("test-grammar-integration.grammar.gbnf", "w");
|
|
if (grammar_file) {
|
|
fprintf(grammar_file, "%s", grammar_str.c_str());
|
|
fclose(grammar_file);
|
|
}
|
|
|
|
// DEBUG: Write the test string to test-grammar-integration.string.txt
|
|
FILE* string_file = fopen("test-grammar-integration.string.txt", "w");
|
|
if (string_file) {
|
|
fprintf(string_file, "%s", test_string.c_str());
|
|
fclose(string_file);
|
|
}
|
|
|
|
fprintf(stderr, "\n NOTE: Debug grammar file generated. To analyze this failure in detail, run the following command: ./llama-gbnf-validator test-grammar-integration.grammar.gbnf test-grammar-integration.string.txt\n\n");
|
|
} else {
|
|
fprintf(stdout, "✅︎\n");
|
|
}
|
|
|
|
assert(matched);
|
|
|
|
// Reset the grammar stacks
|
|
cur_stacks = original_stacks;
|
|
}
|
|
|
|
fprintf(stderr, " 🟠 Invalid strings:\n");
|
|
|
|
// Failing strings
|
|
for (const auto & test_string : failing_strings) {
|
|
fprintf(stderr, " \"%s\" ", test_string.c_str());
|
|
fflush(stderr);
|
|
|
|
bool matched = match_string(test_string, grammar);
|
|
|
|
if (matched) {
|
|
fprintf(stderr, "❌ (incorrectly matched)\n");
|
|
} else {
|
|
fprintf(stdout, "✅︎\n");
|
|
}
|
|
assert(!matched);
|
|
|
|
// Reset the grammar stacks
|
|
cur_stacks = original_stacks;
|
|
}
|
|
|
|
// Clean up allocated memory
|
|
llama_grammar_free(grammar);
|
|
}
|
|
// Runs test() on a GBNF grammar, appending the grammar text to the printed description.
static void test_grammar(const std::string & test_desc, const std::string & grammar_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
    const std::string labelled_desc = test_desc + ". Grammar: " + grammar_str;
    test(labelled_desc, grammar_str, passing_strings, failing_strings);
}
|
|
// Runs test() on a JSON schema: the schema text is parsed, converted to a grammar with
// json_schema_to_grammar(), and the schema text is appended to the printed description.
static void test_schema(const std::string & test_desc, const std::string & schema_str, const std::vector<std::string> & passing_strings, const std::vector<std::string> & failing_strings) {
    const std::string labelled_desc = test_desc + ". Schema: " + schema_str;
    const std::string grammar_text  = json_schema_to_grammar(json::parse(schema_str), true);
    test(labelled_desc, grammar_text, passing_strings, failing_strings);
}
|
|
|
|
static void test_simple_grammar() {
|
|
test_schema(
|
|
"min 0",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 0
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"0",
|
|
"10",
|
|
"12",
|
|
"10000",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-1",
|
|
"-10",
|
|
"-10000",
|
|
"-100000000000000000000000000000000",
|
|
"100000000000000000000000000000000",
|
|
"00",
|
|
"01",
|
|
"-0",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 2",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 2
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"2",
|
|
"3",
|
|
"4",
|
|
"10",
|
|
"20",
|
|
"1234567890000000",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"0",
|
|
"1",
|
|
"-1",
|
|
"-100",
|
|
"0",
|
|
"1",
|
|
"01",
|
|
"02",
|
|
"12345678900000000",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 456",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 456
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"456",
|
|
"4560",
|
|
"457",
|
|
"460",
|
|
"500",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"455",
|
|
"356",
|
|
"50",
|
|
"050",
|
|
"-1",
|
|
"-456",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min -123",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": -123
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-123",
|
|
"-122",
|
|
"-11",
|
|
"-1",
|
|
"0",
|
|
"1",
|
|
"123",
|
|
"1234",
|
|
"2345",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-1234",
|
|
"-124",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"max 9999",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"maximum": 9999
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-99999",
|
|
"0",
|
|
"9999",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"10000",
|
|
"99991",
|
|
}
|
|
);
|
|
test_schema(
|
|
"max -9999",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"maximum": -9999
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-10000",
|
|
"-9999",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-9998",
|
|
"0",
|
|
"9999",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 5 max 30",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": 5,
|
|
"maximum": 30
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"5",
|
|
"10",
|
|
"30",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"05",
|
|
"4",
|
|
"-1",
|
|
"31",
|
|
"123",
|
|
"0123",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min 1 max 900719925474091",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0,
|
|
"maximum": 900719925474091
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"1",
|
|
"2",
|
|
"10",
|
|
"900719925474090",
|
|
"900719925474091",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"0",
|
|
"01",
|
|
"900719925474092",
|
|
"9007199254740910",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min -1 max 1",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": -1,
|
|
"maximum": 1
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-1",
|
|
"0",
|
|
"1",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-11",
|
|
"-10",
|
|
"-2",
|
|
"2",
|
|
"10",
|
|
"11",
|
|
}
|
|
);
|
|
test_schema(
|
|
"min -123 max 42",
|
|
R"""({
|
|
"type": "integer",
|
|
"minimum": -123,
|
|
"maximum": 42
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"-123",
|
|
"-122",
|
|
"-13",
|
|
"-11",
|
|
"-2",
|
|
"-1",
|
|
"0",
|
|
"1",
|
|
"5",
|
|
"10",
|
|
"39",
|
|
"40",
|
|
"42",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"-0123",
|
|
"-124",
|
|
"-1123",
|
|
"-200",
|
|
"43",
|
|
"123",
|
|
"0123",
|
|
}
|
|
);
|
|
test_schema(
|
|
"exclusive min / max",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer",
|
|
"exclusiveMinimum": 0,
|
|
"exclusiveMaximum": 10000
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"1",
|
|
"9999",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"0",
|
|
"01",
|
|
"10000",
|
|
"99999",
|
|
}
|
|
);
|
|
|
|
// Test case for a simple grammar
|
|
test_grammar(
|
|
"simple grammar",
|
|
R"""(
|
|
root ::= expr
|
|
expr ::= term ("+" term)*
|
|
term ::= number
|
|
number ::= [0-9]+)""",
|
|
// Passing strings
|
|
{
|
|
"42",
|
|
"1+2+3+4+5",
|
|
"123+456",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"+",
|
|
"/ 3",
|
|
"1+2+3+4+5+",
|
|
"12a45",
|
|
}
|
|
);
|
|
|
|
// Test case for a simple grammar with tokens
|
|
test_grammar(
|
|
"simple grammar with tokens",
|
|
R"""(
|
|
root ::= <[10]> content <[11]>
|
|
content ::= (!<[11]>)*)""",
|
|
// Passing strings
|
|
{
|
|
token(10) + "hello world" + token(11),
|
|
token(10) + "text with " + token(12) + " other tokens " + token(13) + " mixed in" + token(11),
|
|
token(10) + token(11),
|
|
token(10) + token(12) + token(13) + token(14) + token(15) + token(11),
|
|
token(10) + "a" + token(11),
|
|
},
|
|
// Failing strings
|
|
{
|
|
token(10) + "missing end token",
|
|
token(10),
|
|
"missing start token" + token(11),
|
|
token(10) + token(11) + token(11), // double end token
|
|
token(11) + "wrong order" + token(10),
|
|
}
|
|
);
|
|
}
|
|
|
|
static void test_complex_grammar() {
|
|
// Test case for a more complex grammar, with both failure strings and success strings
|
|
test_grammar(
|
|
"medium complexity grammar",
|
|
// Grammar
|
|
R"""(
|
|
root ::= expression
|
|
expression ::= term ws (("+"|"-") ws term)*
|
|
term ::= factor ws (("*"|"/") ws factor)*
|
|
factor ::= number | variable | "(" expression ")" | function-call
|
|
number ::= [0-9]+
|
|
variable ::= [a-zA-Z_][a-zA-Z0-9_]*
|
|
function-call ::= variable ws "(" (expression ("," ws expression)*)? ")"
|
|
ws ::= [ \t\n\r]?)""",
|
|
// Passing strings
|
|
{
|
|
"42",
|
|
"1*2*3*4*5",
|
|
"x",
|
|
"x+10",
|
|
"x1+y2",
|
|
"(a+b)*(c-d)",
|
|
"func()",
|
|
"func(x,y+2)",
|
|
"a*(b+c)-d/e",
|
|
"f(g(x),h(y,z))",
|
|
"x + 10",
|
|
"x1 + y2",
|
|
"(a + b) * (c - d)",
|
|
"func()",
|
|
"func(x, y + 2)",
|
|
"a * (b + c) - d / e",
|
|
"f(g(x), h(y, z))",
|
|
"123+456",
|
|
"123*456*789-123/456+789*123",
|
|
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456"
|
|
},
|
|
// Failing strings
|
|
{
|
|
"+",
|
|
"/ 3x",
|
|
"x + + y",
|
|
"a * / b",
|
|
"func(,)",
|
|
"func(x y)",
|
|
"(a + b",
|
|
"x + y)",
|
|
"a + b * (c - d",
|
|
"42 +",
|
|
"x +",
|
|
"x + 10 +",
|
|
"(a + b) * (c - d",
|
|
"func(",
|
|
"func(x, y + 2",
|
|
"a * (b + c) - d /",
|
|
"f(g(x), h(y, z)",
|
|
"123+456*789-123/456+789*123-456/789+123*456-789/123+456*789-123/456+789*123-456/",
|
|
}
|
|
);
|
|
|
|
// Test case for a more complex grammar with tokens
|
|
test_grammar(
|
|
"complex grammar with tokens",
|
|
R"""(
|
|
root ::= reasoning+ content tool-call*
|
|
reasoning ::= <[10]> (!<[11]>)* <[11]>
|
|
content ::= <[20]> (!<[21]>)* <[21]>
|
|
tool-call ::= <[12]> name <[13]> args <[14]>
|
|
name ::= (!<[13]>)+
|
|
args ::= (!<[14]>)*)""",
|
|
// Passing strings
|
|
{
|
|
token(10) + "I am thinking" + token(11) + token(20) + "hello world!" + token(21) + token(12) + "search" + token(13) + "query=test" + token(14),
|
|
token(10) + "reasoning 1" + token(11) + token(10) + "reasoning 2" + token(11) + token(20) + token(21) + token(12) + "tool" + token(13) + token(14),
|
|
token(10) + token(11) + token(20) + "content" + token(21),
|
|
token(10) + "think" + token(12) + " nested" + token(11) + token(20) + token(10) + "more content" + token(21) + token(12) + "fn" + token(13) + "x=1,y=2" + token(14) + token(12) + "fn2" + token(13) + token(14),
|
|
token(10) + "reasoning" + token(11) + token(10) + "more" + token(11) + token(10) + "even more" + token(11) + token(20) + "text" + token(21) + token(12) + "a" + token(13) + "b" + token(14) + token(12) + "c" + token(13) + "d" + token(14),
|
|
},
|
|
// Failing strings
|
|
{
|
|
token(20) + "content only" + token(21),
|
|
token(10) + "no closing reasoning",
|
|
token(10) + token(11) + token(20) + "no closing content",
|
|
token(10) + token(11) + token(20) + token(21) + token(12) + "incomplete tool",
|
|
token(10) + token(11) + token(11) + token(20) + token(21),
|
|
}
|
|
);
|
|
}
|
|
|
|
// Exercises the "." (any character) special symbol: the grammar requires exactly three
// arbitrary characters, the literal "abc", and three more arbitrary characters.
static void test_special_chars() {
    // A collection of tests to exercise special characters such as "."
    test_grammar(
        "special characters",
        // Grammar
        R"""(
            root ::= ... "abc" ...
            )""",
        // Passing strings
        {
            "abcabcabc",
            "aaaabcccc",
            // NOTE: Also ensures that multi-byte characters still count as a single character
            "🔵🟠✅abc❌🟠🔵"
        },
        // Failing strings
        {
            "aaabcccc",
            "aaaaabcccc",
            "aaaabccc",
            "aaaabccccc",
            // Too many / too few multi-byte characters around "abc". These are two separate
            // cases; without the comma the adjacent literals would merge into one string.
            "🔵🟠✅❌abc❌✅🟠🔵",
            "🔵🟠abc🟠🔵"
        }
    );
}
|
|
|
|
// Covers the *, + and ? quantifiers, the {m}, {m,} and {m,n} repetition forms, and a
// nested nullable repetition ("segfault") grammar.
static void test_quantifiers() {
    // ---- single-symbol quantifiers ----
    test_grammar(
        "* quantifier",
        R"""(root ::= "a"*)""",
        /* passing */ {
            "",
            "a",
            "aaaaa",
            "aaaaaaaaaaaaaaaaaa",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
        },
        /* failing */ {
            "b",
            "ab",
            "aab",
            "ba",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
        }
    );
    test_grammar(
        "+ quantifier",
        R"""(root ::= "a"+)""",
        /* passing */ {
            "a",
            "aaaaa",
            "aaaaaaaaaaaaaaaaaa",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
        },
        /* failing */ {
            "",
            "b",
            "ab",
            "aab",
            "ba",
            "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaab"
        }
    );
    test_grammar(
        "? quantifier",
        R"""(root ::= "a"?)""",
        /* passing */ { "", "a" },
        /* failing */ { "b", "ab", "aa", "ba", }
    );

    // ---- several quantifiers combined ----
    test_grammar(
        "mixed quantifiers",
        R"""(
            root ::= cons+ vowel* cons? (vowel cons)*
            vowel ::= [aeiouy]
            cons ::= [bcdfghjklmnpqrstvwxyz]
            )""",
        /* passing */ { "yes", "no", "noyes", "crwth", "four", "bryyyy", },
        /* failing */ { "yess", "yesno", "forty", "catyyy", }
    );

    // ---- counted repetition ----
    test_grammar(
        "simple exact repetition",
        R"""(
            root ::= [ab]{4}
        )""",
        /* passing */ { "aaaa", "bbbb", "abab", },
        /* failing */ { "a", "b", "aaaaa", }
    );
    test_grammar(
        "simple min repetition",
        R"""(
            root ::= [ab]{4,}
        )""",
        /* passing */ { "aaaa", "aaaaab", "bbbb", "ababab", },
        /* failing */ { "", "aba", }
    );
    test_grammar(
        "simple max repetition",
        R"""(
            root ::= [ab]{0,4}
        )""",
        /* passing */ { "", "a", "aa", "aaa", "aaab", },
        /* failing */ { "aaaaa", }
    );
    test_grammar(
        "min / max repetition",
        R"""(
            root ::= ("0x" [A-F0-9]{2} " "?){3,5}
        )""",
        /* passing */ {
            "0xFF 0x12 0xAB",
            "0xFF 0x12 0xAB 0x00 0x00",
        },
        /* failing */ {
            "",
            "0xFF",
            "0xFF 0x12",
            "0xFF 0x12 0xAB 0x00 0x00 0x00",
        }
    );

    // ---- nested repetition of a symbol that can match the empty string ----
    test_grammar(
        "segfault",
        R"""(
            root ::= ( [x]* )*
        )""",
        /* passing */ { "", "x", "xx" },
        /* failing */ { "y", "yy" }
    );
}
|
|
|
|
// Parses a grammar whose first rule is misspelled "rot" and checks that parsing itself
// succeeds while no symbol named "root" is registered.
static void test_failure_missing_root() {
    fprintf(stderr, "⚫ Testing missing root node:\n");

    // Grammar without a "root" rule
    const std::string grammar_str = R"""(
        rot ::= expr
        expr ::= term ("+" term)*
        term ::= number
        number ::= [0-9]+)""";

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // Parsing must have produced rules ...
    const bool has_rules = !parsed_grammar.rules.empty();
    assert(has_rules);

    // ... but none of the symbols may be called "root"
    const auto root_entry = parsed_grammar.symbol_ids.find("root");
    assert(root_entry == parsed_grammar.symbol_ids.end());

    fprintf(stderr, " ✅︎ Passed\n");
}
|
|
|
|
// Parses a grammar in which `term` refers to the undefined rule "numero" and checks that
// the parser returns a state with no rules. The text between "Expected error:" and
// "End of expected error." on stderr is whatever the parser prints while rejecting it.
static void test_failure_missing_reference() {
    fprintf(stderr, "⚫ Testing missing reference node:\n");

    // `term` references "numero", which is never defined
    const std::string grammar_str =
        R"""(root ::= expr
        expr ::= term ("+" term)*
        term ::= numero
        number ::= [0-9]+)""";

    fprintf(stderr, " Expected error: ");

    grammar_parser::parse_state parsed_grammar = grammar_parser::parse(grammar_str.c_str());

    // The parse must NOT have succeeded
    const bool parse_rejected = parsed_grammar.rules.empty();
    assert(parse_rejected);

    fprintf(stderr, " End of expected error.\n");
    fprintf(stderr, " ✅︎ Passed\n");
}
|
|
|
|
// Checks that building a grammar fails for four left-recursive grammars of increasing
// indirection: direct, through one rule, through two rules, and through a rule that can
// be empty.
static void test_failure_left_recursion() {
    fprintf(stderr, "⚫ Testing left recursion detection:\n");

    // Direct: root refers to itself in first position
    const std::string simple_str = R"""(root ::= "a" | root "a")""";

    // One level down: asdf refers to itself
    const std::string medium_str = R"""(
        root ::= asdf
        asdf ::= "a" | asdf "a"
        )""";

    // Mutual: asdf -> foo -> asdf
    const std::string hard_str = R"""(
        root ::= asdf
        asdf ::= "a" | foo "b"
        foo ::= "c" | asdf "d" | "e")""";

    // Mutual, hidden behind a rule with an empty alternative
    const std::string hardest_str = R"""(
        root ::= asdf
        asdf ::= "a" | foo "b"
        foo ::= "c" | empty asdf "d" | "e"
        empty ::= "blah" | )""";

    assert(test_build_grammar_fails(simple_str));
    assert(test_build_grammar_fails(medium_str));
    assert(test_build_grammar_fails(hard_str));
    assert(test_build_grammar_fails(hardest_str));

    fprintf(stderr, " ✅︎ Passed\n");
}
|
|
|
|
static void test_failure_missing_root_symbol() {
|
|
fprintf(stderr, "⚫ Testing missing root symbol:\n");
|
|
|
|
const std::string grammar_str = R"""(
|
|
root ::= "foobar"
|
|
)""";
|
|
|
|
llama_grammar * failure_result = build_grammar_with_root(grammar_str, "nonexistent");
|
|
assert(failure_result == nullptr);
|
|
|
|
fprintf(stderr, " ✅︎ Passed\n");
|
|
}
|
|
|
|
static void test_custom_root_symbol_check() {
|
|
fprintf(stderr, "⚫ Testing custom root symbol check:\n");
|
|
|
|
const std::string custom_root_grammar_str = R"""(
|
|
foobar ::= "foobar"
|
|
)""";
|
|
|
|
llama_grammar * failure_result = build_grammar_with_root(custom_root_grammar_str, "root");
|
|
assert(failure_result == nullptr);
|
|
|
|
llama_grammar * success_result = build_grammar_with_root(custom_root_grammar_str, "foobar");
|
|
assert(success_result != nullptr);
|
|
llama_grammar_free_impl(success_result);
|
|
|
|
fprintf(stderr, " ✅︎ Passed\n");
|
|
}
|
|
|
|
static void test_json_schema() {
|
|
// Note that this is similar to the regular grammar tests,
|
|
// but we convert each json schema to a grammar before parsing.
|
|
// Otherwise, this test structure is the same.
|
|
|
|
test_schema(
|
|
"empty schema (object)",
|
|
// Schema
|
|
R"""(
|
|
{}
|
|
)""",
|
|
// Passing strings
|
|
{
|
|
R"""({})""",
|
|
R"""({"foo": "bar"})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"",
|
|
"[]",
|
|
"null",
|
|
R"""("")""",
|
|
"true",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"exotic formats (list)",
|
|
// Schema
|
|
R"""({
|
|
"items": [
|
|
{ "format": "date" },
|
|
{ "format": "uuid" },
|
|
{ "format": "time" },
|
|
{ "format": "date-time" }
|
|
]
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
// "{}", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
// "[]", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
R"""(["2012-04-23", "12345678-1234-1234-1234-1234567890ab", "18:25:43.511Z", "2012-04-23T18:25:43.511Z"])""",
|
|
//R"""(["2012-04-23","12345678-1234-1234-1234-1234567890ab"])""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
//R"""({"foo": "bar"})""", // NOTE: This string passes for this schema on https://www.jsonschemavalidator.net/ -- should it?
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""(["foo", "bar"])""",
|
|
R"""(["12345678-1234-1234-1234-1234567890ab"])""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string",
|
|
// Schema
|
|
R"""({
|
|
"type": "string"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""({})""",
|
|
R"""("foo": "bar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ min length 1",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"minLength": 1
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""({})""",
|
|
R"""("foo": "bar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ min length 3",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"minLength": 3
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("foobar")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("f")""",
|
|
R"""("fo")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ max length",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"maxLength": 3
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("")""",
|
|
R"""("f")""",
|
|
R"""("fo")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("foobar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string w/ min & max length",
|
|
// Schema
|
|
R"""({
|
|
"type": "string",
|
|
"minLength": 1,
|
|
"maxLength": 4
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
R"""("bar")""",
|
|
R"""("f")""",
|
|
R"""("barf")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("barfo")""",
|
|
R"""("foobar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"boolean",
|
|
// Schema
|
|
R"""({
|
|
"type": "boolean"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
"true",
|
|
"false",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("true")""",
|
|
R"""(True)""",
|
|
R"""(FALSE)""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"integer",
|
|
// Schema
|
|
R"""({
|
|
"type": "integer"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""(0)""",
|
|
R"""(12345)""",
|
|
R"""(1234567890123456)""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""(01)""",
|
|
R"""(007)""",
|
|
R"""(12345678901234567 )""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"string const",
|
|
// Schema
|
|
R"""({
|
|
"const": "foo"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("foo")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""(foo)""",
|
|
R"""("bar")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"non-string const",
|
|
// Schema
|
|
R"""({
|
|
"const": true
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""(true)""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""(foo)""",
|
|
R"""("true")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"non-string const",
|
|
// Schema
|
|
R"""({
|
|
"enum": ["red", "amber", "green", null, 42, ["foo"]]
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("red")""",
|
|
R"""(null)""",
|
|
R"""(42)""",
|
|
R"""(["foo"])""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""(420)""",
|
|
R"""(true)""",
|
|
R"""(foo)""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"simple pattern",
|
|
// Schema
|
|
R"""({
|
|
"pattern": "^[a-zA-Z0-9_-]*$"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("")""",
|
|
R"""("He_llo-12")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("!")""",
|
|
R"""("Hello World")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"pattern with escapes",
|
|
// Schema
|
|
R"""({
|
|
"pattern": "^a\\^\\$\\.\\[\\]\\(\\)\\|\\{\\}\\*\\+\\?b$"
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""("a^$.[]()|{}*+?b")""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""("ab")""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"",
|
|
// Schema
|
|
R"""(
|
|
{
|
|
"type": ["array", "null"],
|
|
"items": { "type": "string" }
|
|
}
|
|
)""",
|
|
// Passing strings
|
|
{
|
|
"null",
|
|
"[]",
|
|
"[\"123\"]",
|
|
"[\"foo\", \"bar\"]",
|
|
},
|
|
// Failing strings
|
|
{
|
|
"",
|
|
"[123]",
|
|
"\"foo\"",
|
|
"[\"foo\", 42]",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"min+max items",
|
|
// Schema
|
|
R"""({
|
|
"items": {
|
|
"type": ["number", "integer"]
|
|
},
|
|
"minItems": 3,
|
|
"maxItems": 5
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""([1, 2, 3])""",
|
|
R"""([1, 2, 3, 4])""",
|
|
R"""([1, 2, 3, 4, 5])""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""([1, 2])""",
|
|
R"""([1, 2, 3, 4, 5, 6])""",
|
|
R"""(1)""",
|
|
}
|
|
);
|
|
|
|
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
|
|
test_schema(
|
|
"object properties",
|
|
// Schema
|
|
R"""({
|
|
"type": "object",
|
|
"properties": {
|
|
"number": { "type": "number" },
|
|
"street_name": { "type": "string" },
|
|
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
}
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// "By default, leaving out properties is valid"
|
|
R"""({ "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
// "By extension, even an empty object is valid"
|
|
R"""({})""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
// Change datatype from number to string
|
|
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// Reorder properties
|
|
R"""({ "street_name": "Pennsylvania", "number": 1600 })""",
|
|
// Change datatype from number to string (same input as the first failing case)
|
|
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// Additional properties default to false for generation, even though the spec says true.
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
|
|
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"additional properties can't override other properties",
|
|
R"""({
|
|
"properties": {
|
|
"a": {"type": "integer"},
|
|
"b": {"type": "integer"}
|
|
},
|
|
"additionalProperties": true
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({"a": 42})""",
|
|
R"""({"c": ""})""",
|
|
R"""({"a": 42, "c": ""})""",
|
|
R"""({"a_": ""})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""()""",
|
|
R"""({"a": ""})""",
|
|
R"""({"a": "", "b": ""})""",
|
|
}
|
|
);
|
|
|
|
// Properties (from: https://json-schema.org/understanding-json-schema/reference/object#properties)
|
|
test_schema(
|
|
"object properties, additionalProperties: true",
|
|
// Schema
|
|
R"""({
|
|
"type": "object",
|
|
"properties": {
|
|
"number": { "type": "number" },
|
|
"street_name": { "type": "string" },
|
|
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
},
|
|
"additionalProperties": true
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
// "By extension, even an empty object is valid"
|
|
R"""({})""",
|
|
R"""({"number":1600,"street_name":"Pennsylvania","street_type":"Avenue"})""",
|
|
// "By default, leaving out properties is valid"
|
|
R"""({ "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
// "By default, providing additional properties is valid"
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue", "direction":"NW"})""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
// Change datatype from number to string
|
|
R"""({ "number": "1600", "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// Reorder properties
|
|
R"""({ "street_name": "Pennsylvania", "number": 1600, "street_type":"Avenue"})""",
|
|
}
|
|
);
|
|
|
|
// Additional properties: false
|
|
test_schema(
|
|
"required + optional props each in original order",
|
|
// Schema
|
|
R"""({
|
|
"type": "object",
|
|
"properties": {
|
|
"number": { "type": "number" },
|
|
"street_name": { "type": "string" },
|
|
"street_type": { "enum": ["Street", "Avenue", "Boulevard"] }
|
|
},
|
|
"additionalProperties": false
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({ "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_type":"Avenue"})""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania" })""",
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type":"Avenue"})""",
|
|
// Spaces are permitted around enum values
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue" })""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
// Reorder properties
|
|
R"""({ "street_type": "Avenue", "number": 1600 })""",
|
|
// Add "direction"
|
|
R"""({ "number": 1600, "street_name": "Pennsylvania", "street_type": "Avenue", "direction": "NW" })""",
|
|
}
|
|
);
|
|
|
|
test_schema(
|
|
"required + optional props each in original order",
|
|
// Schema
|
|
R"""({
|
|
"properties": {
|
|
"b": {"type": "string"},
|
|
"a": {"type": "string"},
|
|
"d": {"type": "string"},
|
|
"c": {"type": "string"}
|
|
},
|
|
"required": ["a", "b"],
|
|
"additionalProperties": false
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({"b": "foo", "a": "bar"})""",
|
|
R"""({"b":"foo","a":"bar","d":"qux"})""",
|
|
R"""({"b":"foo", "a":"bar", "d":"qux", "c":"baz"})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""({"a": "foo", "b": "bar"})""",
|
|
R"""({"b": "bar"})""",
|
|
R"""({"a": "foo", "c": "baz"})""",
|
|
R"""({"a":"foo", "b":"bar", "c":"baz", "d":"qux"})""",
|
|
}
|
|
);
|
|
|
|
// NOTE: Example from https://json-schema.org/learn/getting-started-step-by-step#define-required-properties
|
|
test_schema(
|
|
"required props",
|
|
// Schema
|
|
R"""({
|
|
"$schema": "https://json-schema.org/draft/2020-12/schema",
|
|
"$id": "https://example.com/product.schema.json",
|
|
"title": "Product",
|
|
"description": "A product from Acme's catalog",
|
|
"type": "object",
|
|
"properties": {
|
|
"productId": {
|
|
"description": "The unique identifier for a product",
|
|
"type": "integer"
|
|
},
|
|
"productName": {
|
|
"description": "Name of the product",
|
|
"type": "string"
|
|
},
|
|
"price": {
|
|
"description": "The price of the product",
|
|
"type": "number",
|
|
"exclusiveMinimum": 0
|
|
},
|
|
"tags": {
|
|
"description": "Tags for the product",
|
|
"type": "array",
|
|
"items": {
|
|
"type": "string"
|
|
},
|
|
"minItems": 1,
|
|
"uniqueItems": true
|
|
},
|
|
"dimensions": {
|
|
"type": "object",
|
|
"properties": {
|
|
"length": {
|
|
"type": "number"
|
|
},
|
|
"width": {
|
|
"type": "number"
|
|
},
|
|
"height": {
|
|
"type": "number"
|
|
}
|
|
},
|
|
"required": [ "length", "width", "height" ]
|
|
}
|
|
},
|
|
"required": [ "productId", "productName", "price" ]
|
|
})""",
|
|
// Passing strings
|
|
{
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50})""",
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"]})""",
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green"], "dimensions": {"length": 785, "width": 250.5, "height": -0.359}})""",
|
|
},
|
|
// Failing strings
|
|
{
|
|
R"""({})""", // Missing all required properties
|
|
R"""({"productName": "A green door", "price": 12.50, "productId": 1})""", // Out of order properties
|
|
// TODO: The following line should fail, but currently it passes. `exclusiveMinimum` is not supported, as it would likely be too difficult to implement.
|
|
// Perhaps special checks for minimum and maximum values of 0 could be added (since that's relatively easy to do with grammars), but anything else would likely be too complex.
|
|
// R"""({"productId": 1, "productName": "A green door", "price": -12.50})""",
|
|
R"""({"productId": 1, "productName": "A green door"})""", // Missing required property (price)
|
|
R"""({"productName": "A green door", "price": 12.50})""", // Missing required property (productId)
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": []})""", // tags is empty, but minItems is 1
|
|
R"""({"productId": 1, "productName": "A green door", "price": 12.50, "dimensions": {"length": 785, "width": 250.5, "height": -0.359}, "tags": ["home", "green"]})""", // Tags and dimensions are out of order
|
|
// TODO: The following line should fail, but currently it passes. `uniqueItems` is not supported, as it would likely be too difficult to implement.
|
|
// R"""({"productId": 1, "productName": "A green door", "price": 12.50, "tags": ["home", "green", "home"]})""",
|
|
}
|
|
);
|
|
}
|
|
|
|
// Test-binary entry point.
//
// Prints a start banner, invokes each grammar integration suite once in the
// fixed order below, then prints a success line and returns 0. The success
// line is only reached if every suite returns control to main; how a suite
// reports a failure is not visible from here (presumably it aborts or
// asserts -- confirm against the suite implementations).
int main() {
    fprintf(stdout, "Running grammar integration tests...\n");

    // Grammar suites.
    test_simple_grammar();
    test_complex_grammar();
    test_special_chars();
    test_quantifiers();

    // Suites named after failure scenarios.
    test_failure_missing_root();
    test_failure_missing_reference();
    test_failure_left_recursion();
    test_failure_missing_root_symbol();

    // Root-symbol handling and JSON-schema suites.
    test_custom_root_symbol_check();
    test_json_schema();

    fprintf(stdout, "All tests passed.\n");
    return 0;
}
|