mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-06-28 04:30:15 -05:00
* initial commit * cleanup * fix whitelist arg parsing and simplify keyword search state * rename white* to allow* * add vocab_pieces init function, rename update functions, delete accidentally added file * delete temporary bias code * auto-generate fill function with script data inside * deduplicate allowlist unicode rule parsing * minor cleanup * delete unnecessary header * refactor allowlist to support sequential rule sets via keywords * add early exit for zero-rules case * delete accidentally added file
2006 lines
29 KiB
C++
2006 lines
29 KiB
C++
// generated with scripts/gen-unicode-script-data.py
|
|
|
|
#include "unicode.h"
|
|
#include "unicode-data.h"
|
|
|
|
size_t unicode_fill_from_utf8(std::string* utf8, std::vector<uint32_t>* dst_cpts, std::vector<std::string>* dst_scripts) {
|
|
if (utf8 == nullptr) {
|
|
return 0;
|
|
}
|
|
|
|
static const std::vector<std::string> unicode_scripts = {
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"bopomofo",
|
|
"common",
|
|
"inherited",
|
|
"greek",
|
|
"common",
|
|
"greek",
|
|
"greek",
|
|
"common",
|
|
"greek",
|
|
"greek",
|
|
"common",
|
|
"greek",
|
|
"common",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"coptic",
|
|
"greek",
|
|
"cyrillic",
|
|
"inherited",
|
|
"cyrillic",
|
|
"armenian",
|
|
"armenian",
|
|
"armenian",
|
|
"hebrew",
|
|
"hebrew",
|
|
"hebrew",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"inherited",
|
|
"arabic",
|
|
"inherited",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"syriac",
|
|
"syriac",
|
|
"syriac",
|
|
"arabic",
|
|
"thaana",
|
|
"nko",
|
|
"nko",
|
|
"samaritan",
|
|
"samaritan",
|
|
"mandaic",
|
|
"mandaic",
|
|
"syriac",
|
|
"arabic",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"devanagari",
|
|
"inherited",
|
|
"devanagari",
|
|
"common",
|
|
"devanagari",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"bengali",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gurmukhi",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"gujarati",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"oriya",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"tamil",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"telugu",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"kannada",
|
|
"malayalam",
|
|
"malayalam",
|
|
"malayalam",
|
|
"malayalam",
|
|
"malayalam",
|
|
"malayalam",
|
|
"malayalam",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"sinhala",
|
|
"thai",
|
|
"common",
|
|
"thai",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"lao",
|
|
"tibetan",
|
|
"tibetan",
|
|
"tibetan",
|
|
"tibetan",
|
|
"tibetan",
|
|
"tibetan",
|
|
"common",
|
|
"tibetan",
|
|
"myanmar",
|
|
"georgian",
|
|
"georgian",
|
|
"georgian",
|
|
"georgian",
|
|
"common",
|
|
"georgian",
|
|
"hangul",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"cherokee",
|
|
"cherokee",
|
|
"canadian_aboriginal",
|
|
"ogham",
|
|
"runic",
|
|
"common",
|
|
"runic",
|
|
"tagalog",
|
|
"tagalog",
|
|
"hanunoo",
|
|
"common",
|
|
"buhid",
|
|
"tagbanwa",
|
|
"tagbanwa",
|
|
"tagbanwa",
|
|
"khmer",
|
|
"khmer",
|
|
"khmer",
|
|
"mongolian",
|
|
"common",
|
|
"mongolian",
|
|
"common",
|
|
"mongolian",
|
|
"mongolian",
|
|
"mongolian",
|
|
"canadian_aboriginal",
|
|
"limbu",
|
|
"limbu",
|
|
"limbu",
|
|
"limbu",
|
|
"limbu",
|
|
"tai_le",
|
|
"tai_le",
|
|
"new_tai_lue",
|
|
"new_tai_lue",
|
|
"new_tai_lue",
|
|
"new_tai_lue",
|
|
"khmer",
|
|
"buginese",
|
|
"buginese",
|
|
"tai_tham",
|
|
"tai_tham",
|
|
"tai_tham",
|
|
"tai_tham",
|
|
"tai_tham",
|
|
"inherited",
|
|
"inherited",
|
|
"balinese",
|
|
"balinese",
|
|
"sundanese",
|
|
"batak",
|
|
"batak",
|
|
"lepcha",
|
|
"lepcha",
|
|
"lepcha",
|
|
"ol_chiki",
|
|
"cyrillic",
|
|
"georgian",
|
|
"georgian",
|
|
"sundanese",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"latin",
|
|
"greek",
|
|
"cyrillic",
|
|
"latin",
|
|
"greek",
|
|
"latin",
|
|
"greek",
|
|
"latin",
|
|
"cyrillic",
|
|
"latin",
|
|
"greek",
|
|
"inherited",
|
|
"latin",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"greek",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"greek",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"braille",
|
|
"common",
|
|
"common",
|
|
"glagolitic",
|
|
"latin",
|
|
"coptic",
|
|
"coptic",
|
|
"georgian",
|
|
"georgian",
|
|
"georgian",
|
|
"tifinagh",
|
|
"tifinagh",
|
|
"tifinagh",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"cyrillic",
|
|
"common",
|
|
"han",
|
|
"han",
|
|
"han",
|
|
"common",
|
|
"han",
|
|
"common",
|
|
"han",
|
|
"common",
|
|
"han",
|
|
"inherited",
|
|
"hangul",
|
|
"common",
|
|
"han",
|
|
"common",
|
|
"hiragana",
|
|
"inherited",
|
|
"common",
|
|
"hiragana",
|
|
"common",
|
|
"katakana",
|
|
"common",
|
|
"katakana",
|
|
"bopomofo",
|
|
"hangul",
|
|
"common",
|
|
"bopomofo",
|
|
"common",
|
|
"common",
|
|
"katakana",
|
|
"hangul",
|
|
"common",
|
|
"hangul",
|
|
"common",
|
|
"katakana",
|
|
"common",
|
|
"katakana",
|
|
"common",
|
|
"han",
|
|
"common",
|
|
"han",
|
|
"yi",
|
|
"yi",
|
|
"lisu",
|
|
"vai",
|
|
"cyrillic",
|
|
"bamum",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"latin",
|
|
"syloti_nagri",
|
|
"common",
|
|
"phags_pa",
|
|
"saurashtra",
|
|
"saurashtra",
|
|
"devanagari",
|
|
"kayah_li",
|
|
"common",
|
|
"kayah_li",
|
|
"rejang",
|
|
"rejang",
|
|
"hangul",
|
|
"javanese",
|
|
"common",
|
|
"javanese",
|
|
"javanese",
|
|
"myanmar",
|
|
"cham",
|
|
"cham",
|
|
"cham",
|
|
"cham",
|
|
"myanmar",
|
|
"tai_viet",
|
|
"tai_viet",
|
|
"meetei_mayek",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"greek",
|
|
"latin",
|
|
"common",
|
|
"cherokee",
|
|
"meetei_mayek",
|
|
"meetei_mayek",
|
|
"hangul",
|
|
"hangul",
|
|
"hangul",
|
|
"han",
|
|
"han",
|
|
"latin",
|
|
"armenian",
|
|
"hebrew",
|
|
"hebrew",
|
|
"hebrew",
|
|
"hebrew",
|
|
"hebrew",
|
|
"hebrew",
|
|
"arabic",
|
|
"common",
|
|
"arabic",
|
|
"arabic",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"cyrillic",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"arabic",
|
|
"arabic",
|
|
"common",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"latin",
|
|
"common",
|
|
"katakana",
|
|
"common",
|
|
"katakana",
|
|
"common",
|
|
"hangul",
|
|
"hangul",
|
|
"hangul",
|
|
"hangul",
|
|
"hangul",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"linear_b",
|
|
"linear_b",
|
|
"linear_b",
|
|
"linear_b",
|
|
"linear_b",
|
|
"linear_b",
|
|
"linear_b",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"greek",
|
|
"common",
|
|
"greek",
|
|
"common",
|
|
"inherited",
|
|
"lycian",
|
|
"carian",
|
|
"inherited",
|
|
"common",
|
|
"old_italic",
|
|
"old_italic",
|
|
"gothic",
|
|
"old_permic",
|
|
"ugaritic",
|
|
"ugaritic",
|
|
"old_persian",
|
|
"old_persian",
|
|
"deseret",
|
|
"shavian",
|
|
"osmanya",
|
|
"osmanya",
|
|
"osage",
|
|
"osage",
|
|
"elbasan",
|
|
"caucasian_albanian",
|
|
"caucasian_albanian",
|
|
"vithkuqi",
|
|
"vithkuqi",
|
|
"vithkuqi",
|
|
"vithkuqi",
|
|
"vithkuqi",
|
|
"vithkuqi",
|
|
"vithkuqi",
|
|
"vithkuqi",
|
|
"todhri",
|
|
"linear_a",
|
|
"linear_a",
|
|
"linear_a",
|
|
"latin",
|
|
"latin",
|
|
"latin",
|
|
"cypriot",
|
|
"cypriot",
|
|
"cypriot",
|
|
"cypriot",
|
|
"cypriot",
|
|
"cypriot",
|
|
"imperial_aramaic",
|
|
"imperial_aramaic",
|
|
"palmyrene",
|
|
"nabataean",
|
|
"nabataean",
|
|
"hatran",
|
|
"hatran",
|
|
"hatran",
|
|
"phoenician",
|
|
"phoenician",
|
|
"lydian",
|
|
"lydian",
|
|
"sidetic",
|
|
"meroitic_hieroglyphs",
|
|
"meroitic_cursive",
|
|
"meroitic_cursive",
|
|
"meroitic_cursive",
|
|
"kharoshthi",
|
|
"kharoshthi",
|
|
"kharoshthi",
|
|
"kharoshthi",
|
|
"kharoshthi",
|
|
"kharoshthi",
|
|
"kharoshthi",
|
|
"kharoshthi",
|
|
"old_south_arabian",
|
|
"old_north_arabian",
|
|
"manichaean",
|
|
"manichaean",
|
|
"avestan",
|
|
"avestan",
|
|
"inscriptional_parthian",
|
|
"inscriptional_parthian",
|
|
"inscriptional_pahlavi",
|
|
"inscriptional_pahlavi",
|
|
"psalter_pahlavi",
|
|
"psalter_pahlavi",
|
|
"psalter_pahlavi",
|
|
"old_turkic",
|
|
"old_hungarian",
|
|
"old_hungarian",
|
|
"old_hungarian",
|
|
"hanifi_rohingya",
|
|
"hanifi_rohingya",
|
|
"garay",
|
|
"garay",
|
|
"garay",
|
|
"arabic",
|
|
"yezidi",
|
|
"yezidi",
|
|
"yezidi",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"old_sogdian",
|
|
"sogdian",
|
|
"old_uyghur",
|
|
"chorasmian",
|
|
"elymaic",
|
|
"brahmi",
|
|
"brahmi",
|
|
"brahmi",
|
|
"kaithi",
|
|
"kaithi",
|
|
"sora_sompeng",
|
|
"sora_sompeng",
|
|
"chakma",
|
|
"chakma",
|
|
"mahajani",
|
|
"sharada",
|
|
"sinhala",
|
|
"khojki",
|
|
"khojki",
|
|
"multani",
|
|
"multani",
|
|
"multani",
|
|
"multani",
|
|
"multani",
|
|
"khudawadi",
|
|
"khudawadi",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"inherited",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"grantha",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"tulu_tigalari",
|
|
"newa",
|
|
"newa",
|
|
"tirhuta",
|
|
"tirhuta",
|
|
"siddham",
|
|
"siddham",
|
|
"modi",
|
|
"modi",
|
|
"mongolian",
|
|
"takri",
|
|
"takri",
|
|
"myanmar",
|
|
"ahom",
|
|
"ahom",
|
|
"ahom",
|
|
"dogra",
|
|
"warang_citi",
|
|
"warang_citi",
|
|
"dives_akuru",
|
|
"dives_akuru",
|
|
"dives_akuru",
|
|
"dives_akuru",
|
|
"dives_akuru",
|
|
"dives_akuru",
|
|
"dives_akuru",
|
|
"dives_akuru",
|
|
"nandinagari",
|
|
"nandinagari",
|
|
"nandinagari",
|
|
"zanabazar_square",
|
|
"soyombo",
|
|
"canadian_aboriginal",
|
|
"pau_cin_hau",
|
|
"devanagari",
|
|
"sharada",
|
|
"sunuwar",
|
|
"sunuwar",
|
|
"bhaiksuki",
|
|
"bhaiksuki",
|
|
"bhaiksuki",
|
|
"bhaiksuki",
|
|
"marchen",
|
|
"marchen",
|
|
"marchen",
|
|
"masaram_gondi",
|
|
"masaram_gondi",
|
|
"masaram_gondi",
|
|
"masaram_gondi",
|
|
"masaram_gondi",
|
|
"masaram_gondi",
|
|
"masaram_gondi",
|
|
"gunjala_gondi",
|
|
"gunjala_gondi",
|
|
"gunjala_gondi",
|
|
"gunjala_gondi",
|
|
"gunjala_gondi",
|
|
"gunjala_gondi",
|
|
"tolong_siki",
|
|
"tolong_siki",
|
|
"makasar",
|
|
"kawi",
|
|
"kawi",
|
|
"kawi",
|
|
"lisu",
|
|
"tamil",
|
|
"tamil",
|
|
"cuneiform",
|
|
"cuneiform",
|
|
"cuneiform",
|
|
"cuneiform",
|
|
"cypro_minoan",
|
|
"egyptian_hieroglyphs",
|
|
"egyptian_hieroglyphs",
|
|
"anatolian_hieroglyphs",
|
|
"gurung_khema",
|
|
"bamum",
|
|
"mro",
|
|
"mro",
|
|
"mro",
|
|
"tangsa",
|
|
"tangsa",
|
|
"bassa_vah",
|
|
"bassa_vah",
|
|
"pahawh_hmong",
|
|
"pahawh_hmong",
|
|
"pahawh_hmong",
|
|
"pahawh_hmong",
|
|
"pahawh_hmong",
|
|
"kirat_rai",
|
|
"medefaidrin",
|
|
"beria_erfe",
|
|
"beria_erfe",
|
|
"miao",
|
|
"miao",
|
|
"miao",
|
|
"tangut",
|
|
"nushu",
|
|
"han",
|
|
"khitan_small_script",
|
|
"han",
|
|
"tangut",
|
|
"khitan_small_script",
|
|
"khitan_small_script",
|
|
"tangut",
|
|
"tangut",
|
|
"katakana",
|
|
"katakana",
|
|
"katakana",
|
|
"katakana",
|
|
"hiragana",
|
|
"katakana",
|
|
"hiragana",
|
|
"hiragana",
|
|
"katakana",
|
|
"katakana",
|
|
"nushu",
|
|
"duployan",
|
|
"duployan",
|
|
"duployan",
|
|
"duployan",
|
|
"duployan",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"inherited",
|
|
"inherited",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"inherited",
|
|
"common",
|
|
"greek",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"signwriting",
|
|
"signwriting",
|
|
"signwriting",
|
|
"latin",
|
|
"latin",
|
|
"glagolitic",
|
|
"glagolitic",
|
|
"glagolitic",
|
|
"glagolitic",
|
|
"glagolitic",
|
|
"cyrillic",
|
|
"cyrillic",
|
|
"nyiakeng_puachue_hmong",
|
|
"nyiakeng_puachue_hmong",
|
|
"nyiakeng_puachue_hmong",
|
|
"nyiakeng_puachue_hmong",
|
|
"toto",
|
|
"wancho",
|
|
"wancho",
|
|
"nag_mundari",
|
|
"ol_onal",
|
|
"ol_onal",
|
|
"tai_yo",
|
|
"tai_yo",
|
|
"tai_yo",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"ethiopic",
|
|
"mende_kikakui",
|
|
"mende_kikakui",
|
|
"adlam",
|
|
"adlam",
|
|
"adlam",
|
|
"common",
|
|
"common",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"arabic",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"hiragana",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"common",
|
|
"han",
|
|
"han",
|
|
"han",
|
|
"han",
|
|
"han",
|
|
"han",
|
|
"han",
|
|
"han",
|
|
"common",
|
|
"common",
|
|
"inherited",
|
|
};
|
|
static const std::vector<uint32_t> unicode_script_lasts = {
|
|
0x000040,
|
|
0x00005A,
|
|
0x000060,
|
|
0x00007A,
|
|
0x0000A9,
|
|
0x0000AA,
|
|
0x0000B9,
|
|
0x0000BA,
|
|
0x0000BF,
|
|
0x0000D6,
|
|
0x0000D7,
|
|
0x0000F6,
|
|
0x0000F7,
|
|
0x0002B8,
|
|
0x0002DF,
|
|
0x0002E4,
|
|
0x0002E9,
|
|
0x0002EB,
|
|
0x0002FF,
|
|
0x00036F,
|
|
0x000373,
|
|
0x000374,
|
|
0x000377,
|
|
0x00037D,
|
|
0x00037E,
|
|
0x00037F,
|
|
0x000384,
|
|
0x000385,
|
|
0x000386,
|
|
0x000387,
|
|
0x00038A,
|
|
0x00038C,
|
|
0x0003A1,
|
|
0x0003E1,
|
|
0x0003EF,
|
|
0x0003FF,
|
|
0x000484,
|
|
0x000486,
|
|
0x00052F,
|
|
0x000556,
|
|
0x00058A,
|
|
0x00058F,
|
|
0x0005C7,
|
|
0x0005EA,
|
|
0x0005F4,
|
|
0x000604,
|
|
0x000605,
|
|
0x00060B,
|
|
0x00060C,
|
|
0x00061A,
|
|
0x00061B,
|
|
0x00061E,
|
|
0x00061F,
|
|
0x00063F,
|
|
0x000640,
|
|
0x00064A,
|
|
0x000655,
|
|
0x00066F,
|
|
0x000670,
|
|
0x0006DC,
|
|
0x0006DD,
|
|
0x0006FF,
|
|
0x00070D,
|
|
0x00074A,
|
|
0x00074F,
|
|
0x00077F,
|
|
0x0007B1,
|
|
0x0007FA,
|
|
0x0007FF,
|
|
0x00082D,
|
|
0x00083E,
|
|
0x00085B,
|
|
0x00085E,
|
|
0x00086A,
|
|
0x000891,
|
|
0x0008E1,
|
|
0x0008E2,
|
|
0x0008FF,
|
|
0x000950,
|
|
0x000954,
|
|
0x000963,
|
|
0x000965,
|
|
0x00097F,
|
|
0x000983,
|
|
0x00098C,
|
|
0x000990,
|
|
0x0009A8,
|
|
0x0009B0,
|
|
0x0009B2,
|
|
0x0009B9,
|
|
0x0009C4,
|
|
0x0009C8,
|
|
0x0009CE,
|
|
0x0009D7,
|
|
0x0009DD,
|
|
0x0009E3,
|
|
0x0009FE,
|
|
0x000A03,
|
|
0x000A0A,
|
|
0x000A10,
|
|
0x000A28,
|
|
0x000A30,
|
|
0x000A33,
|
|
0x000A36,
|
|
0x000A39,
|
|
0x000A3C,
|
|
0x000A42,
|
|
0x000A48,
|
|
0x000A4D,
|
|
0x000A51,
|
|
0x000A5C,
|
|
0x000A5E,
|
|
0x000A76,
|
|
0x000A83,
|
|
0x000A8D,
|
|
0x000A91,
|
|
0x000AA8,
|
|
0x000AB0,
|
|
0x000AB3,
|
|
0x000AB9,
|
|
0x000AC5,
|
|
0x000AC9,
|
|
0x000ACD,
|
|
0x000AD0,
|
|
0x000AE3,
|
|
0x000AF1,
|
|
0x000AFF,
|
|
0x000B03,
|
|
0x000B0C,
|
|
0x000B10,
|
|
0x000B28,
|
|
0x000B30,
|
|
0x000B33,
|
|
0x000B39,
|
|
0x000B44,
|
|
0x000B48,
|
|
0x000B4D,
|
|
0x000B57,
|
|
0x000B5D,
|
|
0x000B63,
|
|
0x000B77,
|
|
0x000B83,
|
|
0x000B8A,
|
|
0x000B90,
|
|
0x000B95,
|
|
0x000B9A,
|
|
0x000B9C,
|
|
0x000B9F,
|
|
0x000BA4,
|
|
0x000BAA,
|
|
0x000BB9,
|
|
0x000BC2,
|
|
0x000BC8,
|
|
0x000BCD,
|
|
0x000BD0,
|
|
0x000BD7,
|
|
0x000BFA,
|
|
0x000C0C,
|
|
0x000C10,
|
|
0x000C28,
|
|
0x000C39,
|
|
0x000C44,
|
|
0x000C48,
|
|
0x000C4D,
|
|
0x000C56,
|
|
0x000C5A,
|
|
0x000C5D,
|
|
0x000C63,
|
|
0x000C6F,
|
|
0x000C7F,
|
|
0x000C8C,
|
|
0x000C90,
|
|
0x000CA8,
|
|
0x000CB3,
|
|
0x000CB9,
|
|
0x000CC4,
|
|
0x000CC8,
|
|
0x000CCD,
|
|
0x000CD6,
|
|
0x000CDE,
|
|
0x000CE3,
|
|
0x000CEF,
|
|
0x000CF3,
|
|
0x000D0C,
|
|
0x000D10,
|
|
0x000D44,
|
|
0x000D48,
|
|
0x000D4F,
|
|
0x000D63,
|
|
0x000D7F,
|
|
0x000D83,
|
|
0x000D96,
|
|
0x000DB1,
|
|
0x000DBB,
|
|
0x000DBD,
|
|
0x000DC6,
|
|
0x000DCA,
|
|
0x000DD4,
|
|
0x000DD6,
|
|
0x000DDF,
|
|
0x000DEF,
|
|
0x000DF4,
|
|
0x000E3A,
|
|
0x000E3F,
|
|
0x000E5B,
|
|
0x000E82,
|
|
0x000E84,
|
|
0x000E8A,
|
|
0x000EA3,
|
|
0x000EA5,
|
|
0x000EBD,
|
|
0x000EC4,
|
|
0x000EC6,
|
|
0x000ECE,
|
|
0x000ED9,
|
|
0x000EDF,
|
|
0x000F47,
|
|
0x000F6C,
|
|
0x000F97,
|
|
0x000FBC,
|
|
0x000FCC,
|
|
0x000FD4,
|
|
0x000FD8,
|
|
0x000FDA,
|
|
0x00109F,
|
|
0x0010C5,
|
|
0x0010C7,
|
|
0x0010CD,
|
|
0x0010FA,
|
|
0x0010FB,
|
|
0x0010FF,
|
|
0x0011FF,
|
|
0x001248,
|
|
0x00124D,
|
|
0x001256,
|
|
0x001258,
|
|
0x00125D,
|
|
0x001288,
|
|
0x00128D,
|
|
0x0012B0,
|
|
0x0012B5,
|
|
0x0012BE,
|
|
0x0012C0,
|
|
0x0012C5,
|
|
0x0012D6,
|
|
0x001310,
|
|
0x001315,
|
|
0x00135A,
|
|
0x00137C,
|
|
0x001399,
|
|
0x0013F5,
|
|
0x0013FD,
|
|
0x00167F,
|
|
0x00169C,
|
|
0x0016EA,
|
|
0x0016ED,
|
|
0x0016F8,
|
|
0x001715,
|
|
0x00171F,
|
|
0x001734,
|
|
0x001736,
|
|
0x001753,
|
|
0x00176C,
|
|
0x001770,
|
|
0x001773,
|
|
0x0017DD,
|
|
0x0017E9,
|
|
0x0017F9,
|
|
0x001801,
|
|
0x001803,
|
|
0x001804,
|
|
0x001805,
|
|
0x001819,
|
|
0x001878,
|
|
0x0018AA,
|
|
0x0018F5,
|
|
0x00191E,
|
|
0x00192B,
|
|
0x00193B,
|
|
0x001940,
|
|
0x00194F,
|
|
0x00196D,
|
|
0x001974,
|
|
0x0019AB,
|
|
0x0019C9,
|
|
0x0019DA,
|
|
0x0019DF,
|
|
0x0019FF,
|
|
0x001A1B,
|
|
0x001A1F,
|
|
0x001A5E,
|
|
0x001A7C,
|
|
0x001A89,
|
|
0x001A99,
|
|
0x001AAD,
|
|
0x001ADD,
|
|
0x001AEB,
|
|
0x001B4C,
|
|
0x001B7F,
|
|
0x001BBF,
|
|
0x001BF3,
|
|
0x001BFF,
|
|
0x001C37,
|
|
0x001C49,
|
|
0x001C4F,
|
|
0x001C7F,
|
|
0x001C8A,
|
|
0x001CBA,
|
|
0x001CBF,
|
|
0x001CC7,
|
|
0x001CD2,
|
|
0x001CD3,
|
|
0x001CE0,
|
|
0x001CE1,
|
|
0x001CE8,
|
|
0x001CEC,
|
|
0x001CED,
|
|
0x001CF3,
|
|
0x001CF4,
|
|
0x001CF7,
|
|
0x001CF9,
|
|
0x001CFA,
|
|
0x001D25,
|
|
0x001D2A,
|
|
0x001D2B,
|
|
0x001D5C,
|
|
0x001D61,
|
|
0x001D65,
|
|
0x001D6A,
|
|
0x001D77,
|
|
0x001D78,
|
|
0x001DBE,
|
|
0x001DBF,
|
|
0x001DFF,
|
|
0x001EFF,
|
|
0x001F15,
|
|
0x001F1D,
|
|
0x001F45,
|
|
0x001F4D,
|
|
0x001F57,
|
|
0x001F59,
|
|
0x001F5B,
|
|
0x001F5D,
|
|
0x001F7D,
|
|
0x001FB4,
|
|
0x001FC4,
|
|
0x001FD3,
|
|
0x001FDB,
|
|
0x001FEF,
|
|
0x001FF4,
|
|
0x001FFE,
|
|
0x00200B,
|
|
0x00200D,
|
|
0x002064,
|
|
0x002070,
|
|
0x002071,
|
|
0x00207E,
|
|
0x00207F,
|
|
0x00208E,
|
|
0x00209C,
|
|
0x0020C1,
|
|
0x0020F0,
|
|
0x002125,
|
|
0x002126,
|
|
0x002129,
|
|
0x00212B,
|
|
0x002131,
|
|
0x002132,
|
|
0x00214D,
|
|
0x00214E,
|
|
0x00215F,
|
|
0x002188,
|
|
0x00218B,
|
|
0x002429,
|
|
0x00244A,
|
|
0x0027FF,
|
|
0x0028FF,
|
|
0x002B73,
|
|
0x002BFF,
|
|
0x002C5F,
|
|
0x002C7F,
|
|
0x002CF3,
|
|
0x002CFF,
|
|
0x002D25,
|
|
0x002D27,
|
|
0x002D2D,
|
|
0x002D67,
|
|
0x002D70,
|
|
0x002D7F,
|
|
0x002D96,
|
|
0x002DA6,
|
|
0x002DAE,
|
|
0x002DB6,
|
|
0x002DBE,
|
|
0x002DC6,
|
|
0x002DCE,
|
|
0x002DD6,
|
|
0x002DDE,
|
|
0x002DFF,
|
|
0x002E5D,
|
|
0x002E99,
|
|
0x002EF3,
|
|
0x002FD5,
|
|
0x003004,
|
|
0x003005,
|
|
0x003006,
|
|
0x003007,
|
|
0x003020,
|
|
0x003029,
|
|
0x00302D,
|
|
0x00302F,
|
|
0x003037,
|
|
0x00303B,
|
|
0x00303F,
|
|
0x003096,
|
|
0x00309A,
|
|
0x00309C,
|
|
0x00309F,
|
|
0x0030A0,
|
|
0x0030FA,
|
|
0x0030FC,
|
|
0x0030FF,
|
|
0x00312F,
|
|
0x00318E,
|
|
0x00319F,
|
|
0x0031BF,
|
|
0x0031E5,
|
|
0x0031EF,
|
|
0x0031FF,
|
|
0x00321E,
|
|
0x00325F,
|
|
0x00327E,
|
|
0x0032CF,
|
|
0x0032FE,
|
|
0x0032FF,
|
|
0x003357,
|
|
0x0033FF,
|
|
0x004DBF,
|
|
0x004DFF,
|
|
0x009FFF,
|
|
0x00A48C,
|
|
0x00A4C6,
|
|
0x00A4FF,
|
|
0x00A62B,
|
|
0x00A69F,
|
|
0x00A6F7,
|
|
0x00A721,
|
|
0x00A787,
|
|
0x00A78A,
|
|
0x00A7DC,
|
|
0x00A7FF,
|
|
0x00A82C,
|
|
0x00A839,
|
|
0x00A877,
|
|
0x00A8C5,
|
|
0x00A8D9,
|
|
0x00A8FF,
|
|
0x00A92D,
|
|
0x00A92E,
|
|
0x00A92F,
|
|
0x00A953,
|
|
0x00A95F,
|
|
0x00A97C,
|
|
0x00A9CD,
|
|
0x00A9CF,
|
|
0x00A9D9,
|
|
0x00A9DF,
|
|
0x00A9FE,
|
|
0x00AA36,
|
|
0x00AA4D,
|
|
0x00AA59,
|
|
0x00AA5F,
|
|
0x00AA7F,
|
|
0x00AAC2,
|
|
0x00AADF,
|
|
0x00AAF6,
|
|
0x00AB06,
|
|
0x00AB0E,
|
|
0x00AB16,
|
|
0x00AB26,
|
|
0x00AB2E,
|
|
0x00AB5A,
|
|
0x00AB5B,
|
|
0x00AB64,
|
|
0x00AB65,
|
|
0x00AB69,
|
|
0x00AB6B,
|
|
0x00ABBF,
|
|
0x00ABED,
|
|
0x00ABF9,
|
|
0x00D7A3,
|
|
0x00D7C6,
|
|
0x00D7FB,
|
|
0x00FA6D,
|
|
0x00FAD9,
|
|
0x00FB06,
|
|
0x00FB17,
|
|
0x00FB36,
|
|
0x00FB3C,
|
|
0x00FB3E,
|
|
0x00FB41,
|
|
0x00FB44,
|
|
0x00FB4F,
|
|
0x00FD3D,
|
|
0x00FD3F,
|
|
0x00FDCF,
|
|
0x00FDFF,
|
|
0x00FE0F,
|
|
0x00FE19,
|
|
0x00FE2D,
|
|
0x00FE2F,
|
|
0x00FE52,
|
|
0x00FE66,
|
|
0x00FE6B,
|
|
0x00FE74,
|
|
0x00FEFC,
|
|
0x00FEFF,
|
|
0x00FF20,
|
|
0x00FF3A,
|
|
0x00FF40,
|
|
0x00FF5A,
|
|
0x00FF65,
|
|
0x00FF6F,
|
|
0x00FF70,
|
|
0x00FF9D,
|
|
0x00FF9F,
|
|
0x00FFBE,
|
|
0x00FFC7,
|
|
0x00FFCF,
|
|
0x00FFD7,
|
|
0x00FFDC,
|
|
0x00FFE6,
|
|
0x00FFEE,
|
|
0x00FFFD,
|
|
0x01000B,
|
|
0x010026,
|
|
0x01003A,
|
|
0x01003D,
|
|
0x01004D,
|
|
0x01005D,
|
|
0x0100FA,
|
|
0x010102,
|
|
0x010133,
|
|
0x01013F,
|
|
0x01018E,
|
|
0x01019C,
|
|
0x0101A0,
|
|
0x0101FC,
|
|
0x0101FD,
|
|
0x01029C,
|
|
0x0102D0,
|
|
0x0102E0,
|
|
0x0102FB,
|
|
0x010323,
|
|
0x01032F,
|
|
0x01034A,
|
|
0x01037A,
|
|
0x01039D,
|
|
0x01039F,
|
|
0x0103C3,
|
|
0x0103D5,
|
|
0x01044F,
|
|
0x01047F,
|
|
0x01049D,
|
|
0x0104A9,
|
|
0x0104D3,
|
|
0x0104FB,
|
|
0x010527,
|
|
0x010563,
|
|
0x01056F,
|
|
0x01057A,
|
|
0x01058A,
|
|
0x010592,
|
|
0x010595,
|
|
0x0105A1,
|
|
0x0105B1,
|
|
0x0105B9,
|
|
0x0105BC,
|
|
0x0105F3,
|
|
0x010736,
|
|
0x010755,
|
|
0x010767,
|
|
0x010785,
|
|
0x0107B0,
|
|
0x0107BA,
|
|
0x010805,
|
|
0x010808,
|
|
0x010835,
|
|
0x010838,
|
|
0x01083C,
|
|
0x01083F,
|
|
0x010855,
|
|
0x01085F,
|
|
0x01087F,
|
|
0x01089E,
|
|
0x0108AF,
|
|
0x0108F2,
|
|
0x0108F5,
|
|
0x0108FF,
|
|
0x01091B,
|
|
0x01091F,
|
|
0x010939,
|
|
0x01093F,
|
|
0x010959,
|
|
0x01099F,
|
|
0x0109B7,
|
|
0x0109CF,
|
|
0x0109FF,
|
|
0x010A03,
|
|
0x010A06,
|
|
0x010A13,
|
|
0x010A17,
|
|
0x010A35,
|
|
0x010A3A,
|
|
0x010A48,
|
|
0x010A58,
|
|
0x010A7F,
|
|
0x010A9F,
|
|
0x010AE6,
|
|
0x010AF6,
|
|
0x010B35,
|
|
0x010B3F,
|
|
0x010B55,
|
|
0x010B5F,
|
|
0x010B72,
|
|
0x010B7F,
|
|
0x010B91,
|
|
0x010B9C,
|
|
0x010BAF,
|
|
0x010C48,
|
|
0x010CB2,
|
|
0x010CF2,
|
|
0x010CFF,
|
|
0x010D27,
|
|
0x010D39,
|
|
0x010D65,
|
|
0x010D85,
|
|
0x010D8F,
|
|
0x010E7E,
|
|
0x010EA9,
|
|
0x010EAD,
|
|
0x010EB1,
|
|
0x010EC7,
|
|
0x010ED8,
|
|
0x010EFF,
|
|
0x010F27,
|
|
0x010F59,
|
|
0x010F89,
|
|
0x010FCB,
|
|
0x010FF6,
|
|
0x01104D,
|
|
0x011075,
|
|
0x01107F,
|
|
0x0110C2,
|
|
0x0110CD,
|
|
0x0110E8,
|
|
0x0110F9,
|
|
0x011134,
|
|
0x011147,
|
|
0x011176,
|
|
0x0111DF,
|
|
0x0111F4,
|
|
0x011211,
|
|
0x011241,
|
|
0x011286,
|
|
0x011288,
|
|
0x01128D,
|
|
0x01129D,
|
|
0x0112A9,
|
|
0x0112EA,
|
|
0x0112F9,
|
|
0x011303,
|
|
0x01130C,
|
|
0x011310,
|
|
0x011328,
|
|
0x011330,
|
|
0x011333,
|
|
0x011339,
|
|
0x01133B,
|
|
0x011344,
|
|
0x011348,
|
|
0x01134D,
|
|
0x011350,
|
|
0x011357,
|
|
0x011363,
|
|
0x01136C,
|
|
0x011374,
|
|
0x011389,
|
|
0x01138B,
|
|
0x01138E,
|
|
0x0113B5,
|
|
0x0113C0,
|
|
0x0113C2,
|
|
0x0113C5,
|
|
0x0113CA,
|
|
0x0113D5,
|
|
0x0113D8,
|
|
0x0113E2,
|
|
0x01145B,
|
|
0x011461,
|
|
0x0114C7,
|
|
0x0114D9,
|
|
0x0115B5,
|
|
0x0115DD,
|
|
0x011644,
|
|
0x011659,
|
|
0x01166C,
|
|
0x0116B9,
|
|
0x0116C9,
|
|
0x0116E3,
|
|
0x01171A,
|
|
0x01172B,
|
|
0x011746,
|
|
0x01183B,
|
|
0x0118F2,
|
|
0x0118FF,
|
|
0x011906,
|
|
0x011909,
|
|
0x011913,
|
|
0x011916,
|
|
0x011935,
|
|
0x011938,
|
|
0x011946,
|
|
0x011959,
|
|
0x0119A7,
|
|
0x0119D7,
|
|
0x0119E4,
|
|
0x011A47,
|
|
0x011AA2,
|
|
0x011ABF,
|
|
0x011AF8,
|
|
0x011B09,
|
|
0x011B67,
|
|
0x011BE1,
|
|
0x011BF9,
|
|
0x011C08,
|
|
0x011C36,
|
|
0x011C45,
|
|
0x011C6C,
|
|
0x011C8F,
|
|
0x011CA7,
|
|
0x011CB6,
|
|
0x011D06,
|
|
0x011D09,
|
|
0x011D36,
|
|
0x011D3A,
|
|
0x011D3D,
|
|
0x011D47,
|
|
0x011D59,
|
|
0x011D65,
|
|
0x011D68,
|
|
0x011D8E,
|
|
0x011D91,
|
|
0x011D98,
|
|
0x011DA9,
|
|
0x011DDB,
|
|
0x011DE9,
|
|
0x011EF8,
|
|
0x011F10,
|
|
0x011F3A,
|
|
0x011F5A,
|
|
0x011FB0,
|
|
0x011FF1,
|
|
0x011FFF,
|
|
0x012399,
|
|
0x01246E,
|
|
0x012474,
|
|
0x012543,
|
|
0x012FF2,
|
|
0x013455,
|
|
0x0143FA,
|
|
0x014646,
|
|
0x016139,
|
|
0x016A38,
|
|
0x016A5E,
|
|
0x016A69,
|
|
0x016A6F,
|
|
0x016ABE,
|
|
0x016AC9,
|
|
0x016AED,
|
|
0x016AF5,
|
|
0x016B45,
|
|
0x016B59,
|
|
0x016B61,
|
|
0x016B77,
|
|
0x016B8F,
|
|
0x016D79,
|
|
0x016E9A,
|
|
0x016EB8,
|
|
0x016ED3,
|
|
0x016F4A,
|
|
0x016F87,
|
|
0x016F9F,
|
|
0x016FE0,
|
|
0x016FE1,
|
|
0x016FE3,
|
|
0x016FE4,
|
|
0x016FF6,
|
|
0x018AFF,
|
|
0x018CD5,
|
|
0x018CFF,
|
|
0x018D1E,
|
|
0x018DF2,
|
|
0x01AFF3,
|
|
0x01AFFB,
|
|
0x01AFFE,
|
|
0x01B000,
|
|
0x01B11F,
|
|
0x01B122,
|
|
0x01B132,
|
|
0x01B152,
|
|
0x01B155,
|
|
0x01B167,
|
|
0x01B2FB,
|
|
0x01BC6A,
|
|
0x01BC7C,
|
|
0x01BC88,
|
|
0x01BC99,
|
|
0x01BC9F,
|
|
0x01BCA3,
|
|
0x01CCFC,
|
|
0x01CEB3,
|
|
0x01CED0,
|
|
0x01CEF0,
|
|
0x01CF2D,
|
|
0x01CF46,
|
|
0x01CFC3,
|
|
0x01D0F5,
|
|
0x01D126,
|
|
0x01D166,
|
|
0x01D169,
|
|
0x01D17A,
|
|
0x01D182,
|
|
0x01D184,
|
|
0x01D18B,
|
|
0x01D1A9,
|
|
0x01D1AD,
|
|
0x01D1EA,
|
|
0x01D245,
|
|
0x01D2D3,
|
|
0x01D2F3,
|
|
0x01D356,
|
|
0x01D378,
|
|
0x01D454,
|
|
0x01D49C,
|
|
0x01D49F,
|
|
0x01D4A2,
|
|
0x01D4A6,
|
|
0x01D4AC,
|
|
0x01D4B9,
|
|
0x01D4BB,
|
|
0x01D4C3,
|
|
0x01D505,
|
|
0x01D50A,
|
|
0x01D514,
|
|
0x01D51C,
|
|
0x01D539,
|
|
0x01D53E,
|
|
0x01D544,
|
|
0x01D546,
|
|
0x01D550,
|
|
0x01D6A5,
|
|
0x01D7CB,
|
|
0x01D7FF,
|
|
0x01DA8B,
|
|
0x01DA9F,
|
|
0x01DAAF,
|
|
0x01DF1E,
|
|
0x01DF2A,
|
|
0x01E006,
|
|
0x01E018,
|
|
0x01E021,
|
|
0x01E024,
|
|
0x01E02A,
|
|
0x01E06D,
|
|
0x01E08F,
|
|
0x01E12C,
|
|
0x01E13D,
|
|
0x01E149,
|
|
0x01E14F,
|
|
0x01E2AE,
|
|
0x01E2F9,
|
|
0x01E2FF,
|
|
0x01E4F9,
|
|
0x01E5FA,
|
|
0x01E5FF,
|
|
0x01E6DE,
|
|
0x01E6F5,
|
|
0x01E6FF,
|
|
0x01E7E6,
|
|
0x01E7EB,
|
|
0x01E7EE,
|
|
0x01E7FE,
|
|
0x01E8C4,
|
|
0x01E8D6,
|
|
0x01E94B,
|
|
0x01E959,
|
|
0x01E95F,
|
|
0x01ECB4,
|
|
0x01ED3D,
|
|
0x01EE03,
|
|
0x01EE1F,
|
|
0x01EE22,
|
|
0x01EE24,
|
|
0x01EE27,
|
|
0x01EE32,
|
|
0x01EE37,
|
|
0x01EE39,
|
|
0x01EE3B,
|
|
0x01EE42,
|
|
0x01EE47,
|
|
0x01EE49,
|
|
0x01EE4B,
|
|
0x01EE4F,
|
|
0x01EE52,
|
|
0x01EE54,
|
|
0x01EE57,
|
|
0x01EE59,
|
|
0x01EE5B,
|
|
0x01EE5D,
|
|
0x01EE5F,
|
|
0x01EE62,
|
|
0x01EE64,
|
|
0x01EE6A,
|
|
0x01EE72,
|
|
0x01EE77,
|
|
0x01EE7C,
|
|
0x01EE7E,
|
|
0x01EE89,
|
|
0x01EE9B,
|
|
0x01EEA3,
|
|
0x01EEA9,
|
|
0x01EEBB,
|
|
0x01EEF1,
|
|
0x01F02B,
|
|
0x01F093,
|
|
0x01F0AE,
|
|
0x01F0BF,
|
|
0x01F0CF,
|
|
0x01F0F5,
|
|
0x01F1AD,
|
|
0x01F1FF,
|
|
0x01F200,
|
|
0x01F202,
|
|
0x01F23B,
|
|
0x01F248,
|
|
0x01F251,
|
|
0x01F265,
|
|
0x01F6D8,
|
|
0x01F6EC,
|
|
0x01F6FC,
|
|
0x01F7D9,
|
|
0x01F7EB,
|
|
0x01F7F0,
|
|
0x01F80B,
|
|
0x01F847,
|
|
0x01F859,
|
|
0x01F887,
|
|
0x01F8AD,
|
|
0x01F8BB,
|
|
0x01F8C1,
|
|
0x01F8D8,
|
|
0x01FA57,
|
|
0x01FA6D,
|
|
0x01FA7C,
|
|
0x01FA8A,
|
|
0x01FAC6,
|
|
0x01FAC8,
|
|
0x01FADC,
|
|
0x01FAEA,
|
|
0x01FAF8,
|
|
0x01FB92,
|
|
0x01FBFA,
|
|
0x02A6DF,
|
|
0x02B81D,
|
|
0x02CEAD,
|
|
0x02EBE0,
|
|
0x02EE5D,
|
|
0x02FA1D,
|
|
0x03134A,
|
|
0x033479,
|
|
0x0E0001,
|
|
0x0E007F,
|
|
0x0E01EF,
|
|
};
|
|
const auto cpts = unicode_cpts_from_utf8(*utf8);
|
|
const size_t n_cpt = cpts.size();
|
|
|
|
std::vector<std::string> scripts;
|
|
scripts.reserve(n_cpt);
|
|
|
|
for (const auto& cpt: cpts) {
|
|
const auto it = std::lower_bound(unicode_script_lasts.begin(), unicode_script_lasts.end(), cpt);
|
|
if (it != unicode_script_lasts.end()) {
|
|
scripts.push_back(unicode_scripts[std::distance(unicode_script_lasts.begin(), it)]);
|
|
}
|
|
}
|
|
|
|
if (dst_cpts != nullptr) {
|
|
*dst_cpts = cpts;
|
|
}
|
|
if (dst_scripts != nullptr) {
|
|
*dst_scripts = scripts;
|
|
}
|
|
|
|
return n_cpt;
|
|
}
|
|
|