package com.github.curiousoddman.rgxgen.model;

/* **************************************************************************
   Copyright 2019 Vladislavs Varslavans

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
/* **************************************************************************/

import com.github.curiousoddman.rgxgen.util.Util;

import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static com.github.curiousoddman.rgxgen.model.SymbolRange.range;
import static com.github.curiousoddman.rgxgen.model.UnicodeCategoryConstants.*;
import static com.github.curiousoddman.rgxgen.parsing.dflt.ConstantsProvider.ZERO_LENGTH_CHARACTER_ARRAY;
import static java.util.Arrays.asList;
import static java.util.Arrays.stream;
import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;

public enum UnicodeCategory {
    /**
     * The category configuration is based on
     * <a href="https://www.regular-expressions.info/unicode.html#category">the Unicode category list at regular-expressions.info</a>.
     */

    /**
     * Category registered under the keys {@code "L"} and {@code "Letter"}, described as
     * "Any kind of letter from any language".
     * <p>
     * The declaration passes a list of named range constants followed by individually
     * listed characters.
     * NOTE(review): the enum constructor is not visible in this part of the file; the argument
     * order looks like (keys, description, ranges, single characters...) - confirm there.
     */
    ANY_LETTER(keys("L", "Letter"), "Any kind of letter from any language", asList(BASIC_LATIN_UPPERCASE_LATIN_ALPHABET, BASIC_LATIN_LOWERCASE_LATIN_ALPHABET, LATIN_1_SUPPLEMENT_LETTERS, LATIN_1_SUPPLEMENT_UPPERCASE_LETTERS_TO_LOWERCASE_LETTERS, LATIN_1_SUPPLEMENT_TO_SPACING_MODIFIER_LETTERS, SPACING_MODIFIER_LETTERS_MISCELLANEOUS_PHONETIC_MODIFIERS_SUBSET_1, SPACING_MODIFIER_LETTERS_ADDITIONS_BASED_ON_1989_IPA_SUBSET, GREEK_AND_COPTIC_ARCHAIC_LETTERS_TO_ARCHAIC_LETTERS, GREEK_AND_COPTIC_ARCHAIC_LETTERS_SUBSET, GREEK_AND_COPTIC_IOTA_SUBSCRIPT_TO_LOWERCASE_OF_EDITORIAL_SYMBOLS, GREEK_AND_COPTIC_LETTERS_SUBSET, GREEK_AND_COPTIC_LETTERS_SUBSET_2, GREEK_AND_COPTIC_LETTERS_TO_VARIANT_LETTERFORMS_AND_SYMBOLS, GREEK_AND_COPTIC_TO_CYRILLIC, CYRILLIC_TO_CYRILLIC_SUPPLEMENT, CYRILLIC_SUPPLEMENT_UPPERCASE_LETTERS_SUBSET, CYRILLIC_SUPPLEMENT_LOWERCASE_LETTERS, CYRILLIC_SUPPLEMENT_BASED_ON_ISO_8859_8_SUBSET, CYRILLIC_SUPPLEMENT_SIGN_TO_YIDDISH_DIGRAPHS, ARABIC_ADDITION_FOR_KASHMIRI_TO_BASED_ON_ISO_8859_6, ARABIC_ARCHAIC_LETTERS, ARABIC_EXTENDED_ARABIC_LETTERS_TO_EXTENDED_ARABIC_LETTERS, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_2, ARABIC_EXTENDED_ARABIC_LETTERS_FOR_PARKARI, ARABIC_EXTENDED_ARABIC_LETTERS_2, SYRIAC_SYRIAC_LETTERS_TO_PERSIAN_LETTERS, SYRIAC_TO_THAANA, NKO_LETTERS_TO_ARCHAIC_LETTERS, NKO_TONAL_APOSTROPHES, SAMARITAN_LETTERS, MANDAIC_LETTERS, SYRIAC_SUPPLEMENT_SYRIAC_LETTERS_SUBSET, ARABIC_EXTENDED_A_ARABIC_LETTERS_FOR_AFRICAN_LANGUAGES_TO_ARABIC_LETTERS_FOR_ARWI, ARABIC_EXTENDED_A_ARABIC_LETTERS_FOR_BRAVANESE_TO_ARABIC_LETTERS_FOR_HAUSA_WOLOF_AND_OTHER_AFRICAN_ORTHOGRAPHIES, DEVANAGARI_INDEPENDENT_VOWELS_TO_CONSONANTS, DEVANAGARI_ADDITIONAL_CONSONANTS_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT, DEVANAGARI_ADDITIONAL_SIGNS_TO_SINDHI_IMPLOSIVES, BENGALI_INDEPENDENT_VOWELS_SUBSET_2, BENGALI_INDEPENDENT_VOWELS_SUBSET_6, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS, BENGALI_CONSONANTS_SUBSET_4, BENGALI_CONSONANTS_SUBSET_15, BENGALI_ADDITIONAL_CONSONANTS_SUBSET_1, 
BENGALI_ADDITIONAL_CONSONANTS_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT, BENGALI_ADDITIONS_FOR_ASSAMESE, BENGALI_INDEPENDENT_VOWELS_SUBSET, BENGALI_INDEPENDENT_VOWELS_SUBSET_7, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS_1, BENGALI_CONSONANTS_SUBSET_5, BENGALI_CONSONANTS_SUBSET_10, BENGALI_CONSONANTS_SUBSET_12, BENGALI_CONSONANTS_SUBSET_16, BENGALI_ADDITIONAL_CONSONANTS_SUBSET, BENGALI_VOWEL_BASES_TO_VOWEL_BASES, BENGALI_INDEPENDENT_VOWELS_SUBSET_4, BENGALI_INDEPENDENT_VOWELS_SUBSET_9, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS_2, BENGALI_CONSONANTS_SUBSET_6, BENGALI_CONSONANTS_SUBSET_9, BENGALI_CONSONANTS_SUBSET_13, BENGALI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET, BENGALI_INDEPENDENT_VOWELS_SUBSET_3, BENGALI_INDEPENDENT_VOWELS_SUBSET_8, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS_3, BENGALI_CONSONANTS_SUBSET_7, BENGALI_CONSONANTS_SUBSET_11, BENGALI_CONSONANTS_SUBSET_14, BENGALI_ADDITIONAL_CONSONANTS_SUBSET_2, BENGALI_ADDITIONAL_CONSONANTS_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT_1, BENGALI_INDEPENDENT_VOWELS_SUBSET_1, BENGALI_INDEPENDENT_VOWELS_SUBSET_5, BENGALI_INDEPENDENT_VOWELS_TO_INDEPENDENT_VOWELS, BENGALI_CONSONANTS_SUBSET, BENGALI_CONSONANTS_SUBSET_1, BENGALI_CONSONANTS_SUBSET_2, BENGALI_CONSONANTS_SUBSET_3, BENGALI_CONSONANTS_SUBSET_8, TELUGU_INDEPENDENT_VOWELS_SUBSET, TELUGU_INDEPENDENT_VOWELS_SUBSET_1, TELUGU_INDEPENDENT_VOWELS_TO_CONSONANTS, TELUGU_CONSONANTS_SUBSET, TELUGU_HISTORIC_PHONETIC_VARIANTS_SUBSET, TELUGU_ADDITIONAL_VOWELS_FOR_SANSKRIT, KANNADA_INDEPENDENT_VOWELS_SUBSET, KANNADA_INDEPENDENT_VOWELS_SUBSET_1, KANNADA_INDEPENDENT_VOWELS_TO_CONSONANTS, KANNADA_CONSONANTS_SUBSET, KANNADA_CONSONANTS_SUBSET_1, KANNADA_ADDITIONAL_VOWELS_FOR_SANSKRIT, KANNADA_SIGNS_USED_IN_SANSKRIT_SUBSET, MALAYALAM_VARIOUS_SIGNS_TO_INDEPENDENT_VOWELS, MALAYALAM_INDEPENDENT_VOWELS_SUBSET_1, MALAYALAM_INDEPENDENT_VOWELS_TO_CONSONANTS, MALAYALAM_ADDITIONAL_HISTORIC_CHILLU_LETTERS, MALAYALAM_ADDITIONAL_HISTORIC_VOWEL_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT, MALAYALAM_CHILLU_LETTERS, 
MALAYALAM_INDEPENDENT_VOWELS_SUBSET, MALAYALAM_CONSONANTS_SUBSET_4, MALAYALAM_CONSONANTS_SUBSET_5, MALAYALAM_CONSONANTS_SUBSET, MALAYALAM_CONSONANTS_TO_SIGN, MALAYALAM_VOWELS_SUBSET_1, MALAYALAM_VOWELS_TO_VOWEL_LENGTH_SIGN, MALAYALAM_CONSONANTS_SUBSET_1, MALAYALAM_CONSONANTS_SUBSET_2, MALAYALAM_CONSONANTS_SUBSET_3, MALAYALAM_CONSONANTS_TO_SIGN_1, MALAYALAM_VOWELS_SUBSET_2, MALAYALAM_VOWELS_SUBSET, MALAYALAM_DIGRAPHS_TO_CONSONANTS_FOR_KHMU, TIBETAN_CONSONANTS_SUBSET, TIBETAN_CONSONANTS_TO_EXTENSIONS_FOR_BALTI, TIBETAN_TRANSLITERATION_HEAD_LETTERS, MYANMAR_CONSONANTS_TO_INDEPENDENT_VOWELS, MYANMAR_PALI_AND_SANSKRIT_EXTENSIONS_SUBSET, MYANMAR_EXTENSIONS_FOR_MON_SUBSET, MYANMAR_EXTENSIONS_FOR_WESTERN_PWO_KAREN_SUBSET, MYANMAR_EXTENSIONS_FOR_EASTERN_PWO_KAREN, MYANMAR_EXTENSIONS_FOR_SHAN_SUBSET_4, GEORGIAN_CAPITAL_LETTERS_KHUTSURI_SUBSET, GEORGIAN_MKHEDRULI_TO_ADDITIONAL_LETTERS, GEORGIAN_TO_ETHIOPIC, ETHIOPIC_SYLLABLES_SUBSET_3, ETHIOPIC_SYLLABLES_SUBSET_4, ETHIOPIC_SYLLABLES_SUBSET_9, ETHIOPIC_SYLLABLES_SUBSET_10, ETHIOPIC_SYLLABLES_SUBSET_2, ETHIOPIC_SYLLABLES_SUBSET_5, ETHIOPIC_SYLLABLES_SUBSET_11, ETHIOPIC_SYLLABLES_SUBSET_12, ETHIOPIC_SYLLABLES_SUBSET, ETHIOPIC_SYLLABLES_SUBSET_1, ETHIOPIC_SYLLABLES_SUBSET_7, ETHIOPIC_SYLLABLES_SUBSET_6, ETHIOPIC_SYLLABLES_SUBSET_8, ETHIOPIC_SUPPLEMENT_SYLLABLES_FOR_SEBATBEIT, CHEROKEE_UPPERCASE_SYLLABLES_TO_UPPERCASE_SYLLABLES, CHEROKEE_LOWERCASE_SYLLABLES_TO_LOWERCASE_SYLLABLES, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_SYLLABLES_TO_SYLLABLES_FOR_CARRIER, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_SYLLABLES_1, OGHAM_TRADITIONAL_LETTERS_TO_FORFEDA_SUPPLEMENTARY_LETTERS, RUNIC_LETTERS, RUNIC_TOLKIENIAN_EXTENSIONS_TO_CRYPTOGRAMMIC_LETTERS, TAGALOG_INDEPENDENT_VOWELS_TO_CONSONANTS, TAGALOG_CONSONANTS_SUBSET, HANUNOO_INDEPENDENT_VOWELS_TO_CONSONANTS, BUHID_INDEPENDENT_VOWELS_TO_CONSONANTS, TAGBANWA_INDEPENDENT_VOWELS_TO_CONSONANTS, TAGBANWA_CONSONANTS_SUBSET, KHMER_CONSONANTS_TO_INDEPENDENT_VOWELS, MONGOLIAN_BASIC_LETTERS_TO_MANCHU_LETTERS, 
MONGOLIAN_EXTENSIONS_FOR_SANSKRIT_AND_TIBETAN_SUBSET, MONGOLIAN_EXTENSIONS_FOR_SANSKRIT_AND_TIBETAN_SUBSET_2, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_SYLLABLES_FOR_MOOSE_CREE_TO_FINALS_FOR_DENE_AND_CARRIER, LIMBU_CONSONANTS_SUBSET, TAI_LE_CONSONANTS_TO_VOWELS, TAI_LE_TONE_LETTERS_SUBSET, NEW_TAI_LUE_CONSONANTS_SUBSET, NEW_TAI_LUE_VOWEL_SIGNS_TO_TONE_MARKS, BUGINESE_CONSONANTS, TAI_THAM_CONSONANTS_TO_CONSONANTS, BALINESE_INDEPENDENT_VOWELS_TO_CONSONANTS, BALINESE_ADDITIONAL_CONSONANTS_SUBSET, SUNDANESE_VOWELS_TO_CONSONANTS, SUNDANESE_ADDITIONAL_CONSONANTS, SUNDANESE_TO_BATAK, LEPCHA_CONSONANTS, LEPCHA_ADDITIONAL_LETTERS, OL_CHIKI_LETTERS_TO_MODIFIER_LETTERS, CYRILLIC_EXTENDED_C_HISTORIC_LETTER_VARIANTS_SUBSET, GEORGIAN_EXTENDED_CAPITAL_LETTERS_MTAVRULI_TO_ADDITIONAL_LETTERS, GEORGIAN_EXTENDED_ADDITIONAL_LETTERS_FOR_OSSETIAN_AND_ABKHAZ, VEDIC_EXTENSIONS_NASALIZATION_SIGNS_SUBSET, VEDIC_EXTENSIONS_NASALIZATION_SIGNS_TO_ARDHAVISARGA, VEDIC_EXTENSIONS_SIGNS_SUBSET, PHONETIC_EXTENSIONS_TO_PHONETIC_EXTENSIONS_SUPPLEMENT, LATIN_EXTENDED_ADDITIONAL_TO_GREEK_EXTENDED, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_19, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_27, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_9, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_12, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_21, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_2, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_39, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_3, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_5, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_11, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_16, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_26, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_36, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_40, SUPERSCRIPTS_AND_SUBSCRIPTS_SUBSCRIPTS_TO_SUBSCRIPTS_FOR_UPA, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_3, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_9, 
LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_11, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_TO_HEBREW_LETTERLIKE_MATH_SYMBOLS, LETTERLIKE_SYMBOLS_ADDITIONAL_LETTERLIKE_SYMBOLS_SUBSET_3, LETTERLIKE_SYMBOLS_DOUBLE_STRUCK_ITALIC_MATH_SYMBOLS, NUMBER_FORMS_ARCHAIC_ROMAN_NUMERALS_TO_ARCHAIC_ROMAN_NUMERALS, GLAGOLITIC_CAPITAL_LETTERS_SUBSET, GLAGOLITIC_SMALL_LETTERS_SUBSET, LATIN_EXTENDED_C_TO_COPTIC, COPTIC_CRYPTOGRAMMIC_LETTERS, COPTIC_BOHAIRIC_COPTIC_LETTERS_SUBSET, GEORGIAN_SUPPLEMENT_SMALL_LETTERS_KHUTSURI_SUBSET, TIFINAGH_LETTERS_SUBSET, ETHIOPIC_EXTENDED_SYLLABLES_FOR_MEEN_TO_SYLLABLES_FOR_BLIN, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET_1, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET_2, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET_3, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET_1, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET_2, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET_3, CJK_SYMBOLS_AND_PUNCTUATION_CJK_SYMBOLS_AND_PUNCTUATION_SUBSET_1, CJK_SYMBOLS_AND_PUNCTUATION_KANA_REPEAT_MARKS, CJK_SYMBOLS_AND_PUNCTUATION_OTHER_CJK_PUNCTUATION_SUBSET, CJK_SYMBOLS_AND_PUNCTUATION_HIRAGANA_LETTERS_TO_SMALL_LETTERS, CJK_SYMBOLS_AND_PUNCTUATION_ITERATION_MARKS_TO_ITERATION_MARKS, KATAKANA_KATAKANA_LETTERS, KATAKANA_CONJUNCTION_AND_LENGTH_MARKS_TO_ITERATION_MARKS_1, KATAKANA_BASED_ON_GB_2312_TO_MISCELLANEOUS_ADDITIONS, KATAKANA_CONSONANT_LETTERS_TO_OLD_VOWEL_LETTERS, BOPOMOFO_EXTENDED_EXTENDED_BOPOMOFO_FOR_MINNAN_AND_HAKKA_TO_EXTENDED_BOPOMOFO_FOR_CANTONESE, KATAKANA_PHONETIC_EXTENSIONS_PHONETIC_EXTENSIONS_FOR_AINU, CJK_COMPATIBILITY_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, YIJING_HEXAGRAM_SYMBOLS_CJK_UNIFIED_IDEOGRAPHS_SUBSET, YI_SYLLABLES_SYLLABLES_TO_SYLLABLES, LISU_CONSONANTS_TO_TONES, VAI_SYLLABLES_IN__EE_TO_SYLLABLE_FINALS, VAI_HISTORIC_SYLLABLES_TO_LOGOGRAMS, VAI_HISTORIC_SYLLABLES_SUBSET, CYRILLIC_EXTENDED_B_LETTERS_FOR_OLD_CYRILLIC, 
CYRILLIC_EXTENDED_B_MODIFIER_LETTER_TO_INTONATION_MARKS_FOR_LITHUANIAN_DIALECTOLOGY, BAMUM_SYLLABLES_SUBSET, MODIFIER_TONE_LETTERS_CHINANTEC_TONE_MARKS_TO_AFRICANIST_TONE_LETTERS, LATIN_EXTENDED_D_EGYPTOLOGICAL_ADDITIONS_TO_INSULAR_AND_CELTICIST_LETTERS, LATIN_EXTENDED_D_ORTHOGRAPHIC_LETTERS_FOR_GLOTTALS_TO_LETTERS_FOR_UGARITIC_AND_EGYPTOLOGICAL_TRANSLITERATION, LATIN_EXTENDED_D_ADDITIONAL_MEDIEVAL_LETTERS_TO_ADDITIONAL_LETTERS_FOR_GAULISH, LATIN_EXTENDED_D_TO_SYLOTI_NAGRI, SYLOTI_NAGRI_INDEPENDENT_VOWELS_AND_DVISVARA_SUBSET, SYLOTI_NAGRI_CONSONANTS_AND_CONSONANT_SIGNS_SUBSET, SYLOTI_NAGRI_CONSONANTS_AND_CONSONANT_SIGNS_SUBSET_1, PHAGS_PA_CONSONANTS_TO_CONSONANT_ADDITION_FOR_TIBETAN, SAURASHTRA_INDEPENDENT_VOWELS_TO_CONSONANTS, DEVANAGARI_EXTENDED_MARKS_OF_NASALIZATION, DEVANAGARI_EXTENDED_SIGNS_TO_SIGNS, KAYAH_LI_CONSONANTS_TO_VOWELS, REJANG_CONSONANTS, HANGUL_JAMO_EXTENDED_A_OLD_INITIAL_CONSONANTS_SUBSET, JAVANESE_LETTERS, MYANMAR_EXTENDED_B_ADDITIONS_FOR_SHAN_PALI_SUBSET, MYANMAR_EXTENDED_B_REDUPLICATION_MARK_TO_TAI_LAING_CONSONANTS, MYANMAR_EXTENDED_B_TAI_LAING_CONSONANTS_SUBSET, CHAM_INDEPENDENT_VOWELS_TO_CONSONANTS, CHAM_FINAL_CONSONANTS_SUBSET, CHAM_FINAL_CONSONANTS_SUBSET_1, MYANMAR_EXTENDED_A_KHAMTI_SHAN_CONSONANTS_TO_KHAMTI_SHAN_LOGOGRAMS, MYANMAR_EXTENDED_A_TO_TAI_VIET, TAI_VIET_VOWELS_AND_FINALS_SUBSET_1, TAI_VIET_VOWELS_AND_FINALS_SUBSET_3, TAI_VIET_WORD_LIGATURE_SYMBOLS_TO_WORD_LIGATURE_SYMBOLS, MEETEI_MAYEK_EXTENSIONS_INDEPENDENT_VOWEL_SIGNS_TO_CONSONANTS, MEETEI_MAYEK_EXTENSIONS_SIGN_TO_REPETITION_MARKS, MEETEI_MAYEK_EXTENSIONS_GAMO_GOFA_DAWRO_AND_BASKETO_SUBSET, MEETEI_MAYEK_EXTENSIONS_GAMO_GOFA_DAWRO_AND_BASKETO_SUBSET_1, MEETEI_MAYEK_EXTENSIONS_GAMO_GOFA_DAWRO_SUBSET, MEETEI_MAYEK_EXTENSIONS_GUMUZ_SUBSET, MEETEI_MAYEK_EXTENSIONS_GUMUZ_SUBSET_1, LATIN_EXTENDED_E_LETTERS_FOR_GERMAN_DIALECTOLOGY, LATIN_EXTENDED_E_MODIFIER_LETTERS_FOR_GERMAN_DIALECTOLOGY_TO_LETTERS_FOR_SCOTS_DIALECTOLOGY, CHEROKEE_SUPPLEMENT_TO_MEETEI_MAYEK, 
MEETEI_MAYEK_HANGUL_SYLLABLES_SUBSET, HANGUL_JAMO_EXTENDED_B_OLD_MEDIAL_VOWELS_SUBSET, HANGUL_JAMO_EXTENDED_B_OLD_FINAL_CONSONANTS_SUBSET), 'ª', 'µ', 'º', 'ˬ', 'ˮ', 'Ϳ', 'Ά', 'Ό', 'ՙ', 'ە', 'ۿ', 'ܐ', 'ޱ', 'ߺ', 'ࠚ', 'ࠤ', 'ࠨ', 'ऽ', 'ॐ', 'ল', 'ঽ', 'ৎ', 'ৼ', 'ਫ਼', 'ઽ', 'ૐ', 'ૹ', 'ଽ', 'ୱ', 'ஃ', 'ஜ', 'ௐ', 'ఽ', 'ಀ', 'ಽ', 'ೞ', 'ഽ', 'ൎ', 'ල', 'ຄ', 'ລ', 'ຽ', 'ໆ', 'ༀ', 'ဿ', 'ၡ', 'ႎ', 'Ⴧ', 'Ⴭ', 'ቘ', 'ዀ', 'ៗ', 'ៜ', 'ᢪ', 'ᪧ', 'ᳺ', 'Ὑ', 'Ὓ', 'Ὕ', 'ι', 'ⁱ', 'ⁿ', 'ℂ', 'ℇ', 'ℕ', 'ℤ', 'Ω', 'ℨ', 'ⅎ', 'ⴧ', 'ⴭ', 'ⵯ', 'ⸯ', 'ꣻ', 'ꧏ', 'ꩺ', 'ꪱ', 'ꫀ', 'ꫂ'),
    /**
     * Category registered under the keys {@code "Ll"} and {@code "Lowercase_Letter"}, described as
     * "a lowercase letter that has an uppercase variant."
     * <p>
     * The declaration passes a list of named range constants followed by individually
     * listed characters.
     * NOTE(review): the enum constructor is not visible in this part of the file; the argument
     * order looks like (keys, description, ranges, single characters...) - confirm there.
     */
    LOWERCASE_LETTER(keys("Ll", "Lowercase_Letter"), "a lowercase letter that has an uppercase variant.", asList(BASIC_LATIN_LOWERCASE_LATIN_ALPHABET, LATIN_1_SUPPLEMENT_LOWERCASE_LETTERS, LATIN_1_SUPPLEMENT_LETTERS_1, LATIN_EXTENDED_A_EUROPEAN_LATIN_SUBSET, LATIN_EXTENDED_A_EUROPEAN_LATIN_TO_EUROPEAN_LATIN, LATIN_EXTENDED_A_EUROPEAN_LATIN_TO_EUROPEAN_LATIN_1, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_3, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_7, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_11, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_15, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_16, LATIN_EXTENDED_B_PINYIN_DIACRITIC_VOWEL_COMBINATIONS_TO_PINYIN_DIACRITIC_VOWEL_COMBINATIONS, LATIN_EXTENDED_B_PHONETIC_AND_HISTORIC_LETTERS_SUBSET, LATIN_EXTENDED_B_ADDITIONS_FOR_LIVONIAN_TO_ADDITIONS_FOR_AFRICANIST_LINGUISTICS, LATIN_EXTENDED_B_ADDITIONS_FOR_AFRICANIST_LINGUISTICS_1, LATIN_EXTENDED_B_TO_IPA_EXTENSIONS, IPA_EXTENSIONS_IPA_EXTENSIONS_TO_ADDITIONS_FOR_SINOLOGY_1, GREEK_AND_COPTIC_LOWERCASE_OF_EDITORIAL_SYMBOLS, GREEK_AND_COPTIC_LETTERS_SUBSET_5, GREEK_AND_COPTIC_VARIANT_LETTERFORMS_SUBSET, GREEK_AND_COPTIC_VARIANT_LETTERFORMS_SUBSET_2, GREEK_AND_COPTIC_COPTIC_LETTERS_DERIVED_FROM_DEMOTIC_TO_VARIANT_LETTERFORMS, GREEK_AND_COPTIC_ARCHAIC_LETTERS_TO_ARCHAIC_LETTERS_1, CYRILLIC_BASIC_RUSSIAN_ALPHABET_TO_CYRILLIC_EXTENSIONS, CYRILLIC_EXTENDED_CYRILLIC_SUBSET_1, CYRILLIC_SUPPLEMENT_LOWERCASE_LETTERS, GEORGIAN_MKHEDRULI_TO_ADDITIONAL_LETTERS, GEORGIAN_ADDITIONAL_LETTERS_FOR_OSSETIAN_AND_ABKHAZ, CHEROKEE_LOWERCASE_SYLLABLES_TO_LOWERCASE_SYLLABLES, CYRILLIC_EXTENDED_C_HISTORIC_LETTER_VARIANTS_SUBSET, PHONETIC_EXTENSIONS_LATIN_LETTERS_TO_GREEK_LETTERS, PHONETIC_EXTENSIONS_LATIN_LETTER_TO_LATIN_LETTERS_WITH_MIDDLE_TILDE, PHONETIC_EXTENSIONS_TO_PHONETIC_EXTENSIONS_SUPPLEMENT_1, LATIN_EXTENDED_ADDITIONAL_LATIN_GENERAL_USE_EXTENSIONS_TO_MEDIEVALIST_ADDITIONS_1, LATIN_EXTENDED_ADDITIONAL_TO_GREEK_EXTENDED_1, 
GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_14, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_22, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_33, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_1, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_12, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_23, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_35, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_13, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_24, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_34, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_37, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_3, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_4, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_11, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_15, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_25, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_36, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_38, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_5, LETTERLIKE_SYMBOLS_ADDITIONAL_LETTERLIKE_SYMBOLS_SUBSET_2, LETTERLIKE_SYMBOLS_DOUBLE_STRUCK_ITALIC_MATH_SYMBOLS_SUBSET, GLAGOLITIC_SMALL_LETTERS_SUBSET, LATIN_EXTENDED_C_ORTHOGRAPHIC_LATIN_ADDITIONS_SUBSET_1, LATIN_EXTENDED_C_MISCELLANEOUS_ADDITIONS_SUBSET_1, LATIN_EXTENDED_C_CLAUDIAN_LETTERS_TO_ADDITIONS_FOR_UPA, COPTIC_OLD_NUBIAN_LETTERS_TO_OLD_NUBIAN_LETTERS, GEORGIAN_SUPPLEMENT_SMALL_LETTERS_KHUTSURI_SUBSET, LATIN_EXTENDED_D_MAYANIST_ADDITIONS_TO_MEDIEVALIST_ADDITIONS, LATIN_EXTENDED_D_MEDIEVALIST_ADDITIONS_SUBSET, LATIN_EXTENDED_D_ADDITIONAL_LETTERS_TO_ADDITIONS_FOR_LITHUANIAN_DIALECTOLOGY, LATIN_EXTENDED_E_LETTERS_FOR_GERMAN_DIALECTOLOGY, LATIN_EXTENDED_E_HISTORIC_LETTERS_FOR_SAKHA_YAKUT_TO_LETTERS_FOR_SINOLOGICAL_TRANSCRIPTION, CHEROKEE_SUPPLEMENT_LOWERCASE_SYLLABLES), 'µ', 'ā', 'ă', 'ą', 'ć', 'ĉ', 'ċ', 'č', 'ď', 'đ', 'ē', 'ĕ', 'ė', 'ę', 'ě', 'ĝ', 'ğ', 'ġ', 'ģ', 'ĥ', 'ħ', 'ĩ', 'ī', 'ĭ', 'į', 'ı', 'ĳ', 'ĵ', 'ĺ', 
'ļ', 'ľ', 'ŀ', 'ł', 'ń', 'ņ', 'ŋ', 'ō', 'ŏ', 'ő', 'œ', 'ŕ', 'ŗ', 'ř', 'ś', 'ŝ', 'ş', 'š', 'ţ', 'ť', 'ŧ', 'ũ', 'ū', 'ŭ', 'ů', 'ű', 'ų', 'ŵ', 'ŷ', 'ź', 'ż', 'ƃ', 'ƅ', 'ƈ', 'ƒ', 'ƕ', 'ƞ', 'ơ', 'ƣ', 'ƥ', 'ƨ', 'ƭ', 'ư', 'ƴ', 'ƶ', 'ǆ', 'ǉ', 'ǌ', 'ǎ', 'ǐ', 'ǒ', 'ǔ', 'ǖ', 'ǘ', 'ǚ', 'ǟ', 'ǡ', 'ǣ', 'ǥ', 'ǧ', 'ǩ', 'ǫ', 'ǭ', 'ǳ', 'ǵ', 'ǹ', 'ǻ', 'ǽ', 'ǿ', 'ȁ', 'ȃ', 'ȅ', 'ȇ', 'ȉ', 'ȋ', 'ȍ', 'ȏ', 'ȑ', 'ȓ', 'ȕ', 'ȗ', 'ș', 'ț', 'ȝ', 'ȟ', 'ȡ', 'ȣ', 'ȥ', 'ȧ', 'ȩ', 'ȫ', 'ȭ', 'ȯ', 'ȱ', 'ȼ', 'ɂ', 'ɇ', 'ɉ', 'ɋ', 'ɍ', 'ͱ', 'ͳ', 'ͷ', 'ΐ', 'ϙ', 'ϛ', 'ϝ', 'ϟ', 'ϡ', 'ϣ', 'ϥ', 'ϧ', 'ϩ', 'ϫ', 'ϭ', 'ϵ', 'ϸ', 'ѡ', 'ѣ', 'ѥ', 'ѧ', 'ѩ', 'ѫ', 'ѭ', 'ѯ', 'ѱ', 'ѳ', 'ѵ', 'ѷ', 'ѹ', 'ѻ', 'ѽ', 'ѿ', 'ҁ', 'ҋ', 'ҍ', 'ҏ', 'ґ', 'ғ', 'ҕ', 'җ', 'ҙ', 'қ', 'ҝ', 'ҟ', 'ҡ', 'ң', 'ҥ', 'ҧ', 'ҩ', 'ҫ', 'ҭ', 'ү', 'ұ', 'ҳ', 'ҵ', 'ҷ', 'ҹ', 'һ', 'ҽ', 'ҿ', 'ӂ', 'ӄ', 'ӆ', 'ӈ', 'ӊ', 'ӌ', 'ӑ', 'ӓ', 'ӕ', 'ӗ', 'ә', 'ӛ', 'ӝ', 'ӟ', 'ӡ', 'ӣ', 'ӥ', 'ӧ', 'ө', 'ӫ', 'ӭ', 'ӯ', 'ӱ', 'ӳ', 'ӵ', 'ӷ', 'ӹ', 'ӻ', 'ӽ', 'ӿ', 'ԁ', 'ԃ', 'ԅ', 'ԇ', 'ԉ', 'ԋ', 'ԍ', 'ԏ', 'ԑ', 'ԓ', 'ԕ', 'ԗ', 'ԙ', 'ԛ', 'ԝ', 'ԟ', 'ԡ', 'ԣ', 'ԥ', 'ԧ', 'ԩ', 'ԫ', 'ԭ', 'ԯ', 'ḁ', 'ḃ', 'ḅ', 'ḇ', 'ḉ', 'ḋ', 'ḍ', 'ḏ', 'ḑ', 'ḓ', 'ḕ', 'ḗ', 'ḙ', 'ḛ', 'ḝ', 'ḟ', 'ḡ', 'ḣ', 'ḥ', 'ḧ', 'ḩ', 'ḫ', 'ḭ', 'ḯ', 'ḱ', 'ḳ', 'ḵ', 'ḷ', 'ḹ', 'ḻ', 'ḽ', 'ḿ', 'ṁ', 'ṃ', 'ṅ', 'ṇ', 'ṉ', 'ṋ', 'ṍ', 'ṏ', 'ṑ', 'ṓ', 'ṕ', 'ṗ', 'ṙ', 'ṛ', 'ṝ', 'ṟ', 'ṡ', 'ṣ', 'ṥ', 'ṧ', 'ṩ', 'ṫ', 'ṭ', 'ṯ', 'ṱ', 'ṳ', 'ṵ', 'ṷ', 'ṹ', 'ṻ', 'ṽ', 'ṿ', 'ẁ', 'ẃ', 'ẅ', 'ẇ', 'ẉ', 'ẋ', 'ẍ', 'ẏ', 'ẑ', 'ẓ', 'ẟ', 'ạ', 'ả', 'ấ', 'ầ', 'ẩ', 'ẫ', 'ậ', 'ắ', 'ằ', 'ẳ', 'ẵ', 'ặ', 'ẹ', 'ẻ', 'ẽ', 'ế', 'ề', 'ể', 'ễ', 'ệ', 'ỉ', 'ị', 'ọ', 'ỏ', 'ố', 'ồ', 'ổ', 'ỗ', 'ộ', 'ớ', 'ờ', 'ở', 'ỡ', 'ợ', 'ụ', 'ủ', 'ứ', 'ừ', 'ử', 'ữ', 'ự', 'ỳ', 'ỵ', 'ỷ', 'ỹ', 'ỻ', 'ỽ', 'ι', 'ℊ', 'ℓ', 'ℯ', 'ℴ', 'ℹ', 'ⅎ', 'ↄ', 'ⱡ', 'ⱨ', 'ⱪ', 'ⱬ', 'ⱱ', 'ⲁ', 'ⲃ', 'ⲅ', 'ⲇ', 'ⲉ', 'ⲋ', 'ⲍ', 'ⲏ', 'ⲑ', 'ⲓ', 'ⲕ', 'ⲗ', 'ⲙ', 'ⲛ', 'ⲝ', 'ⲟ', 'ⲡ', 'ⲣ', 'ⲥ', 'ⲧ', 'ⲩ', 'ⲫ', 'ⲭ', 'ⲯ', 'ⲱ', 'ⲳ', 'ⲵ', 'ⲷ', 'ⲹ', 'ⲻ', 'ⲽ', 'ⲿ', 'ⳁ', 'ⳃ', 'ⳅ', 'ⳇ', 'ⳉ', 'ⳋ', 'ⳍ', 'ⳏ', 'ⳑ', 'ⳓ', 'ⳕ', 'ⳗ', 'ⳙ', 'ⳛ', 'ⳝ', 'ⳟ', 'ⳡ', 'ⳬ', 
'ⳮ', 'ⳳ', 'ⴧ', 'ⴭ', 'ꙁ', 'ꙃ', 'ꙅ', 'ꙇ', 'ꙉ', 'ꙋ', 'ꙍ', 'ꙏ', 'ꙑ', 'ꙓ', 'ꙕ', 'ꙗ', 'ꙙ', 'ꙛ', 'ꙝ', 'ꙟ', 'ꙡ', 'ꙣ', 'ꙥ', 'ꙧ', 'ꙩ', 'ꙫ', 'ꙭ', 'ꚁ', 'ꚃ', 'ꚅ', 'ꚇ', 'ꚉ', 'ꚋ', 'ꚍ', 'ꚏ', 'ꚑ', 'ꚓ', 'ꚕ', 'ꚗ', 'ꚙ', 'ꚛ', 'ꜣ', 'ꜥ', 'ꜧ', 'ꜩ', 'ꜫ', 'ꜭ', 'ꜳ', 'ꜵ', 'ꜷ', 'ꜹ', 'ꜻ', 'ꜽ', 'ꜿ', 'ꝁ', 'ꝃ', 'ꝅ', 'ꝇ', 'ꝉ', 'ꝋ', 'ꝍ', 'ꝏ', 'ꝑ', 'ꝓ', 'ꝕ', 'ꝗ', 'ꝙ', 'ꝛ', 'ꝝ', 'ꝟ', 'ꝡ', 'ꝣ', 'ꝥ', 'ꝧ', 'ꝩ', 'ꝫ', 'ꝭ', 'ꝯ', 'ꝺ', 'ꝼ', 'ꝿ', 'ꞁ', 'ꞃ', 'ꞅ', 'ꞇ', 'ꞌ', 'ꞎ', 'ꞑ', 'ꞗ', 'ꞙ', 'ꞛ', 'ꞝ', 'ꞟ', 'ꞡ', 'ꞣ', 'ꞥ', 'ꞧ', 'ꞩ', 'ꞯ', 'ꞵ', 'ꞷ', 'ꞹ', 'ꞻ', 'ꞽ', 'ꞿ', 'ꟃ', 'ꟈ', 'ꟊ', 'ꟶ', 'ꟺ'),
    /**
     * Category registered under the keys {@code "Lu"} and {@code "Uppercase_Letter"}, described as
     * "an uppercase letter that has a lowercase variant."
     * <p>
     * The declaration passes a list of named range constants followed by individually
     * listed characters.
     * NOTE(review): the enum constructor is not visible in this part of the file; the argument
     * order looks like (keys, description, ranges, single characters...) - confirm there.
     */
    UPPERCASE_LETTER(keys("Lu", "Uppercase_Letter"), "an uppercase letter that has a lowercase variant.", asList(BASIC_LATIN_UPPERCASE_LATIN_ALPHABET, LATIN_1_SUPPLEMENT_LETTERS, LATIN_1_SUPPLEMENT_UPPERCASE_LETTERS, LATIN_EXTENDED_A_EUROPEAN_LATIN_SUBSET_1, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_1, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_2, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_4, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_5, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_6, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_8, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_9, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_10, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_12, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_13, LATIN_EXTENDED_B_NON_EUROPEAN_AND_HISTORIC_LATIN_SUBSET_14, LATIN_EXTENDED_B_PHONETIC_AND_HISTORIC_LETTERS_SUBSET_1, LATIN_EXTENDED_B_ADDITIONS_FOR_SENCOTEN_SUBSET, LATIN_EXTENDED_B_ADDITIONS_FOR_SENCOTEN_SUBSET_1, LATIN_EXTENDED_B_MISCELLANEOUS_ADDITIONS_SUBSET, GREEK_AND_COPTIC_LETTERS_SUBSET, GREEK_AND_COPTIC_LETTERS_SUBSET_1, GREEK_AND_COPTIC_LETTERS_SUBSET_3, GREEK_AND_COPTIC_LETTERS_SUBSET_4, GREEK_AND_COPTIC_VARIANT_LETTERFORMS_SUBSET_1, GREEK_AND_COPTIC_VARIANT_LETTERFORM_TO_VARIANT_LETTERFORM, GREEK_AND_COPTIC_TO_CYRILLIC_1, CYRILLIC_EXTENDED_CYRILLIC_SUBSET, CYRILLIC_SUPPLEMENT_UPPERCASE_LETTERS_SUBSET, GEORGIAN_CAPITAL_LETTERS_KHUTSURI_SUBSET, CHEROKEE_UPPERCASE_SYLLABLES_TO_UPPERCASE_SYLLABLES, GEORGIAN_EXTENDED_CAPITAL_LETTERS_MTAVRULI_TO_ADDITIONAL_LETTERS, GEORGIAN_EXTENDED_ADDITIONAL_LETTERS_FOR_OSSETIAN_AND_ABKHAZ, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_7, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_19, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_28, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_43, 
GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_9, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_29, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_41, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_6, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_17, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_31, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_42, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_4, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_6, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_9, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_11, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_12, LETTERLIKE_SYMBOLS_ADDITIONAL_LETTERLIKE_SYMBOLS_SUBSET_4, GLAGOLITIC_CAPITAL_LETTERS_SUBSET, LATIN_EXTENDED_C_ORTHOGRAPHIC_LATIN_ADDITIONS_SUBSET, LATIN_EXTENDED_C_MISCELLANEOUS_ADDITIONS_SUBSET, LATIN_EXTENDED_C_ADDITIONS_FOR_SHONA_TO_ADDITIONS_FOR_SHONA, LATIN_EXTENDED_D_INSULAR_AND_CELTICIST_LETTERS_SUBSET, LATIN_EXTENDED_D_ADDITIONAL_LETTERS_TO_ADDITIONAL_LETTERS, LATIN_EXTENDED_D_LETTERS_FOR_AMERICANIST_ORTHOGRAPHIES_TO_LETTER_FOR_GERMAN_DIALECTOLOGY, LATIN_EXTENDED_D_LETTERS_USED_IN_EARLY_PINYIN_ROMANIZATION_TO_LETTERS_USED_IN_EARLY_PINYIN_ROMANIZATION), 'Ā', 'Ă', 'Ą', 'Ć', 'Ĉ', 'Ċ', 'Č', 'Ď', 'Đ', 'Ē', 'Ĕ', 'Ė', 'Ę', 'Ě', 'Ĝ', 'Ğ', 'Ġ', 'Ģ', 'Ĥ', 'Ħ', 'Ĩ', 'Ī', 'Ĭ', 'Į', 'İ', 'Ĳ', 'Ĵ', 'Ķ', 'Ĺ', 'Ļ', 'Ľ', 'Ŀ', 'Ł', 'Ń', 'Ņ', 'Ň', 'Ŋ', 'Ō', 'Ŏ', 'Ő', 'Œ', 'Ŕ', 'Ŗ', 'Ř', 'Ś', 'Ŝ', 'Ş', 'Š', 'Ţ', 'Ť', 'Ŧ', 'Ũ', 'Ū', 'Ŭ', 'Ů', 'Ű', 'Ų', 'Ŵ', 'Ŷ', 'Ż', 'Ž', 'Ƅ', 'Ƣ', 'Ƥ', 'Ʃ', 'Ƭ', 'Ƶ', 'Ƽ', 'Ǆ', 'Ǉ', 'Ǌ', 'Ǎ', 'Ǐ', 'Ǒ', 'Ǔ', 'Ǖ', 'Ǘ', 'Ǚ', 'Ǜ', 'Ǟ', 'Ǡ', 'Ǣ', 'Ǥ', 'Ǧ', 'Ǩ', 'Ǫ', 'Ǭ', 'Ǯ', 'Ǳ', 'Ǵ', 'Ǻ', 'Ǽ', 'Ǿ', 'Ȁ', 'Ȃ', 'Ȅ', 'Ȇ', 'Ȉ', 'Ȋ', 'Ȍ', 'Ȏ', 'Ȑ', 'Ȓ', 'Ȕ', 'Ȗ', 'Ș', 'Ț', 'Ȝ', 'Ȟ', 'Ƞ', 'Ȣ', 'Ȥ', 'Ȧ', 'Ȩ', 'Ȫ', 'Ȭ', 'Ȯ', 'Ȱ', 'Ȳ', 'Ɂ', 'Ɉ', 'Ɋ', 'Ɍ', 'Ɏ', 'Ͱ', 'Ͳ', 'Ͷ', 'Ϳ', 'Ά', 'Ό', 'Ϗ', 'Ϙ', 'Ϛ', 'Ϝ', 'Ϟ', 'Ϡ', 'Ϣ', 'Ϥ', 'Ϧ', 'Ϩ', 'Ϫ', 'Ϭ', 'Ϯ', 'ϴ', 'Ϸ', 'Ѡ', 'Ѣ', 'Ѥ', 'Ѧ', 'Ѩ', 'Ѫ', 'Ѭ', 'Ѯ', 'Ѱ', 'Ѳ', 'Ѵ', 'Ѷ', 'Ѹ', 'Ѻ', 'Ѽ', 'Ѿ', 'Ҁ', 
'Ҋ', 'Ҍ', 'Ҏ', 'Ґ', 'Ғ', 'Ҕ', 'Җ', 'Ҙ', 'Қ', 'Ҝ', 'Ҟ', 'Ҡ', 'Ң', 'Ҥ', 'Ҧ', 'Ҩ', 'Ҫ', 'Ҭ', 'Ү', 'Ұ', 'Ҳ', 'Ҵ', 'Ҷ', 'Ҹ', 'Һ', 'Ҽ', 'Ҿ', 'Ӄ', 'Ӆ', 'Ӈ', 'Ӊ', 'Ӌ', 'Ӎ', 'Ӑ', 'Ӓ', 'Ӕ', 'Ӗ', 'Ә', 'Ӛ', 'Ӝ', 'Ӟ', 'Ӡ', 'Ӣ', 'Ӥ', 'Ӧ', 'Ө', 'Ӫ', 'Ӭ', 'Ӯ', 'Ӱ', 'Ӳ', 'Ӵ', 'Ӷ', 'Ӹ', 'Ӻ', 'Ӽ', 'Ӿ', 'Ԁ', 'Ԃ', 'Ԅ', 'Ԇ', 'Ԉ', 'Ԋ', 'Ԍ', 'Ԏ', 'Ԑ', 'Ԓ', 'Ԕ', 'Ԗ', 'Ԙ', 'Ԛ', 'Ԝ', 'Ԟ', 'Ԡ', 'Ԣ', 'Ԥ', 'Ԧ', 'Ԩ', 'Ԫ', 'Ԭ', 'Ԯ', 'Ⴧ', 'Ⴭ', 'Ḁ', 'Ḃ', 'Ḅ', 'Ḇ', 'Ḉ', 'Ḋ', 'Ḍ', 'Ḏ', 'Ḑ', 'Ḓ', 'Ḕ', 'Ḗ', 'Ḙ', 'Ḛ', 'Ḝ', 'Ḟ', 'Ḡ', 'Ḣ', 'Ḥ', 'Ḧ', 'Ḩ', 'Ḫ', 'Ḭ', 'Ḯ', 'Ḱ', 'Ḳ', 'Ḵ', 'Ḷ', 'Ḹ', 'Ḻ', 'Ḽ', 'Ḿ', 'Ṁ', 'Ṃ', 'Ṅ', 'Ṇ', 'Ṉ', 'Ṋ', 'Ṍ', 'Ṏ', 'Ṑ', 'Ṓ', 'Ṕ', 'Ṗ', 'Ṙ', 'Ṛ', 'Ṝ', 'Ṟ', 'Ṡ', 'Ṣ', 'Ṥ', 'Ṧ', 'Ṩ', 'Ṫ', 'Ṭ', 'Ṯ', 'Ṱ', 'Ṳ', 'Ṵ', 'Ṷ', 'Ṹ', 'Ṻ', 'Ṽ', 'Ṿ', 'Ẁ', 'Ẃ', 'Ẅ', 'Ẇ', 'Ẉ', 'Ẋ', 'Ẍ', 'Ẏ', 'Ẑ', 'Ẓ', 'Ẕ', 'ẞ', 'Ạ', 'Ả', 'Ấ', 'Ầ', 'Ẩ', 'Ẫ', 'Ậ', 'Ắ', 'Ằ', 'Ẳ', 'Ẵ', 'Ặ', 'Ẹ', 'Ẻ', 'Ẽ', 'Ế', 'Ề', 'Ể', 'Ễ', 'Ệ', 'Ỉ', 'Ị', 'Ọ', 'Ỏ', 'Ố', 'Ồ', 'Ổ', 'Ỗ', 'Ộ', 'Ớ', 'Ờ', 'Ở', 'Ỡ', 'Ợ', 'Ụ', 'Ủ', 'Ứ', 'Ừ', 'Ử', 'Ữ', 'Ự', 'Ỳ', 'Ỵ', 'Ỷ', 'Ỹ', 'Ỻ', 'Ỽ', 'Ỿ', 'Ὑ', 'Ὓ', 'Ὕ', 'Ὗ', 'ℂ', 'ℇ', 'ℕ', 'ℤ', 'Ω', 'ℨ', 'ⅅ', 'Ↄ', 'Ⱡ', 'Ⱨ', 'Ⱪ', 'Ⱬ', 'Ⱳ', 'Ⱶ', 'Ⲃ', 'Ⲅ', 'Ⲇ', 'Ⲉ', 'Ⲋ', 'Ⲍ', '<br>Ⲏ', 'Ⲑ', 'Ⲓ', 'Ⲕ', 'Ⲗ', 'Ⲙ', 'Ⲛ', 'Ⲝ', 'Ⲟ', 'Ⲡ', 'Ⲣ', 'Ⲥ', 'Ⲧ', 'Ⲩ', 'Ⲫ', 'Ⲭ', 'Ⲯ', 'Ⲱ', 'Ⲳ', 'Ⲵ', 'Ⲷ', 'Ⲹ', 'Ⲻ', 'Ⲽ', 'Ⲿ', 'Ⳁ', 'Ⳃ', 'Ⳅ', 'Ⳇ', 'Ⳉ', 'Ⳋ', 'Ⳍ', 'Ⳏ', 'Ⳑ', 'Ⳓ', 'Ⳕ', 'Ⳗ', 'Ⳙ', 'Ⳛ', 'Ⳝ', 'Ⳟ', 'Ⳡ', 'Ⳣ', 'Ⳬ', 'Ⳮ', 'Ⳳ', 'Ꙁ', 'Ꙃ', 'Ꙅ', 'Ꙇ', 'Ꙉ', 'Ꙋ', 'Ꙍ', 'Ꙏ', 'Ꙑ', 'Ꙓ', 'Ꙕ', 'Ꙗ', 'Ꙙ', 'Ꙛ', 'Ꙝ', 'Ꙟ', 'Ꙡ', 'Ꙣ', 'Ꙥ', 'Ꙧ', 'Ꙩ', 'Ꙫ', 'Ꙭ', 'Ꚁ', 'Ꚃ', 'Ꚅ', 'Ꚇ', 'Ꚉ', 'Ꚋ', 'Ꚍ', 'Ꚏ', 'Ꚑ', 'Ꚓ', 'Ꚕ', 'Ꚗ', 'Ꚙ', 'Ꚛ', 'Ꜣ', 'Ꜥ', 'Ꜧ', 'Ꜩ', 'Ꜫ', 'Ꜭ', 'Ꜯ', 'Ꜳ', 'Ꜵ', 'Ꜷ', 'Ꜹ', 'Ꜻ', 'Ꜽ', 'Ꜿ', 'Ꝁ', 'Ꝃ', 'Ꝅ', 'Ꝇ', 'Ꝉ', 'Ꝋ', 'Ꝍ', 'Ꝏ', 'Ꝑ', 'Ꝓ', 'Ꝕ', 'Ꝗ', 'Ꝙ', 'Ꝛ', 'Ꝝ', 'Ꝟ', 'Ꝡ', 'Ꝣ', 'Ꝥ', 'Ꝧ', 'Ꝩ', 'Ꝫ', 'Ꝭ', 'Ꝯ', 'Ꝺ', 'Ꝼ', 'Ꞁ', 'Ꞃ', 'Ꞅ', 'Ꞇ', 'Ꞌ', 'Ɥ', 'Ꞑ', 'Ꞓ', 'Ꞗ', 'Ꞙ', 'Ꞛ', 'Ꞝ', 'Ꞟ', 'Ꞡ', 'Ꞣ', 'Ꞥ', 'Ꞧ', 'Ꞩ', 'Ꞷ', 'Ꞹ', 'Ꞻ', 'Ꞽ', 'Ꞿ', 'Ꟃ', 'Ꟊ', 'Ꟶ'),
    /**
     * Category registered under the keys {@code "Lt"} and {@code "Titlecase_Letter"}, described as
     * "a letter that appears at the start of a word when only the first letter of the word is capitalized."
     * <p>
     * The declaration passes three named range constants followed by seven individually
     * listed characters.
     * NOTE(review): the enum constructor is not visible in this part of the file; the argument
     * order looks like (keys, description, ranges, single characters...) - confirm there.
     */
    TITLECASE_LETTER(keys("Lt", "Titlecase_Letter"), "a letter that appears at the start of a word when only the first letter of the word is capitalized.", asList(GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_8, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_18, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_30), 'ǅ', 'ǈ', 'ǋ', 'ǲ', 'ᾼ', 'ῌ', 'ῼ'),
    /**
     * Category registered under the keys {@code "Lm"} and {@code "Modifier_Letter"}, described as
     * "a special character that is used like a letter."
     * <p>
     * The declaration passes a list of named range constants followed by individually
     * listed characters.
     * NOTE(review): the enum constructor is not visible in this part of the file; the argument
     * order looks like (keys, description, ranges, single characters...) - confirm there.
     */
    MODIFIER_LETTER(keys("Lm", "Modifier_Letter"), "a special character that is used like a letter.", asList(SPACING_MODIFIER_LETTERS_LATIN_SUPERSCRIPT_MODIFIER_LETTERS_TO_MISCELLANEOUS_PHONETIC_MODIFIERS, SPACING_MODIFIER_LETTERS_MISCELLANEOUS_PHONETIC_MODIFIERS_SUBSET_1, SPACING_MODIFIER_LETTERS_ADDITIONS_BASED_ON_1989_IPA_SUBSET, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_2, NKO_TONAL_APOSTROPHES, OL_CHIKI_MODIFIER_LETTERS, PHONETIC_EXTENSIONS_LATIN_SUPERSCRIPT_MODIFIER_LETTERS_TO_GREEK_SUBSCRIPT_MODIFIER_LETTERS, PHONETIC_EXTENSIONS_SUPPLEMENT_MODIFIER_LETTERS, SUPERSCRIPTS_AND_SUBSCRIPTS_SUBSCRIPTS_TO_SUBSCRIPTS_FOR_UPA, LATIN_EXTENDED_C_ADDITIONS_FOR_UPA_SUBSET, CJK_SYMBOLS_AND_PUNCTUATION_KANA_REPEAT_MARKS, CJK_SYMBOLS_AND_PUNCTUATION_ITERATION_MARKS, KATAKANA_CONJUNCTION_AND_LENGTH_MARKS_TO_ITERATION_MARKS, LISU_TONES, CYRILLIC_EXTENDED_B_INTONATION_MARKS_FOR_LITHUANIAN_DIALECTOLOGY, MODIFIER_TONE_LETTERS_CHINANTEC_TONE_MARKS_TO_AFRICANIST_TONE_LETTERS, LATIN_EXTENDED_D_ADDITIONS_FOR_EXTENDED_IPA, MEETEI_MAYEK_EXTENSIONS_REPETITION_MARKS, LATIN_EXTENDED_E_MODIFIER_LETTERS_FOR_GERMAN_DIALECTOLOGY_SUBSET), 'ˬ', 'ˮ', 'ʹ', 'ͺ', 'ՙ', 'ـ', 'ߺ', 'ࠚ', 'ࠤ', 'ࠨ', 'ॱ', 'ๆ', 'ໆ', 'ჼ', 'ៗ', 'ᡃ', 'ᪧ', 'ᵸ', 'ⁱ', 'ⁿ', 'ⵯ', 'ⸯ', '々', '〻', 'ꀕ', 'ꘌ', 'ꙿ', 'ꝰ', 'ꞈ', 'ꧏ', 'ꧦ', 'ꩰ', 'ꫝ', 'ꭩ'),
    /**
     * Unicode general category {@code Lo} ({@code Other_Letter}): a letter or ideograph that has no
     * lowercase and uppercase variants.
     * <p>
     * Arguments, in order: the category keys, a human-readable description, a list of range constants,
     * and individual {@code char} literals.
     * <p>
     * NOTE(review): the {@code asList(...)} entries are presumably {@code SymbolRange} constants from
     * {@code UnicodeCategoryConstants} and the trailing literals presumably single characters that are
     * not covered by any listed range - confirm against the constructor, which is not visible here.
     * NOTE(review): element order may be observable by consumers; do not reorder without checking.
     */
    OTHER_LETTER(keys("Lo", "Other_Letter"), "a letter or ideograph that does not have lowercase and uppercase variants.", asList(LATIN_EXTENDED_B_AFRICAN_LETTERS_FOR_CLICKS, CYRILLIC_SUPPLEMENT_BASED_ON_ISO_8859_8_SUBSET, CYRILLIC_SUPPLEMENT_SIGN_TO_YIDDISH_DIGRAPHS, ARABIC_ADDITION_FOR_KASHMIRI_TO_ADDITIONS_FOR_EARLY_PERSIAN_AND_AZERBAIJANI, ARABIC_BASED_ON_ISO_8859_6_SUBSET, ARABIC_ARCHAIC_LETTERS, ARABIC_EXTENDED_ARABIC_LETTERS_TO_EXTENDED_ARABIC_LETTERS, ARABIC_EXTENDED_ARABIC_LETTERS_FOR_PARKARI, ARABIC_EXTENDED_ARABIC_LETTERS_2, SYRIAC_SYRIAC_LETTERS_TO_PERSIAN_LETTERS, SYRIAC_TO_THAANA, NKO_LETTERS_TO_ARCHAIC_LETTERS, SAMARITAN_LETTERS, MANDAIC_LETTERS, SYRIAC_SUPPLEMENT_SYRIAC_LETTERS_SUBSET, ARABIC_EXTENDED_A_ARABIC_LETTERS_FOR_AFRICAN_LANGUAGES_TO_ARABIC_LETTERS_FOR_ARWI, ARABIC_EXTENDED_A_ARABIC_LETTERS_FOR_BRAVANESE_TO_ARABIC_LETTERS_FOR_HAUSA_WOLOF_AND_OTHER_AFRICAN_ORTHOGRAPHIES, DEVANAGARI_INDEPENDENT_VOWELS_TO_CONSONANTS, DEVANAGARI_ADDITIONAL_CONSONANTS_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT, DEVANAGARI_INDEPENDENT_VOWEL_FOR_MARATHI_TO_SINDHI_IMPLOSIVES, BENGALI_INDEPENDENT_VOWELS_SUBSET_2, BENGALI_INDEPENDENT_VOWELS_SUBSET_6, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS, BENGALI_CONSONANTS_SUBSET_4, BENGALI_CONSONANTS_SUBSET_15, BENGALI_ADDITIONAL_CONSONANTS_SUBSET_1, BENGALI_ADDITIONAL_CONSONANTS_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT, BENGALI_ADDITIONS_FOR_ASSAMESE, BENGALI_INDEPENDENT_VOWELS_SUBSET, BENGALI_INDEPENDENT_VOWELS_SUBSET_7, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS_1, BENGALI_CONSONANTS_SUBSET_5, BENGALI_CONSONANTS_SUBSET_10, BENGALI_CONSONANTS_SUBSET_12, BENGALI_CONSONANTS_SUBSET_16, BENGALI_ADDITIONAL_CONSONANTS_SUBSET, BENGALI_VOWEL_BASES_TO_VOWEL_BASES, BENGALI_INDEPENDENT_VOWELS_SUBSET_4, BENGALI_INDEPENDENT_VOWELS_SUBSET_9, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS_2, BENGALI_CONSONANTS_SUBSET_6, BENGALI_CONSONANTS_SUBSET_9, BENGALI_CONSONANTS_SUBSET_13, BENGALI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET, BENGALI_INDEPENDENT_VOWELS_SUBSET_3, 
BENGALI_INDEPENDENT_VOWELS_SUBSET_8, BENGALI_INDEPENDENT_VOWELS_TO_CONSONANTS_3, BENGALI_CONSONANTS_SUBSET_7, BENGALI_CONSONANTS_SUBSET_11, BENGALI_CONSONANTS_SUBSET_14, BENGALI_ADDITIONAL_CONSONANTS_SUBSET_2, BENGALI_ADDITIONAL_CONSONANTS_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT_1, BENGALI_INDEPENDENT_VOWELS_SUBSET_1, BENGALI_INDEPENDENT_VOWELS_SUBSET_5, BENGALI_INDEPENDENT_VOWELS_TO_INDEPENDENT_VOWELS, BENGALI_CONSONANTS_SUBSET, BENGALI_CONSONANTS_SUBSET_1, BENGALI_CONSONANTS_SUBSET_2, BENGALI_CONSONANTS_SUBSET_3, BENGALI_CONSONANTS_SUBSET_8, TELUGU_INDEPENDENT_VOWELS_SUBSET, TELUGU_INDEPENDENT_VOWELS_SUBSET_1, TELUGU_INDEPENDENT_VOWELS_TO_CONSONANTS, TELUGU_CONSONANTS_SUBSET, TELUGU_HISTORIC_PHONETIC_VARIANTS_SUBSET, TELUGU_ADDITIONAL_VOWELS_FOR_SANSKRIT, KANNADA_INDEPENDENT_VOWELS_SUBSET, KANNADA_INDEPENDENT_VOWELS_SUBSET_1, KANNADA_INDEPENDENT_VOWELS_TO_CONSONANTS, KANNADA_CONSONANTS_SUBSET, KANNADA_CONSONANTS_SUBSET_1, KANNADA_ADDITIONAL_VOWELS_FOR_SANSKRIT, KANNADA_SIGNS_USED_IN_SANSKRIT_SUBSET, MALAYALAM_VARIOUS_SIGNS_TO_INDEPENDENT_VOWELS, MALAYALAM_INDEPENDENT_VOWELS_SUBSET_1, MALAYALAM_INDEPENDENT_VOWELS_TO_CONSONANTS, MALAYALAM_ADDITIONAL_HISTORIC_CHILLU_LETTERS, MALAYALAM_ADDITIONAL_HISTORIC_VOWEL_TO_ADDITIONAL_VOWELS_FOR_SANSKRIT, MALAYALAM_CHILLU_LETTERS, MALAYALAM_INDEPENDENT_VOWELS_SUBSET, MALAYALAM_CONSONANTS_SUBSET_4, MALAYALAM_CONSONANTS_SUBSET_5, MALAYALAM_CONSONANTS_SUBSET, MALAYALAM_CONSONANTS_TO_SIGN, MALAYALAM_VOWELS_SUBSET_1, MALAYALAM_VOWELS_TO_VOWELS, MALAYALAM_CONSONANTS_SUBSET_1, MALAYALAM_CONSONANTS_SUBSET_2, MALAYALAM_CONSONANTS_SUBSET_3, MALAYALAM_CONSONANTS_TO_SIGN_1, MALAYALAM_VOWELS_SUBSET_2, MALAYALAM_VOWELS_SUBSET, MALAYALAM_DIGRAPHS_TO_CONSONANTS_FOR_KHMU, TIBETAN_CONSONANTS_SUBSET, TIBETAN_CONSONANTS_TO_EXTENSIONS_FOR_BALTI, TIBETAN_TRANSLITERATION_HEAD_LETTERS, MYANMAR_CONSONANTS_TO_INDEPENDENT_VOWELS, MYANMAR_PALI_AND_SANSKRIT_EXTENSIONS_SUBSET, MYANMAR_EXTENSIONS_FOR_MON_SUBSET, MYANMAR_EXTENSIONS_FOR_WESTERN_PWO_KAREN_SUBSET, 
MYANMAR_EXTENSIONS_FOR_EASTERN_PWO_KAREN, MYANMAR_EXTENSIONS_FOR_SHAN_SUBSET_4, HANGUL_JAMO_TO_ETHIOPIC, ETHIOPIC_SYLLABLES_SUBSET_3, ETHIOPIC_SYLLABLES_SUBSET_4, ETHIOPIC_SYLLABLES_SUBSET_9, ETHIOPIC_SYLLABLES_SUBSET_10, ETHIOPIC_SYLLABLES_SUBSET_2, ETHIOPIC_SYLLABLES_SUBSET_5, ETHIOPIC_SYLLABLES_SUBSET_11, ETHIOPIC_SYLLABLES_SUBSET_12, ETHIOPIC_SYLLABLES_SUBSET, ETHIOPIC_SYLLABLES_SUBSET_1, ETHIOPIC_SYLLABLES_SUBSET_7, ETHIOPIC_SYLLABLES_SUBSET_6, ETHIOPIC_SYLLABLES_SUBSET_8, ETHIOPIC_SUPPLEMENT_SYLLABLES_FOR_SEBATBEIT, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_SYLLABLES_TO_SYLLABLES_FOR_CARRIER, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_SYLLABLES_1, OGHAM_TRADITIONAL_LETTERS_TO_FORFEDA_SUPPLEMENTARY_LETTERS, RUNIC_LETTERS, RUNIC_TOLKIENIAN_EXTENSIONS_TO_CRYPTOGRAMMIC_LETTERS, TAGALOG_INDEPENDENT_VOWELS_TO_CONSONANTS, TAGALOG_CONSONANTS_SUBSET, HANUNOO_INDEPENDENT_VOWELS_TO_CONSONANTS, BUHID_INDEPENDENT_VOWELS_TO_CONSONANTS, TAGBANWA_INDEPENDENT_VOWELS_TO_CONSONANTS, TAGBANWA_CONSONANTS_SUBSET, KHMER_CONSONANTS_TO_INDEPENDENT_VOWELS, MONGOLIAN_BASIC_LETTERS, MONGOLIAN_TODO_LETTERS_TO_MANCHU_LETTERS, MONGOLIAN_EXTENSIONS_FOR_SANSKRIT_AND_TIBETAN_SUBSET, MONGOLIAN_EXTENSIONS_FOR_SANSKRIT_AND_TIBETAN_SUBSET_2, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_SYLLABLES_FOR_MOOSE_CREE_TO_FINALS_FOR_DENE_AND_CARRIER, LIMBU_CONSONANTS_SUBSET, TAI_LE_CONSONANTS_TO_VOWELS, TAI_LE_TONE_LETTERS_SUBSET, NEW_TAI_LUE_CONSONANTS_SUBSET, NEW_TAI_LUE_VOWEL_SIGNS_TO_TONE_MARKS, BUGINESE_CONSONANTS, TAI_THAM_CONSONANTS_TO_CONSONANTS, BALINESE_INDEPENDENT_VOWELS_TO_CONSONANTS, BALINESE_ADDITIONAL_CONSONANTS_SUBSET, SUNDANESE_VOWELS_TO_CONSONANTS, SUNDANESE_ADDITIONAL_CONSONANTS, SUNDANESE_TO_BATAK, LEPCHA_CONSONANTS, LEPCHA_ADDITIONAL_LETTERS, OL_CHIKI_LETTERS, VEDIC_EXTENSIONS_NASALIZATION_SIGNS_SUBSET, VEDIC_EXTENSIONS_NASALIZATION_SIGNS_TO_ARDHAVISARGA, VEDIC_EXTENSIONS_SIGNS_SUBSET, LETTERLIKE_SYMBOLS_HEBREW_LETTERLIKE_MATH_SYMBOLS, TIFINAGH_LETTERS_SUBSET, 
ETHIOPIC_EXTENDED_SYLLABLES_FOR_MEEN_TO_SYLLABLES_FOR_BLIN, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET_1, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET_2, ETHIOPIC_EXTENDED_SYLLABLES_FOR_BENCH_SUBSET_3, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET_1, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET_2, ETHIOPIC_EXTENDED_SYLLABLES_FOR_SEBATBEIT_SUBSET_3, CJK_SYMBOLS_AND_PUNCTUATION_HIRAGANA_LETTERS_TO_SMALL_LETTERS, KATAKANA_KATAKANA_LETTERS, KATAKANA_BASED_ON_GB_2312_TO_MISCELLANEOUS_ADDITIONS, KATAKANA_CONSONANT_LETTERS_TO_OLD_VOWEL_LETTERS, BOPOMOFO_EXTENDED_EXTENDED_BOPOMOFO_FOR_MINNAN_AND_HAKKA_TO_EXTENDED_BOPOMOFO_FOR_CANTONESE, KATAKANA_PHONETIC_EXTENSIONS_PHONETIC_EXTENSIONS_FOR_AINU, CJK_COMPATIBILITY_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, YIJING_HEXAGRAM_SYMBOLS_CJK_UNIFIED_IDEOGRAPHS_SUBSET, YI_SYLLABLES_SYLLABLES, YI_SYLLABLES_SYLLABLES_SUBSET, LISU_CONSONANTS_TO_VOWELS, VAI_SYLLABLES_IN__EE_TO_SYLLABLES_IN__E, VAI_HISTORIC_SYLLABLES_TO_LOGOGRAMS, VAI_HISTORIC_SYLLABLES_SUBSET, BAMUM_SYLLABLES_SUBSET, LATIN_EXTENDED_D_TO_SYLOTI_NAGRI_1, SYLOTI_NAGRI_INDEPENDENT_VOWELS_AND_DVISVARA_SUBSET, SYLOTI_NAGRI_CONSONANTS_AND_CONSONANT_SIGNS_SUBSET, SYLOTI_NAGRI_CONSONANTS_AND_CONSONANT_SIGNS_SUBSET_1, PHAGS_PA_CONSONANTS_TO_CONSONANT_ADDITION_FOR_TIBETAN, SAURASHTRA_INDEPENDENT_VOWELS_TO_CONSONANTS, DEVANAGARI_EXTENDED_MARKS_OF_NASALIZATION, DEVANAGARI_EXTENDED_SIGNS_TO_SIGNS, KAYAH_LI_CONSONANTS_TO_VOWELS, REJANG_CONSONANTS, HANGUL_JAMO_EXTENDED_A_OLD_INITIAL_CONSONANTS_SUBSET, JAVANESE_LETTERS, MYANMAR_EXTENDED_B_ADDITIONS_FOR_SHAN_PALI_SUBSET, MYANMAR_EXTENDED_B_TAI_LAING_CONSONANTS, MYANMAR_EXTENDED_B_TAI_LAING_CONSONANTS_SUBSET, CHAM_INDEPENDENT_VOWELS_TO_CONSONANTS, CHAM_FINAL_CONSONANTS_SUBSET, CHAM_FINAL_CONSONANTS_SUBSET_1, MYANMAR_EXTENDED_A_KHAMTI_SHAN_CONSONANTS_SUBSET, MYANMAR_EXTENDED_A_KHAMTI_SHAN_CONSONANTS_TO_KHAMTI_SHAN_LOGOGRAMS_1, 
MYANMAR_EXTENDED_A_TO_TAI_VIET, TAI_VIET_VOWELS_AND_FINALS_SUBSET_1, TAI_VIET_VOWELS_AND_FINALS_SUBSET_3, TAI_VIET_WORD_LIGATURE_SYMBOLS, MEETEI_MAYEK_EXTENSIONS_INDEPENDENT_VOWEL_SIGNS_TO_CONSONANTS, MEETEI_MAYEK_EXTENSIONS_GAMO_GOFA_DAWRO_AND_BASKETO_SUBSET, MEETEI_MAYEK_EXTENSIONS_GAMO_GOFA_DAWRO_AND_BASKETO_SUBSET_1, MEETEI_MAYEK_EXTENSIONS_GAMO_GOFA_DAWRO_SUBSET, MEETEI_MAYEK_EXTENSIONS_GUMUZ_SUBSET, MEETEI_MAYEK_EXTENSIONS_GUMUZ_SUBSET_1, MEETEI_MAYEK_LETTERS_TO_FINAL_CONSONANTS, MEETEI_MAYEK_HANGUL_SYLLABLES_SUBSET, HANGUL_JAMO_EXTENDED_B_OLD_MEDIAL_VOWELS_SUBSET, HANGUL_JAMO_EXTENDED_B_OLD_FINAL_CONSONANTS_SUBSET), 'ª', 'º', 'ƻ', 'ʔ', 'ە', 'ۿ', 'ܐ', 'ޱ', 'ऽ', 'ॐ', 'ল', 'ঽ', 'ৎ', 'ৼ', 'ਫ਼', 'ઽ', 'ૐ', 'ૹ', 'ଽ', 'ୱ', 'ஃ', 'ஜ', 'ௐ', 'ఽ', 'ಀ', 'ಽ', 'ೞ', 'ഽ', 'ൎ', 'ල', 'ຄ', 'ລ', 'ຽ', 'ༀ', 'ဿ', 'ၡ', 'ႎ', 'ቘ', 'ዀ', 'ៜ', 'ᢪ', 'ᳺ', '〆', '〼', 'ゟ', 'ヿ', 'ꙮ', 'ꞏ', 'ꟷ', 'ꣻ', 'ꩺ', 'ꪱ', 'ꫀ', 'ꫂ', 'ꫲ'),
    /**
     * Unicode general category {@code M} ({@code Mark}): a character intended to be combined with
     * another character (accents, umlauts, enclosing boxes, etc.).
     * <p>
     * Arguments, in order: the category keys, a human-readable description, a list of range constants,
     * and individual {@code char} literals. Many of the literals are combining characters, so they may
     * render attached to the opening quote in an editor.
     * <p>
     * NOTE(review): the {@code asList(...)} entries are presumably {@code SymbolRange} constants from
     * {@code UnicodeCategoryConstants} and the trailing literals presumably single characters that are
     * not covered by any listed range - confirm against the constructor, which is not visible here.
     * NOTE(review): element order may be observable by consumers; do not reorder without checking.
     */
    MARK(keys("M", "Mark"), "a character intended to be combined with another character (e.g. accents, umlauts, enclosing boxes, etc.).", asList(COMBINING_DIACRITICAL_MARKS_ORDINARY_DIACRITICS_TO_MEDIEVAL_SUPERSCRIPT_LETTER_DIACRITICS, CYRILLIC_HISTORIC_MISCELLANEOUS_SUBSET_1, CYRILLIC_SUPPLEMENT_CANTILLATION_MARKS_TO_POINTS_AND_PUNCTUATION, CYRILLIC_SUPPLEMENT_POINTS_AND_PUNCTUATION_SUBSET, CYRILLIC_SUPPLEMENT_PUNCTA_EXTRAORDINARIA, ARABIC_HONORIFICS_TO_QURANIC_ANNOTATION_SIGNS, ARABIC_TASHKIL_FROM_ISO_8859_6_TO_OTHER_COMBINING_MARKS, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_1, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_3, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_4, SYRIAC_SYRIAC_POINTS_VOWELS_TO_SYRIAC_MARKS, THAANA_VOWELS, NKO_TONE_MARKS_TO_OTHER_DIACRITICS, SAMARITAN_CONSONANT_MODIFIERS_SUBSET, SAMARITAN_CONSONANT_MODIFIERS_TO_VOWEL_SIGNS, SAMARITAN_VOWEL_SIGNS_SUBSET, SAMARITAN_VOWEL_SIGNS_TO_VOWEL_SIGNS, MANDAIC_DIACRITICS_SUBSET, ARABIC_EXTENDED_A_QURANIC_ANNOTATION_SIGNS_SUBSET, ARABIC_EXTENDED_A_TO_DEVANAGARI_1, DEVANAGARI_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, DEVANAGARI_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS_1, DEVANAGARI_VEDIC_TONE_MARKS_TO_DEPENDENT_VOWEL_SIGNS_FOR_KASHMIRI, DEVANAGARI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET, BENGALI_VARIOUS_SIGNS_SUBSET_2, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_19, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_6, BENGALI_TWO_PART_DEPENDENT_VOWEL_SIGNS_TO_TWO_PART_DEPENDENT_VOWEL_SIGNS_1, BENGALI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET_1, BENGALI_VARIOUS_SIGNS_SUBSET_3, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_16, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_7, BENGALI_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, BENGALI_SIGNS_1, BENGALI_VARIOUS_SIGNS_SUBSET_4, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_18, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_10, BENGALI_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS_1, BENGALI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET_2, BENGALI_TRANSLITERATION_SIGNS, 
BENGALI_VARIOUS_SIGNS_SUBSET_5, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_20, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_8, BENGALI_TWO_PART_DEPENDENT_VOWEL_SIGNS_TO_TWO_PART_DEPENDENT_VOWEL_SIGNS_2, BENGALI_VARIOUS_SIGNS_SUBSET_9, BENGALI_DEPENDENT_VOWELS, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_17, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_5, BENGALI_TWO_PART_DEPENDENT_VOWEL_SIGNS_TO_TWO_PART_DEPENDENT_VOWEL_SIGNS, TELUGU_VARIOUS_SIGNS, TELUGU_DEPENDENT_VOWEL_SIGNS_SUBSET_3, TELUGU_DEPENDENT_VOWEL_SIGNS_SUBSET_1, TELUGU_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, TELUGU_VARIOUS_SIGNS_SUBSET_1, TELUGU_DEPENDENT_VOWELS, KANNADA_VARIOUS_SIGNS_SUBSET, KANNADA_DEPENDENT_VOWEL_SIGNS_SUBSET_4, KANNADA_DEPENDENT_VOWEL_SIGNS_SUBSET_1, KANNADA_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, KANNADA_VARIOUS_SIGNS_SUBSET_2, KANNADA_DEPENDENT_VOWELS, MALAYALAM_VARIOUS_SIGNS_SUBSET_1, MALAYALAM_VARIANT_SHAPE_VIRAMAS, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET_6, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET_1, MALAYALAM_TWO_PART_DEPENDENT_VOWEL_SIGNS_TO_TWO_PART_DEPENDENT_VOWEL_SIGNS, MALAYALAM_DEPENDENT_VOWELS, MALAYALAM_VARIOUS_SIGNS_SUBSET_2, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET_3, MALAYALAM_DEPENDENT_VOWEL_SIGNS_TO_TWO_PART_DEPENDENT_VOWEL_SIGNS, MALAYALAM_ADDITIONAL_DEPENDENT_VOWEL_SIGNS, MALAYALAM_VOWELS_SUBSET_3, MALAYALAM_VOWEL_TO_SIGNS, MALAYALAM_VOWELS_TO_VOWEL, MALAYALAM_TONE_MARKS_TO_SIGNS, TIBETAN_ASTROLOGICAL_SIGNS_SUBSET_1, TIBETAN_ASTROLOGICAL_SIGNS_1, TIBETAN_DEPENDENT_VOWEL_SIGNS_TO_MARKS_AND_SIGNS_1, TIBETAN_MARKS_AND_SIGNS_SUBSET, TIBETAN_TRANSLITERATION_SUBJOINED_SIGNS_TO_SUBJOINED_CONSONANTS, TIBETAN_SUBJOINED_CONSONANTS_TO_FIXED_FORM_SUBJOINED_CONSONANTS, MYANMAR_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_CONSONANT_SIGNS, MYANMAR_PALI_AND_SANSKRIT_EXTENSIONS_SUBSET_1, MYANMAR_EXTENSIONS_FOR_MON_SUBSET_1, MYANMAR_EXTENSIONS_FOR_SGAW_KAREN_SUBSET, MYANMAR_EXTENSIONS_FOR_WESTERN_PWO_KAREN_SUBSET_1, MYANMAR_EXTENSION_FOR_GEBA_KAREN_TO_EXTENSIONS_FOR_KAYAH, 
MYANMAR_EXTENSIONS_FOR_SHAN_SUBSET, MYANMAR_EXTENSIONS_FOR_KHAMTI_SHAN_TO_EXTENSIONS_FOR_AITON_AND_PHAKE, ETHIOPIC_COMBINING_MARKS, TAGALOG_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, HANUNOO_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, BUHID_DEPENDENT_VOWEL_SIGNS_SUBSET, TAGBANWA_DEPENDENT_VOWEL_SIGNS_SUBSET, KHMER_INHERENT_VOWELS_TO_VARIOUS_SIGNS, MONGOLIAN_FORMAT_CONTROLS_SUBSET, MONGOLIAN_EXTENSIONS_FOR_SANSKRIT_AND_TIBETAN_SUBSET_1, LIMBU_DEPENDENT_VOWEL_SIGNS_TO_SUBJOINED_CONSONANTS, LIMBU_FINAL_CONSONANTS_TO_VARIOUS_SIGNS, BUGINESE_VOWELS_SUBSET_1, TAI_THAM_CONSONANT_SIGNS_SUBSET, TAI_THAM_SIGN_TO_OTHER_MARKS, COMBINING_DIACRITICAL_MARKS_EXTENDED_USED_IN_GERMAN_DIALECTOLOGY_TO_USED_FOR_SCOTS_DIALECTOLOGY, BALINESE_VARIOUS_SIGNS, BALINESE_SIGN_TO_DEPENDENT_VOWEL_SIGNS, BALINESE_DIACRITICAL_MARKS_FOR_MUSICAL_SYMBOLS, SUNDANESE_VARIOUS_SIGNS, SUNDANESE_CONSONANT_SIGNS_TO_CONSONANT_SIGNS, BATAK_SIGN_TO_SIGNS, LEPCHA_SUBJOINED_CONSONANTS_TO_VARIOUS_SIGNS, VEDIC_EXTENSIONS_TONE_MARKS_FOR_THE_SAMAVEDA, VEDIC_EXTENSIONS_SIGNS_FOR_YAJURVEDIC_TO_DIACRITICS_FOR_VISARGA, VEDIC_EXTENSIONS_SIGNS_TO_SIGNS_FOR_JAIMINIYA_SAMA_VEDA, COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_USED_FOR_ANCIENT_GREEK_TO_MISCELLANEOUS_MARKS, COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_MISCELLANEOUS_MARKS_TO_ADDITIONAL_MARKS_FOR_UPA, COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS_TO_ADDITIONAL_DIACRITICAL_MARKS_FOR_SYMBOLS, COPTIC_COMBINING_MARKS, CYRILLIC_EXTENDED_A_OLD_CHURCH_SLAVONIC_COMBINING_LETTERS, CJK_SYMBOLS_AND_PUNCTUATION_COMBINING_TONE_MARKS, CJK_SYMBOLS_AND_PUNCTUATION_VOICING_MARKS_SUBSET, CYRILLIC_EXTENDED_B_ABBREVIATION_MARK_TO_COMBINING_NUMERIC_SIGNS, CYRILLIC_EXTENDED_B_COMBINING_MARKS_FOR_OLD_CYRILLIC, CYRILLIC_EXTENDED_B_COMBINING_MARKS_FOR_OLD_CYRILLIC_1, BAMUM_COMBINING_MARKS, SYLOTI_NAGRI_DEPENDENT_VOWEL_SIGNS, SAURASHTRA_VARIOUS_SIGNS, SAURASHTRA_CONSONANTS_TO_VIRAMA, DEVANAGARI_EXTENDED_CANTILLATION_MARKS_SVARA_FOR_THE_SAMAVEDA, 
KAYAH_LI_VOWELS_TO_TONE_MARKS, REJANG_VOWEL_SIGNS_TO_CONSONANT_SIGNS, JAVANESE_VARIOUS_SIGNS, JAVANESE_SIGN_TO_DEPENDENT_CONSONANT_SIGNS, CHAM_DEPENDENT_VOWEL_SIGNS_TO_CONSONANT_SIGNS, CHAM_FINAL_CONSONANTS_SUBSET_2, MYANMAR_EXTENDED_A_PAO_KAREN_TONE_MARK_TO_TAI_LAING_TONE_MARKS, TAI_VIET_VOWELS_AND_FINALS_SUBSET, TAI_VIET_VOWELS_AND_FINALS_SUBSET_2, TAI_VIET_VOWELS_AND_FINALS_TO_VOWELS_AND_FINALS, MEETEI_MAYEK_EXTENSIONS_DEPENDENT_VOWEL_SIGNS, MEETEI_MAYEK_EXTENSIONS_SIGN_TO_SIGN, MEETEI_MAYEK_DEPENDENT_VOWEL_SIGNS, MEETEI_MAYEK_PUNCTUATION_SUBSET), 'ֿ', 'ׇ', 'ٰ', 'ܑ', '߽', '়', 'ৗ', '৾', '਼', 'ੑ', 'ੵ', '઼', '଼', 'ஂ', 'ௗ', '಼', 'ൗ', '්', 'ූ', 'ั', 'ັ', '༵', '༷', '༹', '࿆', 'ႏ', '៝', 'ᢩ', '᩿', '᳭', '᳴', '⵿', 'ꠂ', '꠆', 'ꠋ', '꠬', 'ꣿ', 'ꧥ', 'ꩃ', 'ꪰ', '꫁'),
    /**
     * Unicode general category {@code Mn} ({@code Non_Spacing_Mark}): a character intended to be
     * combined with another character without taking up extra space (accents, umlauts, etc.).
     * <p>
     * Arguments, in order: the category keys, a human-readable description, a list of range constants,
     * and individual {@code char} literals. The literals are combining characters, so they may render
     * attached to the opening quote in an editor.
     * <p>
     * NOTE(review): the {@code asList(...)} entries are presumably {@code SymbolRange} constants from
     * {@code UnicodeCategoryConstants} and the trailing literals presumably single characters that are
     * not covered by any listed range - confirm against the constructor, which is not visible here.
     * NOTE(review): element order may be observable by consumers; do not reorder without checking.
     */
    NON_SPACING_MARK(keys("Mn", "Non_Spacing_Mark"), "a character intended to be combined with another character without taking up extra space (e.g. accents, umlauts, etc.).", asList(COMBINING_DIACRITICAL_MARKS_ORDINARY_DIACRITICS_TO_MEDIEVAL_SUPERSCRIPT_LETTER_DIACRITICS, CYRILLIC_HISTORIC_MISCELLANEOUS_SUBSET, CYRILLIC_SUPPLEMENT_CANTILLATION_MARKS_TO_POINTS_AND_PUNCTUATION, CYRILLIC_SUPPLEMENT_POINTS_AND_PUNCTUATION_SUBSET, CYRILLIC_SUPPLEMENT_PUNCTA_EXTRAORDINARIA, ARABIC_HONORIFICS_TO_QURANIC_ANNOTATION_SIGNS, ARABIC_TASHKIL_FROM_ISO_8859_6_TO_OTHER_COMBINING_MARKS, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_1, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_3, ARABIC_QURANIC_ANNOTATION_SIGNS_SUBSET_4, SYRIAC_SYRIAC_POINTS_VOWELS_TO_SYRIAC_MARKS, THAANA_VOWELS, NKO_TONE_MARKS_TO_OTHER_DIACRITICS, SAMARITAN_CONSONANT_MODIFIERS_SUBSET, SAMARITAN_CONSONANT_MODIFIERS_TO_VOWEL_SIGNS, SAMARITAN_VOWEL_SIGNS_SUBSET, SAMARITAN_VOWEL_SIGNS_TO_VOWEL_SIGNS, MANDAIC_DIACRITICS_SUBSET, ARABIC_EXTENDED_A_QURANIC_ANNOTATION_SIGNS_SUBSET, ARABIC_EXTENDED_A_TO_DEVANAGARI, DEVANAGARI_DEPENDENT_VOWEL_SIGNS_SUBSET, DEVANAGARI_VEDIC_TONE_MARKS_TO_DEPENDENT_VOWEL_SIGNS_FOR_KASHMIRI, DEVANAGARI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_2, BENGALI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET_1, BENGALI_VARIOUS_SIGNS_SUBSET, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_7, BENGALI_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, BENGALI_SIGNS_1, BENGALI_VARIOUS_SIGNS_SUBSET_1, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_4, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_9, BENGALI_ADDITIONAL_VOWELS_FOR_SANSKRIT_SUBSET_2, BENGALI_TRANSLITERATION_SIGNS, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_3, BENGALI_VARIOUS_SIGNS_SUBSET_8, BENGALI_DEPENDENT_VOWELS, TELUGU_DEPENDENT_VOWEL_SIGNS_SUBSET_2, TELUGU_DEPENDENT_VOWEL_SIGNS_SUBSET_1, TELUGU_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, TELUGU_VARIOUS_SIGNS_SUBSET_1, 
TELUGU_DEPENDENT_VOWELS, KANNADA_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS_1, KANNADA_DEPENDENT_VOWELS, MALAYALAM_VARIOUS_SIGNS_SUBSET, MALAYALAM_VARIANT_SHAPE_VIRAMAS, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET, MALAYALAM_DEPENDENT_VOWELS, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET_4, MALAYALAM_VOWELS_SUBSET_3, MALAYALAM_VOWEL_TO_SIGNS, MALAYALAM_VOWELS_TO_VOWEL, MALAYALAM_TONE_MARKS_TO_SIGNS, TIBETAN_ASTROLOGICAL_SIGNS_SUBSET_1, TIBETAN_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, TIBETAN_DEPENDENT_VOWEL_SIGNS_TO_MARKS_AND_SIGNS, TIBETAN_MARKS_AND_SIGNS_SUBSET, TIBETAN_TRANSLITERATION_SUBJOINED_SIGNS_TO_SUBJOINED_CONSONANTS, TIBETAN_SUBJOINED_CONSONANTS_TO_FIXED_FORM_SUBJOINED_CONSONANTS, MYANMAR_DEPENDENT_VOWEL_SIGNS_SUBSET_1, MYANMAR_DEPENDENT_VOWEL_SIGNS_TO_VARIOUS_SIGNS, MYANMAR_VIRAMA_AND_KILLER, MYANMAR_DEPENDENT_CONSONANT_SIGNS_SUBSET_1, MYANMAR_PALI_AND_SANSKRIT_EXTENSIONS_SUBSET_3, MYANMAR_EXTENSIONS_FOR_MON_SUBSET_1, MYANMAR_EXTENSION_FOR_GEBA_KAREN_TO_EXTENSIONS_FOR_KAYAH, MYANMAR_EXTENSIONS_FOR_SHAN_SUBSET_2, ETHIOPIC_COMBINING_MARKS, TAGALOG_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, HANUNOO_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, BUHID_DEPENDENT_VOWEL_SIGNS_SUBSET, TAGBANWA_DEPENDENT_VOWEL_SIGNS_SUBSET, KHMER_INHERENT_VOWELS, KHMER_DEPENDENT_VOWEL_SIGNS_SUBSET, KHMER_CONSONANT_SHIFTERS_TO_VARIOUS_SIGNS, MONGOLIAN_FORMAT_CONTROLS_SUBSET, MONGOLIAN_EXTENSIONS_FOR_SANSKRIT_AND_TIBETAN_SUBSET_1, LIMBU_DEPENDENT_VOWEL_SIGNS_SUBSET, LIMBU_DEPENDENT_VOWEL_SIGNS_SUBSET_2, LIMBU_VARIOUS_SIGNS_SUBSET_1, BUGINESE_VOWELS_SUBSET, TAI_THAM_CONSONANT_SIGNS_SUBSET_1, TAI_THAM_DEPENDENT_VOWEL_SIGNS_SUBSET_1, TAI_THAM_DEPENDENT_VOWEL_SIGNS_TO_OTHER_MARKS, COMBINING_DIACRITICAL_MARKS_EXTENDED_USED_IN_GERMAN_DIALECTOLOGY_TO_MARKS_SURROUNDING_OTHER_DIACRITICS_OR_LETTERS, COMBINING_DIACRITICAL_MARKS_EXTENDED_USED_FOR_SCOTS_DIALECTOLOGY, BALINESE_VARIOUS_SIGNS_SUBSET, BALINESE_DEPENDENT_VOWEL_SIGNS_SUBSET, BALINESE_DIACRITICAL_MARKS_FOR_MUSICAL_SYMBOLS, 
SUNDANESE_VARIOUS_SIGNS_SUBSET, SUNDANESE_CONSONANT_SIGNS_TO_VOWEL_SIGNS, SUNDANESE_VOWEL_SIGNS_SUBSET_1, SUNDANESE_VIRAMAS_TO_CONSONANT_SIGNS, BATAK_DEPENDENT_VOWEL_SIGNS_SUBSET, BATAK_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_CONSONANT_SIGNS, LEPCHA_DEPENDENT_VOWELS_TO_CONSONANT_SIGNS, LEPCHA_VARIOUS_SIGNS_SUBSET, VEDIC_EXTENSIONS_TONE_MARKS_FOR_THE_SAMAVEDA, VEDIC_EXTENSIONS_SIGNS_FOR_YAJURVEDIC_TO_TONE_MARKS_FOR_THE_SATAPATHABRAHMANA, VEDIC_EXTENSIONS_DIACRITICS_FOR_VISARGA, VEDIC_EXTENSIONS_SIGNS_FOR_JAIMINIYA_SAMA_VEDA, COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_USED_FOR_ANCIENT_GREEK_TO_MISCELLANEOUS_MARKS, COMBINING_DIACRITICAL_MARKS_SUPPLEMENT_MISCELLANEOUS_MARKS_TO_ADDITIONAL_MARKS_FOR_UPA, COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS, COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS_ADDITIONAL_DIACRITICAL_MARKS_FOR_SYMBOLS_SUBSET, COPTIC_COMBINING_MARKS, CYRILLIC_EXTENDED_A_OLD_CHURCH_SLAVONIC_COMBINING_LETTERS, CJK_SYMBOLS_AND_PUNCTUATION_COMBINING_TONE_MARKS_SUBSET, CJK_SYMBOLS_AND_PUNCTUATION_VOICING_MARKS_SUBSET, CYRILLIC_EXTENDED_B_COMBINING_MARKS_FOR_OLD_CYRILLIC, CYRILLIC_EXTENDED_B_COMBINING_MARKS_FOR_OLD_CYRILLIC_1, BAMUM_COMBINING_MARKS, SYLOTI_NAGRI_DEPENDENT_VOWEL_SIGNS_SUBSET_1, SAURASHTRA_VIRAMA_TO_VIRAMA, DEVANAGARI_EXTENDED_CANTILLATION_MARKS_SVARA_FOR_THE_SAMAVEDA, KAYAH_LI_VOWELS_TO_TONE_MARKS, REJANG_VOWEL_SIGNS_TO_CONSONANT_SIGNS_1, JAVANESE_VARIOUS_SIGNS_SUBSET, JAVANESE_DEPENDENT_VOWEL_SIGNS_SUBSET_1, JAVANESE_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, CHAM_DEPENDENT_VOWEL_SIGNS_SUBSET, CHAM_DEPENDENT_VOWEL_SIGNS_SUBSET_2, CHAM_CONSONANT_SIGNS_SUBSET_1, TAI_VIET_VOWELS_AND_FINALS_SUBSET, TAI_VIET_VOWELS_AND_FINALS_SUBSET_2, TAI_VIET_VOWELS_AND_FINALS_TO_VOWELS_AND_FINALS, MEETEI_MAYEK_EXTENSIONS_DEPENDENT_VOWEL_SIGNS_SUBSET), 'ֿ', 'ׇ', 'ٰ', 'ܑ', '߽', 'ऺ', '़', '्', 'ঁ', '়', '্', '৾', '਼', 'ੑ', 'ੵ', '઼', '્', 'ଁ', '଼', 'ି', '୍', 'ஂ', 'ீ', '்', 'ఀ', 'ఄ', 'ಁ', '಼', 'ಿ', 'ೆ', '്', 'ඁ', '්', 'ූ', 'ั', 'ັ', '༵', '༷', 
'༹', '࿆', 'ႂ', 'ႍ', 'ႝ', 'ំ', '៝', 'ᢩ', 'ᤲ', 'ᨛ', 'ᩖ', '᩠', 'ᩢ', '᩿', '᬴', 'ᬼ', 'ᭂ', '᯦', 'ᯭ', '᳭', '᳴', '⃡', '⵿', '꙯', 'ꠂ', '꠆', 'ꠋ', '꠬', 'ꣿ', '꦳', 'ꧥ', 'ꩃ', 'ꩌ', 'ꩼ', 'ꪰ', '꫁', '꫶', 'ꯥ', 'ꯨ', '꯭'),
    /**
     * Unicode general category {@code Mc} ({@code Spacing_Combining_Mark}): a character intended to be
     * combined with another character that takes up extra space (vowel signs in many Eastern languages).
     * <p>
     * Arguments, in order: the category keys, a human-readable description, a list of range constants,
     * and individual {@code char} literals.
     * <p>
     * NOTE(review): the {@code asList(...)} entries are presumably {@code SymbolRange} constants from
     * {@code UnicodeCategoryConstants} and the trailing literals presumably single characters that are
     * not covered by any listed range - confirm against the constructor, which is not visible here.
     * NOTE(review): element order may be observable by consumers; do not reorder without checking.
     */
    SPACING_COMBINING_MARK(keys("Mc", "Spacing_Combining_Mark"), "a character intended to be combined with another character that takes up extra space (vowel signs in many Eastern languages).", asList(DEVANAGARI_DEPENDENT_VOWEL_SIGNS_SUBSET_2, DEVANAGARI_DEPENDENT_VOWEL_SIGNS_SUBSET_1, DEVANAGARI_DEPENDENT_VOWEL_SIGNS_2, BENGALI_VARIOUS_SIGNS_SUBSET_6, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_13, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_6, BENGALI_TWO_PART_DEPENDENT_VOWEL_SIGNS, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_14, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_15, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_11, BENGALI_VARIOUS_SIGNS_SUBSET_7, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_8, BENGALI_TWO_PART_DEPENDENT_VOWEL_SIGNS_1, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_12, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_1, BENGALI_DEPENDENT_VOWEL_SIGNS_SUBSET_5, BENGALI_TWO_PART_DEPENDENT_VOWEL_SIGNS_2, TELUGU_VARIOUS_SIGNS_SUBSET, TELUGU_DEPENDENT_VOWEL_SIGNS_SUBSET, KANNADA_VARIOUS_SIGNS_SUBSET_1, KANNADA_DEPENDENT_VOWEL_SIGNS_SUBSET, KANNADA_DEPENDENT_VOWEL_SIGNS_SUBSET_2, KANNADA_DEPENDENT_VOWEL_SIGNS_SUBSET_3, KANNADA_VARIOUS_SIGNS_SUBSET_2, MALAYALAM_VARIOUS_SIGNS_SUBSET_4, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET_5, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET_1, MALAYALAM_TWO_PART_DEPENDENT_VOWEL_SIGNS, MALAYALAM_VARIOUS_SIGNS_SUBSET_3, MALAYALAM_DEPENDENT_VOWEL_SIGNS_SUBSET_2, MALAYALAM_DEPENDENT_VOWEL_SIGNS_TO_TWO_PART_DEPENDENT_VOWEL_SIGNS, MALAYALAM_ADDITIONAL_DEPENDENT_VOWEL_SIGNS, TIBETAN_ASTROLOGICAL_SIGNS_1, MYANMAR_DEPENDENT_VOWEL_SIGNS_SUBSET, MYANMAR_DEPENDENT_CONSONANT_SIGNS_SUBSET, MYANMAR_PALI_AND_SANSKRIT_EXTENSIONS_SUBSET_2, MYANMAR_EXTENSIONS_FOR_SGAW_KAREN_SUBSET, MYANMAR_EXTENSIONS_FOR_WESTERN_PWO_KAREN_SUBSET_1, MYANMAR_EXTENSIONS_FOR_SHAN_SUBSET_1, MYANMAR_EXTENSIONS_FOR_SHAN_SUBSET_3, MYANMAR_EXTENSIONS_FOR_KHAMTI_SHAN_TO_EXTENSIONS_FOR_KHAMTI_SHAN, KHMER_TWO_PART_DEPENDENT_VOWEL_SIGNS_TO_TWO_PART_DEPENDENT_VOWEL_SIGNS, KHMER_VARIOUS_SIGNS_SUBSET, 
LIMBU_DEPENDENT_VOWEL_SIGNS_SUBSET_1, LIMBU_SUBJOINED_CONSONANTS_SUBSET, LIMBU_FINAL_CONSONANTS_SUBSET, LIMBU_FINAL_CONSONANTS_SUBSET_1, BUGINESE_VOWELS_SUBSET_2, TAI_THAM_DEPENDENT_VOWEL_SIGNS_SUBSET, TAI_THAM_DEPENDENT_VOWEL_SIGNS_SUBSET_2, BALINESE_DEPENDENT_VOWEL_SIGNS_SUBSET_1, BALINESE_DEPENDENT_VOWEL_SIGNS_TO_DEPENDENT_VOWEL_SIGNS, SUNDANESE_VOWEL_SIGNS_SUBSET, BATAK_DEPENDENT_VOWEL_SIGNS_SUBSET_1, BATAK_SIGNS_SUBSET, LEPCHA_SUBJOINED_CONSONANTS_TO_DEPENDENT_VOWELS, LEPCHA_CONSONANT_SIGNS_SUBSET, CJK_SYMBOLS_AND_PUNCTUATION_COMBINING_TONE_MARKS_SUBSET_1, SYLOTI_NAGRI_DEPENDENT_VOWEL_SIGNS_SUBSET, SAURASHTRA_VARIOUS_SIGNS, SAURASHTRA_CONSONANTS_TO_DEPENDENT_VOWEL_SIGNS, REJANG_CONSONANT_SIGNS_TO_CONSONANT_SIGNS, JAVANESE_DEPENDENT_VOWEL_SIGNS_SUBSET, JAVANESE_DEPENDENT_VOWEL_SIGNS_SUBSET_2, JAVANESE_DEPENDENT_CONSONANT_SIGNS_TO_DEPENDENT_CONSONANT_SIGNS, CHAM_DEPENDENT_VOWEL_SIGNS_SUBSET_1, CHAM_CONSONANT_SIGNS_SUBSET, MEETEI_MAYEK_EXTENSIONS_DEPENDENT_VOWEL_SIGNS_SUBSET_1, MEETEI_MAYEK_DEPENDENT_VOWEL_SIGNS_SUBSET, MEETEI_MAYEK_DEPENDENT_VOWEL_SIGNS_SUBSET_1, MEETEI_MAYEK_DEPENDENT_VOWEL_SIGNS_SUBSET_2), 'ः', 'ऻ', 'ৗ', 'ਃ', 'ઃ', 'ૉ', 'ା', 'ୀ', 'ୗ', 'ௗ', 'ಾ', 'ൗ', 'ཿ', 'ေ', 'း', 'ႏ', 'ា', 'ᩕ', 'ᩗ', 'ᩡ', 'ᬄ', 'ᬵ', 'ᬻ', 'ᮂ', 'ᮡ', '᮪', 'ᯧ', 'ᯮ', '᳡', '᳷', 'ꠧ', 'ꦃ', 'ꩍ', 'ꩻ', 'ꩽ', 'ꫫ', 'ꫵ', '꯬'),
    /**
     * Unicode general category {@code Me} ({@code Enclosing_Mark}): a character that encloses the
     * character it is combined with (circle, square, keycap, etc.).
     * <p>
     * Declared with four range constants plus one individual combining character; the literal may
     * render attached to its opening quote in an editor.
     */
    ENCLOSING_MARK(
            keys("Me", "Enclosing_Mark"),
            "a character that encloses the character it is combined with (circle, square, keycap, etc.).",
            asList(
                    CYRILLIC_HISTORIC_MISCELLANEOUS_SUBSET_2,
                    COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS_ENCLOSING_DIACRITICS,
                    COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS_ADDITIONAL_ENCLOSING_DIACRITICS,
                    CYRILLIC_EXTENDED_B_COMBINING_NUMERIC_SIGNS),
            '᪾'),
    /**
     * Unicode general category {@code Z} ({@code Separator}): any kind of whitespace or invisible separator.
     * <p>
     * The individual characters after the ASCII space are written as Unicode escapes because they are
     * invisible or indistinguishable from an ordinary space when typed literally, and are easily damaged
     * by editors or whitespace-normalising tools. In order they are: U+0020 SPACE, U+00A0 NO-BREAK SPACE,
     * U+1680 OGHAM SPACE MARK, U+202F NARROW NO-BREAK SPACE, U+205F MEDIUM MATHEMATICAL SPACE and
     * U+3000 IDEOGRAPHIC SPACE.
     * <p>
     * NOTE(review): the two range constants presumably cover the remaining space characters and the
     * line/paragraph separators - confirm against {@code UnicodeCategoryConstants}.
     */
    SEPARATOR(keys("Z", "Separator"), "any kind of whitespace or invisible separator.", asList(GENERAL_PUNCTUATION_SPACES, GENERAL_PUNCTUATION_SEPARATORS), ' ', '\u00A0', '\u1680', '\u202F', '\u205F', '\u3000'),
    /**
     * Unicode general category {@code Zs} ({@code Space_Separator}): a whitespace character that is
     * invisible, but does take up space.
     * <p>
     * The individual characters after the ASCII space are written as Unicode escapes because they are
     * invisible or indistinguishable from an ordinary space when typed literally, and are easily damaged
     * by editors or whitespace-normalising tools. In order they are: U+0020 SPACE, U+00A0 NO-BREAK SPACE,
     * U+1680 OGHAM SPACE MARK, U+202F NARROW NO-BREAK SPACE, U+205F MEDIUM MATHEMATICAL SPACE and
     * U+3000 IDEOGRAPHIC SPACE.
     * <p>
     * NOTE(review): {@code GENERAL_PUNCTUATION_SPACES} presumably covers the remaining space characters
     * of this category - confirm against {@code UnicodeCategoryConstants}.
     */
    SPACE_SEPARATOR(keys("Zs", "Space_Separator"), "a whitespace character that is invisible, but does take up space.", GENERAL_PUNCTUATION_SPACES, new char[]{' ', '\u00A0', '\u1680', '\u202F', '\u205F', '\u3000'}),
    /**
     * Unicode general category {@code Zl} ({@code Line_Separator}): the single character U+2028 LINE SEPARATOR.
     * <p>
     * The character is written as a Unicode escape rather than a raw literal: it is invisible, and some
     * editors and tools treat it as a line break, which would silently corrupt the char literal.
     */
    LINE_SEPARATOR(keys("Zl", "Line_Separator"), "line separator character U+2028.", '\u2028'),
    /**
     * Unicode general category {@code Zp} ({@code Paragraph_Separator}): the single character
     * U+2029 PARAGRAPH SEPARATOR.
     * <p>
     * The character is written as a Unicode escape rather than a raw literal: it is invisible, and some
     * editors and tools treat it as a line break, which would silently corrupt the char literal.
     */
    PARAGRAPH_SEPARATOR(keys("Zp", "Paragraph_Separator"), "paragraph separator character U+2029.", '\u2029'),
    /**
     * Unicode general category {@code S} ({@code Symbol}): math symbols, currency signs, dingbats,
     * box-drawing characters, etc.
     * <p>
     * Arguments, in order: the category keys, a human-readable description, a list of range constants,
     * and individual {@code char} literals.
     * <p>
     * NOTE(review): the {@code asList(...)} entries are presumably {@code SymbolRange} constants from
     * {@code UnicodeCategoryConstants} and the trailing literals presumably single characters that are
     * not covered by any listed range - confirm against the constructor, which is not visible here.
     * NOTE(review): element order may be observable by consumers; do not reorder without checking.
     */
    SYMBOL(keys("S", "Symbol"), "math symbols, currency signs, dingbats, box-drawing characters, etc.", asList(BASIC_LATIN_ASCII_MATHEMATICAL_OPERATORS, LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET_1, LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET_2, LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET_3, SPACING_MODIFIER_LETTERS_MISCELLANEOUS_PHONETIC_MODIFIERS_SUBSET, SPACING_MODIFIER_LETTERS_MISCELLANEOUS_PHONETIC_MODIFIERS_TO_ADDITIONS_BASED_ON_1989_IPA, SPACING_MODIFIER_LETTERS_TONE_LETTERS_TO_EXTENDED_BOPOMOFO_TONE_MARKS, SPACING_MODIFIER_LETTERS_UPA_MODIFIERS, GREEK_AND_COPTIC_SPACING_ACCENT_MARKS, CYRILLIC_SUPPLEMENT_RELIGIOUS_SYMBOLS_TO_RELIGIOUS_SYMBOLS, ARABIC_RADIX_SYMBOLS_TO_RADIX_SYMBOLS, ARABIC_POETIC_MARKS, ARABIC_SIGNS_FOR_SINDHI, NKO_CURRENCY_SYMBOLS, BENGALI_CURRENCY_SYMBOLS, BENGALI_SIGN_TO_SIGN, BENGALI_TAMIL_CALENDRICAL_SYMBOLS_TO_CURRENCY_SYMBOL, TIBETAN_HEAD_MARKS_SUBSET, TIBETAN_ASTROLOGICAL_SIGNS_SUBSET, TIBETAN_ASTROLOGICAL_SIGNS_SUBSET_2, TIBETAN_SIGNS_TO_SYMBOLS, TIBETAN_SYMBOLS_SUBSET, TIBETAN_ASTROLOGICAL_SIGNS_2, TIBETAN_RELIGIOUS_SYMBOLS, MYANMAR_SHAN_SYMBOLS, ETHIOPIC_SUPPLEMENT_TONAL_MARKS_SUBSET, NEW_TAI_LUE_TO_KHMER_SYMBOLS, BALINESE_MUSICAL_SYMBOLS_FOR_NOTES, BALINESE_MUSICAL_SYMBOLS, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_45, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_10, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_20, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_32, GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_44, SUPERSCRIPTS_AND_SUBSCRIPTS_SUPERSCRIPTS_SUBSET_1, SUPERSCRIPTS_AND_SUBSCRIPTS_SUBSCRIPTS_SUBSET_1, CURRENCY_SYMBOLS_CURRENCY_SYMBOLS_SUBSET, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_1, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_2, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_8, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_10, LETTERLIKE_SYMBOLS_ADDITIONAL_LETTERLIKE_SYMBOLS_SUBSET_1, 
LETTERLIKE_SYMBOLS_DOUBLE_STRUCK_LARGE_OPERATOR_TO_ADDITIONAL_LETTERLIKE_SYMBOLS, LETTERLIKE_SYMBOLS_ADDITIONAL_LETTERLIKE_SYMBOLS_2, NUMBER_FORMS_TURNED_DIGITS_SUBSET, ARROWS_TO_MISCELLANEOUS_TECHNICAL, MISCELLANEOUS_TECHNICAL_CROPS_TO_KEYBOARD_SYMBOLS, MISCELLANEOUS_TECHNICAL_TO_CONTROL_PICTURES_1, OPTICAL_CHARACTER_RECOGNITION_OCR_A_TO_MICR, ENCLOSED_ALPHANUMERICS_PARENTHESIZED_LATIN_LETTERS_TO_CIRCLED_LATIN_LETTERS, BOX_DRAWING_TO_DINGBATS, DINGBATS_TO_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_OPERATOR_TO_MODAL_LOGIC_OPERATORS, SUPPLEMENTAL_ARROWS_A_TO_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_FENCES_TO_BOWTIE_SYMBOLS, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_TO_SPECIALIZED_PLUS_SIGN_OPERATORS, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_TO_MISCELLANEOUS_SYMBOLS_AND_ARROWS, MISCELLANEOUS_SYMBOLS_AND_ARROWS_TRIANGLE_HEADED_ARROWS_TO_MISCELLANEOUS_ARROW_SYMBOL, MISCELLANEOUS_SYMBOLS_AND_ARROWS_MISCELLANEOUS_SYMBOL_TO_SYMBOLS_USED_IN_CHESS_NOTATION, COPTIC_SYMBOLS_SUBSET, SUPPLEMENTAL_PUNCTUATION_HISTORIC_PUNCTUATION_SUBSET, CJK_RADICALS_SUPPLEMENT_CJK_RADICALS_SUPPLEMENT_SUBSET, CJK_RADICALS_SUPPLEMENT_CJK_RADICALS_SUPPLEMENT_SUBSET_1, KANGXI_RADICALS_KANGXI_RADICALS_SUBSET, IDEOGRAPHIC_DESCRIPTION_CHARACTERS_IDEOGRAPHIC_DESCRIPTION_CHARACTERS_SUBSET, CJK_SYMBOLS_AND_PUNCTUATION_CJK_SYMBOLS, CJK_SYMBOLS_AND_PUNCTUATION_OTHER_CJK_SYMBOLS, CJK_SYMBOLS_AND_PUNCTUATION_SPECIAL_CJK_INDICATORS, CJK_SYMBOLS_AND_PUNCTUATION_VOICING_MARKS_SUBSET_1, KANBUN_TATETEN_TO_TATETEN, KANBUN_KAERITEN_SUBSET_1, CJK_STROKES_CJK_STROKES_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_PARENTHESIZED_HANGUL_LETTERS_TO_PARENTHESIZED_KOREAN_WORDS, ENCLOSED_CJK_LETTERS_AND_MONTHS_PARENTHESIZED_IDEOGRAPHS_TO_CIRCLED_IDEOGRAPHS_FROM_ARIB_STD_B24, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_HANGUL_LETTERS_TO_CIRCLED_HANGUL_SYLLABLE, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_IDEOGRAPHS_SUBSET_1, 
ENCLOSED_CJK_LETTERS_AND_MONTHS_TO_CJK_COMPATIBILITY, YIJING_HEXAGRAM_SYMBOLS_YIJING_HEXAGRAM_SYMBOLS, YI_RADICALS_YI_RADICALS_SUBSET, MODIFIER_TONE_LETTERS_CORNER_TONE_MARKS_FOR_CHINESE_TO_LEFT_STEM_TONE_LETTERS, LATIN_EXTENDED_D_ADDITIONS_FOR_UPA, LATIN_EXTENDED_D_MODIFIER_LETTERS_SUBSET, SYLOTI_NAGRI_POETRY_MARKS, COMMON_INDIC_NUMBER_FORMS_NUMBER_FORMS_TO_CURRENCY_SYMBOL, MYANMAR_EXTENDED_A_AITON_SYMBOLS_AND_LETTERS_SUBSET, LATIN_EXTENDED_E_LETTERS_FOR_SCOTS_DIALECTOLOGY_SUBSET), '$', '+', '^', '`', '|', '~', '¬', '´', '¸', '×', '÷', '˭', '͵', '϶', '҂', '؋', '۞', '۩', '߶', '૱', '୰', '౿', '൏', '൹', '฿', '༓', '༴', '༶', '༸', '᙭', '៛', '᥀', '᾽', '⁄', '⁒', '℔', '℥', '℧', '℩', '℮', '⅏', '〄', '〠', '㉐', '꭛'),
    /**
     * Unicode general category {@code Sm} ({@code Math_Symbol}): any mathematical symbol.
     * <p>
     * Declared with a list of range constants followed by individual characters; the order of both
     * lists is kept exactly as originally declared.
     */
    MATH_SYMBOL(
            keys("Sm", "Math_Symbol"),
            "any mathematical symbol.",
            asList(
                    BASIC_LATIN_ASCII_MATHEMATICAL_OPERATORS,
                    ARABIC_RADIX_SYMBOLS_TO_RADIX_SYMBOLS,
                    SUPERSCRIPTS_AND_SUBSCRIPTS_SUPERSCRIPTS_SUBSET_1,
                    SUPERSCRIPTS_AND_SUBSCRIPTS_SUBSCRIPTS_SUBSET_1,
                    LETTERLIKE_SYMBOLS_DOUBLE_STRUCK_LARGE_OPERATOR_TO_ADDITIONAL_LETTERLIKE_SYMBOLS,
                    ARROWS_SIMPLE_ARROWS_SUBSET,
                    ARROWS_ARROWS_WITH_MODIFICATIONS_SUBSET,
                    ARROWS_DOUBLE_ARROWS_SUBSET,
                    ARROWS_TO_MATHEMATICAL_OPERATORS,
                    MISCELLANEOUS_TECHNICAL_INTEGRAL_PIECES,
                    MISCELLANEOUS_TECHNICAL_BRACKET_PIECES_TO_SUMMATION_SIGN_PARTS,
                    MISCELLANEOUS_TECHNICAL_HORIZONTAL_BRACKETS_1,
                    GEOMETRIC_SHAPES_GEOMETRIC_SHAPES_1,
                    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_MISCELLANEOUS_SYMBOLS,
                    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_OPERATOR_TO_MODAL_LOGIC_OPERATORS,
                    SUPPLEMENTAL_ARROWS_A_ARROWS_TO_LONG_ARROWS,
                    SUPPLEMENTAL_ARROWS_B_TO_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B,
                    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_FENCES_TO_BOWTIE_SYMBOLS,
                    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_TO_SPECIALIZED_PLUS_SIGN_OPERATORS,
                    MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_TO_SUPPLEMENTAL_MATHEMATICAL_OPERATORS,
                    MISCELLANEOUS_SYMBOLS_AND_ARROWS_MATHEMATICAL_ARROWS_SUBSET_2,
                    MISCELLANEOUS_SYMBOLS_AND_ARROWS_MATHEMATICAL_ARROWS_SUBSET_1),
            '+', '|', '~', '¬', '±', '×', '÷', '϶', '⁄', '⁒', '℘',
            '⅋', '↠', '↣', '↦', '↮', '⇒', '⇔', '⍼', '▷', '◁', '♯'),
    /**
     * Unicode general category {@code Sc} ({@code Currency_Symbol}): any currency sign.
     * <p>
     * Declared with four range constants followed by individual characters; the order of both lists is
     * kept exactly as originally declared. One of the literals is a right-to-left character, so an
     * editor may display the neighbouring tokens in a different visual order than they are stored.
     */
    CURRENCY_SYMBOL(
            keys("Sc", "Currency_Symbol"),
            "any currency sign.",
            asList(
                    LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET,
                    NKO_CURRENCY_SYMBOLS,
                    BENGALI_CURRENCY_SYMBOLS,
                    CURRENCY_SYMBOLS_CURRENCY_SYMBOLS_SUBSET),
            '$', '֏', '؋', '৻', '૱', '௹', '฿', '៛', '꠸'),
    /**
     * Unicode general category {@code Sk} ({@code Modifier_Symbol}): a combining character (mark) as a
     * full character on its own.
     * <p>
     * Declared with a list of range constants followed by individual characters; the order of both
     * lists is kept exactly as originally declared.
     */
    MODIFIER_SYMBOL(
            keys("Sk", "Modifier_Symbol"),
            "a combining character (mark) as a full character on its own.",
            asList(
                    SPACING_MODIFIER_LETTERS_MISCELLANEOUS_PHONETIC_MODIFIERS_SUBSET,
                    SPACING_MODIFIER_LETTERS_MISCELLANEOUS_PHONETIC_MODIFIERS_TO_ADDITIONS_BASED_ON_1989_IPA,
                    SPACING_MODIFIER_LETTERS_TONE_LETTERS_TO_EXTENDED_BOPOMOFO_TONE_MARKS,
                    SPACING_MODIFIER_LETTERS_UPA_MODIFIERS,
                    GREEK_AND_COPTIC_SPACING_ACCENT_MARKS,
                    GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_45,
                    GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_10,
                    GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_20,
                    GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_32,
                    GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK_SUBSET_44,
                    CJK_SYMBOLS_AND_PUNCTUATION_VOICING_MARKS_SUBSET_1,
                    MODIFIER_TONE_LETTERS_CORNER_TONE_MARKS_FOR_CHINESE_TO_LEFT_STEM_TONE_LETTERS,
                    LATIN_EXTENDED_D_ADDITIONS_FOR_UPA,
                    LATIN_EXTENDED_D_MODIFIER_LETTERS_SUBSET,
                    LATIN_EXTENDED_E_LETTERS_FOR_SCOTS_DIALECTOLOGY_SUBSET),
            '^', '`', '¨', '¯', '´', '¸', '˭', '͵', '᾽', '꭛'),
    /**
     * Unicode general category {@code So} ({@code Other_Symbol}): various symbols that are not math
     * symbols, currency signs, or combining characters.
     * <p>
     * Arguments, in order: the category keys, a human-readable description, a list of range constants,
     * and individual {@code char} literals.
     * <p>
     * NOTE(review): the {@code asList(...)} entries are presumably {@code SymbolRange} constants from
     * {@code UnicodeCategoryConstants} and the trailing literals presumably single characters that are
     * not covered by any listed range - confirm against the constructor, which is not visible here.
     * NOTE(review): element order may be observable by consumers; do not reorder without checking.
     */
    OTHER_SYMBOL(keys("So", "Other_Symbol"), "various symbols that are not math symbols, currency signs, or combining characters.", asList(CYRILLIC_SUPPLEMENT_RELIGIOUS_SYMBOLS, ARABIC_POETIC_MARKS, ARABIC_SIGNS_FOR_SINDHI, BENGALI_TAMIL_CALENDRICAL_SYMBOLS_TO_TAMIL_CLERICAL_SYMBOLS, TIBETAN_HEAD_MARKS_SUBSET, TIBETAN_ASTROLOGICAL_SIGNS_SUBSET, TIBETAN_ASTROLOGICAL_SIGNS_SUBSET_2, TIBETAN_SIGNS_TO_SYMBOLS, TIBETAN_SYMBOLS_SUBSET, TIBETAN_ASTROLOGICAL_SIGNS_2, TIBETAN_RELIGIOUS_SYMBOLS, MYANMAR_SHAN_SYMBOLS, ETHIOPIC_SUPPLEMENT_TONAL_MARKS_SUBSET, NEW_TAI_LUE_TO_KHMER_SYMBOLS, BALINESE_MUSICAL_SYMBOLS_FOR_NOTES, BALINESE_MUSICAL_SYMBOLS, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_1, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_2, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_7, LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_SUBSET_10, LETTERLIKE_SYMBOLS_ADDITIONAL_LETTERLIKE_SYMBOLS_SUBSET_1, LETTERLIKE_SYMBOLS_ADDITIONAL_LETTERLIKE_SYMBOLS_SUBSET, NUMBER_FORMS_TURNED_DIGITS_SUBSET, ARROWS_SIMPLE_ARROWS_SUBSET_1, ARROWS_ARROWS_WITH_MODIFICATIONS_SUBSET_1, ARROWS_ARROWS_WITH_MODIFICATIONS_SUBSET_2, ARROWS_ARROWS_WITH_MODIFICATIONS_SUBSET_3, ARROWS_ARROWS_WITH_MODIFICATIONS_SUBSET_4, ARROWS_ARROWS_WITH_MODIFICATIONS_TO_PAIRED_ARROWS_AND_HARPOONS, ARROWS_DOUBLE_ARROWS_SUBSET_1, ARROWS_DOUBLE_ARROWS_TO_WHITE_ARROWS_AND_KEYBOARD_SYMBOLS, MISCELLANEOUS_TECHNICAL_MISCELLANEOUS_TECHNICAL, MISCELLANEOUS_TECHNICAL_CROPS_TO_QUINE_CORNERS, MISCELLANEOUS_TECHNICAL_FROWN_AND_SMILE_TO_KEYBOARD_SYMBOLS, MISCELLANEOUS_TECHNICAL_KEYBOARD_SYMBOL_TO_APL, MISCELLANEOUS_TECHNICAL_GRAPHICS_FOR_CONTROL_CODES_TO_KEYBOARD_SYMBOLS_FROM_ISO_9995_7, MISCELLANEOUS_TECHNICAL_HORIZONTAL_BRACKETS_TO_ELECTROTECHNICAL_SYMBOLS, MISCELLANEOUS_TECHNICAL_TO_CONTROL_PICTURES, OPTICAL_CHARACTER_RECOGNITION_OCR_A_TO_MICR, ENCLOSED_ALPHANUMERICS_PARENTHESIZED_LATIN_LETTERS_TO_CIRCLED_LATIN_LETTERS, BOX_DRAWING_TO_GEOMETRIC_SHAPES, 
GEOMETRIC_SHAPES_GEOMETRIC_SHAPES_SUBSET, GEOMETRIC_SHAPES_GEOMETRIC_SHAPES_TO_CONTROL_CODE_GRAPHICS, MISCELLANEOUS_SYMBOLS_WEATHER_AND_ASTROLOGICAL_SYMBOLS_TO_MUSICAL_SYMBOLS, MISCELLANEOUS_SYMBOLS_TO_DINGBATS, DINGBATS_DINGBAT_ARROW_TO_DINGBAT_ARROWS, BRAILLE_PATTERNS_BRAILLE_PATTERNS, MISCELLANEOUS_SYMBOLS_AND_ARROWS_WHITE_AND_BLACK_ARROWS_TO_ELLIPSES, MISCELLANEOUS_SYMBOLS_AND_ARROWS_MATHEMATICAL_ARROWS_SUBSET, MISCELLANEOUS_SYMBOLS_AND_ARROWS_MISCELLANEOUS_ARROW_TO_TRIANGLE_HEADED_ARROWS, MISCELLANEOUS_SYMBOLS_AND_ARROWS_TRIANGLE_HEADED_ARROWS_TO_MISCELLANEOUS_ARROW_SYMBOL, MISCELLANEOUS_SYMBOLS_AND_ARROWS_MISCELLANEOUS_SYMBOL_TO_SYMBOLS_USED_IN_CHESS_NOTATION, COPTIC_SYMBOLS_SUBSET, SUPPLEMENTAL_PUNCTUATION_HISTORIC_PUNCTUATION_SUBSET, CJK_RADICALS_SUPPLEMENT_CJK_RADICALS_SUPPLEMENT_SUBSET, CJK_RADICALS_SUPPLEMENT_CJK_RADICALS_SUPPLEMENT_SUBSET_1, KANGXI_RADICALS_KANGXI_RADICALS_SUBSET, IDEOGRAPHIC_DESCRIPTION_CHARACTERS_IDEOGRAPHIC_DESCRIPTION_CHARACTERS_SUBSET, CJK_SYMBOLS_AND_PUNCTUATION_CJK_SYMBOLS, CJK_SYMBOLS_AND_PUNCTUATION_OTHER_CJK_SYMBOLS, CJK_SYMBOLS_AND_PUNCTUATION_SPECIAL_CJK_INDICATORS, KANBUN_TATETEN_TO_TATETEN, KANBUN_KAERITEN_SUBSET_1, CJK_STROKES_CJK_STROKES_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_PARENTHESIZED_HANGUL_LETTERS_TO_PARENTHESIZED_KOREAN_WORDS, ENCLOSED_CJK_LETTERS_AND_MONTHS_PARENTHESIZED_IDEOGRAPHS_TO_CIRCLED_IDEOGRAPHS_FROM_ARIB_STD_B24, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_HANGUL_LETTERS_TO_CIRCLED_HANGUL_SYLLABLE, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_IDEOGRAPHS_SUBSET_1, ENCLOSED_CJK_LETTERS_AND_MONTHS_TO_CJK_COMPATIBILITY, YIJING_HEXAGRAM_SYMBOLS_YIJING_HEXAGRAM_SYMBOLS, YI_RADICALS_YI_RADICALS_SUBSET, SYLOTI_NAGRI_POETRY_MARKS, COMMON_INDIC_NUMBER_FORMS_NUMBER_FORMS_TO_NUMBER_FORMS, MYANMAR_EXTENDED_A_AITON_SYMBOLS_AND_LETTERS_SUBSET), '¦', '©', '®', '°', '҂', '۞', '۩', '߶', '৺', '୰', '௺', '౿', '൏', '൹', '༓', '༴', '༶', '༸', '᙭', '᥀', '℔', '℥', '℧', '℩', '℮', '⅊', '⅏', '⇓', '〄', '〠', '㉐', '꠹'),
    /**
     * General category {@code N} (Number), declared with the keys "N" and "Number".
     * Its range list shares constants with the Nd, Nl and No entries declared after it
     * (e.g. BASIC_LATIN_ASCII_DIGITS, RUNIC_GOLDEN_NUMBER_RUNES, LATIN_1_SUPPLEMENT_VULGAR_FRACTIONS).
     * Some names here look like merged neighbours of ranges that the sub-categories list separately
     * (e.g. TIBETAN_DIGITS_TO_DIGITS_MINUS_HALF) - presumably adjacent ranges joined; confirm in
     * UnicodeCategoryConstants.
     */
    NUMBER(keys("N", "Number"), "any kind of numeric character in any script.", asList(BASIC_LATIN_ASCII_DIGITS, LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET_4, LATIN_1_SUPPLEMENT_VULGAR_FRACTIONS, ARABIC_ARABIC_INDIC_DIGITS, ARABIC_EASTERN_ARABIC_INDIC_DIGITS, NKO_DIGITS, DEVANAGARI_DIGITS, BENGALI_DIGITS, BENGALI_HISTORIC_SYMBOLS_FOR_FRACTIONAL_VALUES, BENGALI_DIGITS_1, BENGALI_DIGITS_2, BENGALI_DIGITS_3, BENGALI_FRACTION_SIGNS_SUBSET, BENGALI_DIGITS_TO_TAMIL_NUMERICS, TELUGU_DIGITS_SUBSET, TELUGU_TELUGU_FRACTIONS_AND_WEIGHTS_SUBSET, KANNADA_DIGITS_SUBSET, MALAYALAM_MINOR_FRACTIONS, MALAYALAM_DIGITS_TO_FRACTIONS, MALAYALAM_ASTROLOGICAL_DIGITS_SUBSET, MALAYALAM_DIGITS_1, MALAYALAM_DIGITS_SUBSET, TIBETAN_DIGITS_TO_DIGITS_MINUS_HALF, MYANMAR_DIGITS, MYANMAR_SHAN_DIGITS, ETHIOPIC_DIGITS_TO_NUMBERS, RUNIC_GOLDEN_NUMBER_RUNES, KHMER_DIGITS_SUBSET, KHMER_NUMERIC_SYMBOLS_FOR_DIVINATION_LORE_SUBSET, MONGOLIAN_DIGITS_SUBSET, LIMBU_DIGITS, NEW_TAI_LUE_DIGITS_SUBSET, TAI_THAM_HORA_DIGITS_SUBSET, TAI_THAM_THAM_DIGITS_SUBSET, BALINESE_DIGITS, SUNDANESE_DIGITS, LEPCHA_DIGITS_SUBSET, OL_CHIKI_DIGITS, SUPERSCRIPTS_AND_SUBSCRIPTS_SUPERSCRIPTS_SUBSET, SUPERSCRIPTS_AND_SUBSCRIPTS_SUBSCRIPTS_SUBSET, NUMBER_FORMS_FRACTIONS_TO_ARCHAIC_ROMAN_NUMERALS, NUMBER_FORMS_ARCHAIC_ROMAN_NUMERALS_TO_ARCHAIC_ROMAN_NUMERALS_1, ENCLOSED_ALPHANUMERICS_CIRCLED_NUMBERS_TO_NUMBERS_PERIOD, ENCLOSED_ALPHANUMERICS_ADDITIONAL_CIRCLED_NUMBER_TO_DOUBLE_CIRCLED_NUMBERS, DINGBATS_DINGBAT_CIRCLED_DIGITS, CJK_SYMBOLS_AND_PUNCTUATION_SUZHOU_NUMERALS, CJK_SYMBOLS_AND_PUNCTUATION_ADDITIONAL_SUZHOU_NUMERALS, KANBUN_KAERITEN_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_PARENTHESIZED_IDEOGRAPHS_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_NUMBERS_ON_BLACK_SQUARES_FROM_ARIB_STD_B24, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_NUMBERS, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_IDEOGRAPHS_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_NUMBERS_1, VAI_DIGITS, BAMUM_SYLLABLES_SUBSET_1, 
COMMON_INDIC_NUMBER_FORMS_NUMBER_FORMS_SUBSET, SAURASHTRA_DIGITS_SUBSET, KAYAH_LI_DIGITS, JAVANESE_DIGITS_SUBSET, MYANMAR_EXTENDED_B_TAI_LAING_DIGITS, CHAM_DIGITS_SUBSET, MEETEI_MAYEK_DIGITS_SUBSET), '¹', '⁰', '⳽', '〇'),
    /**
     * General category {@code Nd} (Decimal_Digit_Number), declared with the keys "Nd" and
     * "Decimal_Digit_Number". Only named range constants are supplied; no individual characters
     * follow the list.
     */
    DECIMAL_DIGIT_NUMBER(keys("Nd", "Decimal_Digit_Number"), "a digit zero through nine in any script except ideographic scripts.", asList(BASIC_LATIN_ASCII_DIGITS, ARABIC_ARABIC_INDIC_DIGITS, ARABIC_EASTERN_ARABIC_INDIC_DIGITS, NKO_DIGITS, DEVANAGARI_DIGITS, BENGALI_DIGITS, BENGALI_DIGITS_1, BENGALI_DIGITS_2, BENGALI_DIGITS_3, BENGALI_DIGITS_4, TELUGU_DIGITS_SUBSET, KANNADA_DIGITS_SUBSET, MALAYALAM_DIGITS, MALAYALAM_ASTROLOGICAL_DIGITS_SUBSET, MALAYALAM_DIGITS_1, MALAYALAM_DIGITS_SUBSET, TIBETAN_DIGITS, MYANMAR_DIGITS, MYANMAR_SHAN_DIGITS, KHMER_DIGITS_SUBSET, MONGOLIAN_DIGITS_SUBSET, LIMBU_DIGITS, NEW_TAI_LUE_DIGITS_SUBSET_1, TAI_THAM_HORA_DIGITS_SUBSET, TAI_THAM_THAM_DIGITS_SUBSET, BALINESE_DIGITS, SUNDANESE_DIGITS, LEPCHA_DIGITS_SUBSET, OL_CHIKI_DIGITS, VAI_DIGITS, SAURASHTRA_DIGITS_SUBSET, KAYAH_LI_DIGITS, JAVANESE_DIGITS_SUBSET, MYANMAR_EXTENDED_B_TAI_LAING_DIGITS, CHAM_DIGITS_SUBSET, MEETEI_MAYEK_DIGITS_SUBSET)),
    /**
     * General category {@code Nl} (Letter_Number), declared with the keys "Nl" and "Letter_Number".
     * Six named range constants plus one individually listed character.
     */
    LETTER_NUMBER(keys("Nl", "Letter_Number"), "a number that looks like a letter, such as a Roman numeral.", asList(RUNIC_GOLDEN_NUMBER_RUNES, NUMBER_FORMS_ROMAN_NUMERALS_TO_ARCHAIC_ROMAN_NUMERALS, NUMBER_FORMS_ARCHAIC_ROMAN_NUMERALS_1, CJK_SYMBOLS_AND_PUNCTUATION_SUZHOU_NUMERALS, CJK_SYMBOLS_AND_PUNCTUATION_ADDITIONAL_SUZHOU_NUMERALS, BAMUM_SYLLABLES_SUBSET_1), '〇'),
    /**
     * General category {@code No} (Other_Number), declared with the keys "No" and "Other_Number".
     * Named range constants followed by five individually listed characters.
     */
    OTHER_NUMBER(keys("No", "Other_Number"), "a superscript or subscript digit, or a number that is not a digit 0-9 (excluding numbers from ideographic scripts).", asList(LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET_4, LATIN_1_SUPPLEMENT_VULGAR_FRACTIONS, BENGALI_HISTORIC_SYMBOLS_FOR_FRACTIONAL_VALUES, BENGALI_FRACTION_SIGNS_SUBSET, BENGALI_TAMIL_NUMERICS, TELUGU_TELUGU_FRACTIONS_AND_WEIGHTS_SUBSET, MALAYALAM_MINOR_FRACTIONS, MALAYALAM_MALAYALAM_NUMERICS_TO_FRACTIONS, TIBETAN_DIGITS_MINUS_HALF, ETHIOPIC_DIGITS_TO_NUMBERS, KHMER_NUMERIC_SYMBOLS_FOR_DIVINATION_LORE_SUBSET, SUPERSCRIPTS_AND_SUBSCRIPTS_SUPERSCRIPTS_SUBSET, SUPERSCRIPTS_AND_SUBSCRIPTS_SUBSCRIPTS_SUBSET, NUMBER_FORMS_FRACTIONS, ENCLOSED_ALPHANUMERICS_CIRCLED_NUMBERS_TO_NUMBERS_PERIOD, ENCLOSED_ALPHANUMERICS_ADDITIONAL_CIRCLED_NUMBER_TO_DOUBLE_CIRCLED_NUMBERS, DINGBATS_DINGBAT_CIRCLED_DIGITS, KANBUN_KAERITEN_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_PARENTHESIZED_IDEOGRAPHS_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_NUMBERS_ON_BLACK_SQUARES_FROM_ARIB_STD_B24, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_NUMBERS, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_IDEOGRAPHS_SUBSET, ENCLOSED_CJK_LETTERS_AND_MONTHS_CIRCLED_NUMBERS_1, COMMON_INDIC_NUMBER_FORMS_NUMBER_FORMS_SUBSET), '¹', '᧚', '⁰', '↉', '⳽'),
    /**
     * General category {@code P} (Punctuation), declared with the keys "P" and "Punctuation".
     * Named range constants are followed by individually listed characters.
     * <p>
     * Two of the single characters are written as Unicode escapes on purpose: U+037E (Greek question
     * mark) and U+0387 (Greek ano teleia). They render exactly like the ASCII semicolon and the
     * Latin-1 middle dot and are canonically equivalent to them, so a raw literal could be silently
     * replaced by an editor or by Unicode normalization of the source file.
     */
    PUNCTUATION(keys("P", "Punctuation"), "any kind of punctuation character.", asList(BASIC_LATIN_ASCII_PUNCTUATION_AND_SYMBOLS_SUBSET_1, BASIC_LATIN_ASCII_PUNCTUATION_AND_SYMBOLS_SUBSET_3, BASIC_LATIN_ASCII_PUNCTUATION, BASIC_LATIN_ASCII_PUNCTUATION_1, BASIC_LATIN_ASCII_PUNCTUATION_2, BASIC_LATIN_ASCII_PUNCTUATION_AND_SYMBOLS_SUBSET, LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET_5, CYRILLIC_SUPPLEMENT_MODIFIER_LETTERS_SUBSET, CYRILLIC_SUPPLEMENT_PUNCTUATION_SUBSET, CYRILLIC_SUPPLEMENT_ADDITIONAL_PUNCTUATION_SUBSET, ARABIC_PUNCTUATION, ARABIC_PUNCTUATION_1, ARABIC_PUNCTUATION_SUBSET, ARABIC_PUNCTUATION_4, SYRIAC_SYRIAC_PUNCTUATION_AND_SIGNS_SUBSET, NKO_PUNCTUATION, SAMARITAN_PUNCTUATION_SUBSET, DEVANAGARI_GENERIC_PUNCTUATION_FOR_SCRIPTS_OF_INDIA, MALAYALAM_SIGNS_SUBSET, TIBETAN_HEAD_MARKS_TO_MARKS_AND_SIGNS, TIBETAN_PAIRED_PUNCTUATION, TIBETAN_MARKS_TO_HEAD_MARKS, TIBETAN_ANNOTATION_MARKS_SUBSET, MYANMAR_PUNCTUATION_TO_VARIOUS_SIGNS, ETHIOPIC_PUNCTUATION, OGHAM_PUNCTUATION_SUBSET, RUNIC_PUNCTUATION, HANUNOO_GENERIC_PUNCTUATION_FOR_PHILIPPINE_SCRIPTS_SUBSET, KHMER_VARIOUS_SIGNS_SUBSET_1, KHMER_VARIOUS_SIGNS_SUBSET_2, MONGOLIAN_PUNCTUATION, LIMBU_VARIOUS_SIGNS_SUBSET, BUGINESE_VARIOUS_SIGNS, TAI_THAM_LOGOGRAPHS_TO_PUNCTUATION, TAI_THAM_PUNCTUATION_SUBSET, BALINESE_PUNCTUATION, BATAK_PUNCTUATION, LEPCHA_PUNCTUATION, OL_CHIKI_PUNCTUATION, SUNDANESE_SUPPLEMENT_PUNCTUATION_SUBSET, GENERAL_PUNCTUATION_DASHES_TO_GENERAL_PUNCTUATION, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_TO_GENERAL_PUNCTUATION, GENERAL_PUNCTUATION_BRACKETS_TO_GENERAL_PUNCTUATION, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_TO_ARCHAIC_PUNCTUATION, SUPERSCRIPTS_AND_SUBSCRIPTS_SUPERSCRIPTS_SUBSET_2, SUPERSCRIPTS_AND_SUBSCRIPTS_SUBSCRIPTS_SUBSET_2, MISCELLANEOUS_TECHNICAL_CEILINGS_AND_FLOORS, MISCELLANEOUS_TECHNICAL_DEPRECATED_ANGLE_BRACKETS, DINGBATS_ORNAMENTAL_BRACKETS, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_PAIRED_PUNCTUATION, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A_MATHEMATICAL_BRACKETS, 
MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_BRACKETS_TO_BRACKETS, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_FENCES_1, MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B_BRACKETS_2, COPTIC_OLD_NUBIAN_PUNCTUATION, COPTIC_PUNCTUATION, SUPPLEMENTAL_PUNCTUATION_NEW_TESTAMENT_EDITORIAL_SYMBOLS_TO_HISTORIC_PUNCTUATION, SUPPLEMENTAL_PUNCTUATION_HISTORIC_PUNCTUATION_TO_HISTORIC_PUNCTUATION, CJK_SYMBOLS_AND_PUNCTUATION_CJK_SYMBOLS_AND_PUNCTUATION_SUBSET, CJK_SYMBOLS_AND_PUNCTUATION_CJK_ANGLE_BRACKETS_TO_CJK_BRACKETS, CJK_SYMBOLS_AND_PUNCTUATION_CJK_BRACKETS_TO_CJK_PUNCTUATION, LISU_PUNCTUATION, VAI_PUNCTUATION, BAMUM_PUNCTUATION_SUBSET, PHAGS_PA_HEAD_MARKS_FOR_TIBETAN_TO_PUNCTUATION_FOR_TIBETAN, SAURASHTRA_PUNCTUATION, DEVANAGARI_EXTENDED_EDITORIAL_MARKS_SUBSET, KAYAH_LI_PUNCTUATION, JAVANESE_PUNCTUATION_SUBSET, JAVANESE_ELLIPSIS_MARKS, CHAM_PUNCTUATION, TAI_VIET_PUNCTUATION, MEETEI_MAYEK_EXTENSIONS_PUNCTUATION), '_', '{', '}', '¡', '§', '«', '»', '¿', '\u037E', '\u0387', '־', '׀', '׃', '׆', '؛', '۔', '࡞', '॰', '৽', '੶', '૰', '౷', '಄', '෴', '๏', '༔', '྅', '჻', '᐀', '᙮', '᳓', '⵰', '⹒', '〰', '〽', '゠', '・', '꙳', '꙾', '꣼', '꥟', '꯫'),
    /**
     * General category {@code Pd} (Dash_Punctuation), declared with the keys "Pd" and
     * "Dash_Punctuation". Two named range constants plus individually listed characters.
     */
    DASH_PUNCTUATION(keys("Pd", "Dash_Punctuation"), "any kind of hyphen or dash.", asList(GENERAL_PUNCTUATION_DASHES, SUPPLEMENTAL_PUNCTUATION_DASHES), '-', '֊', '־', '᐀', '᠆', '⸗', '⸚', '⹀', '〜', '〰', '゠'),
    /**
     * General category {@code Ps} (Open_Punctuation), declared with the keys "Ps" and
     * "Open_Punctuation". No range list is passed; every member is listed individually.
     * <p>
     * Three members are written as Unicode escapes on purpose because the raw glyphs are easy to
     * confuse or to corrupt: U+201A (single low-9 quotation mark, looks like an ASCII comma), and the
     * two left angle brackets U+2329 and U+3008, which look alike and are canonically equivalent, so
     * Unicode normalization of the source file would collapse them into one character.
     */
    OPEN_PUNCTUATION(keys("Ps", "Open_Punctuation"), "any kind of opening bracket.", '(', '[', '{', '༺', '༼', '᚛', '\u201A', '„', '⁅', '⁽', '₍', '⌈', '⌊', '\u2329', '❨', '❪', '❬', '❮', '❰', '❲', '❴', '⟅', '⟦', '⟨', '⟪', '⟬', '⟮', '⦃', '⦅', '⦇', '⦉', '⦋', '⦍', '⦏', '⦑', '⦓', '⦕', '⦗', '⧘', '⧚', '⧼', '⸢', '⸤', '⸦', '⸨', '⹂', '\u3008', '《', '「', '『', '【', '〔', '〖', '〘', '〚', '〝'),
    /**
     * General category {@code Pe} (Close_Punctuation), declared with the keys "Pe" and
     * "Close_Punctuation". Uses the form that takes one range constant and an explicit char array.
     * <p>
     * The two right angle brackets U+232A and U+3009 are written as Unicode escapes on purpose: they
     * look alike and are canonically equivalent, so Unicode normalization of the source file would
     * collapse the raw literals into one character.
     */
    CLOSE_PUNCTUATION(keys("Pe", "Close_Punctuation"), "any kind of closing bracket.", CJK_SYMBOLS_AND_PUNCTUATION_CJK_PUNCTUATION_SUBSET, new char[]{')', ']', '}', '༻', '༽', '᚜', '⁆', '⁾', '₎', '⌉', '⌋', '\u232A', '❩', '❫', '❭', '❯', '❱', '❳', '❵', '⟆', '⟧', '⟩', '⟫', '⟭', '⟯', '⦄', '⦆', '⦈', '⦊', '⦌', '⦎', '⦐', '⦒', '⦔', '⦖', '⦘', '⧙', '⧛', '⧽', '⸣', '⸥', '⸧', '⸩', '\u3009', '》', '」', '』', '】', '〕', '〗', '〙', '〛'}),
    /**
     * General category {@code Pi} (Initial_Punctuation), declared with the keys "Pi" and
     * "Initial_Punctuation". Uses the form that takes one range constant and an explicit char array.
     */
    INITIAL_PUNCTUATION(keys("Pi", "Initial_Punctuation"), "any kind of opening quote.", GENERAL_PUNCTUATION_QUOTATION_MARKS_AND_APOSTROPHE_SUBSET, new char[]{'«', '‘', '‟', '‹', '⸂', '⸄', '⸉', '⸌', '⸜', '⸠'}),
    /**
     * General category {@code Pf} (Final_Punctuation), declared with the keys "Pf" and
     * "Final_Punctuation". No range is passed; every member is listed individually.
     */
    FINAL_PUNCTUATION(keys("Pf", "Final_Punctuation"), "any kind of closing quote.", '»', '’', '”', '›', '⸃', '⸅', '⸊', '⸍', '⸝', '⸡'),
    /**
     * General category {@code Pc} (Connector_Punctuation), declared with the keys "Pc" and
     * "Connector_Punctuation". One range constant plus an explicit two-element char array.
     */
    CONNECTOR_PUNCTUATION(keys("Pc", "Connector_Punctuation"), "a punctuation character such as an underscore that connects words.", GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_SUBSET_1, new char[]{'_', '⁔'}),
    /**
     * General category {@code Po} (Other_Punctuation), declared with the keys "Po" and
     * "Other_Punctuation". Named range constants are followed by individually listed characters.
     * <p>
     * Two of the single characters are written as Unicode escapes on purpose: U+037E (Greek question
     * mark) and U+0387 (Greek ano teleia). They render exactly like the ASCII semicolon and the
     * Latin-1 middle dot and are canonically equivalent to them, so a raw literal could be silently
     * replaced by an editor or by Unicode normalization of the source file.
     */
    OTHER_PUNCTUATION(keys("Po", "Other_Punctuation"), "any kind of punctuation character that is not a dash, bracket, quote or connector.", asList(BASIC_LATIN_ASCII_PUNCTUATION_AND_SYMBOLS_SUBSET_1, BASIC_LATIN_ASCII_PUNCTUATION_AND_SYMBOLS_SUBSET_2, BASIC_LATIN_ASCII_PUNCTUATION_SUBSET, BASIC_LATIN_ASCII_PUNCTUATION_1, BASIC_LATIN_ASCII_PUNCTUATION_2, LATIN_1_SUPPLEMENT_LATIN_1_PUNCTUATION_AND_SYMBOLS_SUBSET_5, CYRILLIC_SUPPLEMENT_MODIFIER_LETTERS_SUBSET, CYRILLIC_SUPPLEMENT_ADDITIONAL_PUNCTUATION_SUBSET, ARABIC_PUNCTUATION, ARABIC_PUNCTUATION_1, ARABIC_PUNCTUATION_SUBSET, ARABIC_PUNCTUATION_4, SYRIAC_SYRIAC_PUNCTUATION_AND_SIGNS_SUBSET, NKO_PUNCTUATION, SAMARITAN_PUNCTUATION_SUBSET, DEVANAGARI_GENERIC_PUNCTUATION_FOR_SCRIPTS_OF_INDIA, MALAYALAM_SIGNS_SUBSET, TIBETAN_HEAD_MARKS_TO_MARKS_AND_SIGNS, TIBETAN_MARKS_TO_HEAD_MARKS, TIBETAN_ANNOTATION_MARKS_SUBSET, MYANMAR_PUNCTUATION_TO_VARIOUS_SIGNS, ETHIOPIC_PUNCTUATION, RUNIC_PUNCTUATION, HANUNOO_GENERIC_PUNCTUATION_FOR_PHILIPPINE_SCRIPTS_SUBSET, KHMER_VARIOUS_SIGNS_SUBSET_1, KHMER_VARIOUS_SIGNS_SUBSET_2, MONGOLIAN_PUNCTUATION_SUBSET, MONGOLIAN_PUNCTUATION_SUBSET_1, LIMBU_VARIOUS_SIGNS_SUBSET, BUGINESE_VARIOUS_SIGNS, TAI_THAM_LOGOGRAPHS_TO_PUNCTUATION, TAI_THAM_PUNCTUATION_SUBSET, BALINESE_PUNCTUATION, BATAK_PUNCTUATION, LEPCHA_PUNCTUATION, OL_CHIKI_PUNCTUATION, SUNDANESE_SUPPLEMENT_PUNCTUATION_SUBSET, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_1, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_2, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_TO_GENERAL_PUNCTUATION_1, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_SUBSET, GENERAL_PUNCTUATION_DOUBLE_PUNCTUATION_FOR_VERTICAL_TEXT_TO_GENERAL_PUNCTUATION, GENERAL_PUNCTUATION_GENERAL_PUNCTUATION_TO_ARCHAIC_PUNCTUATION_1, COPTIC_OLD_NUBIAN_PUNCTUATION, COPTIC_PUNCTUATION, SUPPLEMENTAL_PUNCTUATION_NEW_TESTAMENT_EDITORIAL_SYMBOLS_SUBSET, SUPPLEMENTAL_PUNCTUATION_NEW_TESTAMENT_EDITORIAL_SYMBOLS_SUBSET_1, 
SUPPLEMENTAL_PUNCTUATION_ANCIENT_GREEK_TEXTUAL_SYMBOLS, SUPPLEMENTAL_PUNCTUATION_GENERAL_PUNCTUATION, SUPPLEMENTAL_PUNCTUATION_DICTIONARY_PUNCTUATION_1, SUPPLEMENTAL_PUNCTUATION_HISTORIC_PUNCTUATION_SUBSET_1, SUPPLEMENTAL_PUNCTUATION_HISTORIC_PUNCTUATION_TO_PALAEOTYPE_TRANSLITERATION_SYMBOLS, SUPPLEMENTAL_PUNCTUATION_ALTERNATE_FORMS_OF_PUNCTUATION_TO_ALTERNATE_FORMS_OF_PUNCTUATION, SUPPLEMENTAL_PUNCTUATION_MISCELLANEOUS_PUNCTUATION_TO_HISTORIC_PUNCTUATION, CJK_SYMBOLS_AND_PUNCTUATION_CJK_SYMBOLS_AND_PUNCTUATION_SUBSET, LISU_PUNCTUATION, VAI_PUNCTUATION, BAMUM_PUNCTUATION_SUBSET, PHAGS_PA_HEAD_MARKS_FOR_TIBETAN_TO_PUNCTUATION_FOR_TIBETAN, SAURASHTRA_PUNCTUATION, DEVANAGARI_EXTENDED_EDITORIAL_MARKS_SUBSET, KAYAH_LI_PUNCTUATION, JAVANESE_PUNCTUATION_SUBSET, JAVANESE_ELLIPSIS_MARKS, CHAM_PUNCTUATION, TAI_VIET_PUNCTUATION, MEETEI_MAYEK_EXTENSIONS_PUNCTUATION), '*', ',', '\\', '¡', '§', '¿', '\u037E', '\u0387', '։', '׀', '׃', '׆', '؛', '۔', '࡞', '॰', '৽', '੶', '૰', '౷', '಄', '෴', '๏', '༔', '྅', '჻', '᙮', '᳓', '⁓', '⵰', '⸋', '⸛', '⹁', '⹒', '〽', '・', '꙳', '꙾', '꣼', '꥟', '꯫'),
    /**
     * General category {@code Cc} (Control), declared with the keys "Cc" and "Control".
     * The whole category is described by a single range constant.
     * NOTE(review): the bounds of BASIC_LATIN_TO_LATIN_1_SUPPLEMENT are not visible here - confirm
     * in UnicodeCategoryConstants which control characters it actually covers.
     */
    CONTROL(keys("Cc", "Control"), "an ASCII or Latin-1 control character", BASIC_LATIN_TO_LATIN_1_SUPPLEMENT),
    /**
     * General category {@code Cf} (Format), declared with the keys "Cf" and "Format".
     * Five named range constants are followed by six individually listed characters.
     * <p>
     * The single characters are invisible formatting code points, so they are written as Unicode
     * escapes rather than raw literals: U+00AD soft hyphen, U+061C Arabic letter mark, U+06DD Arabic
     * end of ayah, U+070F Syriac abbreviation mark, U+08E2 Arabic disputed end of ayah and U+180E
     * Mongolian vowel separator. The compiled values are the same as with raw literals.
     */
    FORMAT(keys("Cf", "Format"), "invisible formatting indicator.", asList(ARABIC_SUBTENDING_MARKS_TO_SUBTENDING_MARKS, GENERAL_PUNCTUATION_FORMAT_CHARACTERS, GENERAL_PUNCTUATION_FORMAT_CHARACTERS_1, GENERAL_PUNCTUATION_FORMAT_CHARACTER_TO_INVISIBLE_OPERATORS, GENERAL_PUNCTUATION_FORMAT_CHARACTERS_TO_DEPRECATED), '\u00AD', '\u061C', '\u06DD', '\u070F', '\u08E2', '\u180E'),
    // General category Co (Private_Use), declared with the keys "Co" and "Private_Use".
    // The bounds are given numerically (U+E000 to U+F8FF, the same values IN_PRIVATE_USE_AREA uses)
    // instead of as raw private-use characters, which have no glyph and are easily lost or mangled
    // by editors and encoding conversions.
    /*NO_CHANGE*/    PRIVATE_USE(keys("Co", "Private_Use"), "any code point reserved for private use.", range(0xE000, 0xF8FF)),
    // Unicode block categories. Each constant is declared with an "In<Block>" key (some also with a
    // shorter alias), a description string that records code-point bounds, and one range constant.
    // NOTE(review): the description "32-U+007F" differs from the "U+XXXX-U+YYYY" form used by the
    // other block constants - presumably it signals that the range starts at code point 32; confirm.
    IN_BASIC_LATIN(keys("InBasic_Latin", "Latin"), "32-U+007F", BASIC_LATIN_ASCII_PUNCTUATION_AND_SYMBOLS_TO_ASCII_PUNCTUATION_AND_SYMBOLS),
    IN_IPA_EXTENSIONS(keys("InIPA_Extensions"), "U+0250-U+02AF", IPA_EXTENSIONS_IPA_EXTENSIONS_TO_ADDITIONS_FOR_SINOLOGY),
    IN_SPACING_MODIFIER_LETTERS(keys("InSpacing_Modifier_Letters"), "U+02B0-U+02FF", SPACING_MODIFIER_LETTERS_LATIN_SUPERSCRIPT_MODIFIER_LETTERS_TO_UPA_MODIFIERS),
    IN_COMBINING_DIACRITICAL_MARKS(keys("InCombining_Diacritical_Marks"), "U+0300-U+036F", COMBINING_DIACRITICAL_MARKS_ORDINARY_DIACRITICS_TO_MEDIEVAL_SUPERSCRIPT_LETTER_DIACRITICS),

    // Script-oriented Unicode blocks. Most constants carry two keys: the "In<Block>" name and a bare
    // script alias (e.g. "Cyrillic"). The description string records the block's code-point bounds.
    // NOTE(review): several range constants are named after a different block than the one they are
    // used for (e.g. IN_ARMENIAN and IN_HEBREW use CYRILLIC_SUPPLEMENT_* constants, IN_GURMUKHI to
    // IN_TAMIL use BENGALI_* constants, IN_SINHALA to IN_LAO use MALAYALAM_* constants). Presumably
    // this is an artifact of how the constant names were generated - confirm the actual bounds in
    // UnicodeCategoryConstants.
    IN_CYRILLIC(keys("InCyrillic", "Cyrillic"), "U+0400-U+04FF", CYRILLIC_CYRILLIC_EXTENSIONS_TO_ADDITIONS_FOR_NIVKH),
    IN_CYRILLIC_SUPPLEMENTARY(keys("InCyrillic_Supplementary"), "U+0500-U+052F", CYRILLIC_SUPPLEMENT_KOMI_LETTERS_TO_KHANTY_LETTERS),
    IN_ARMENIAN(keys("InArmenian", "Armenian"), "U+0530-U+058F", CYRILLIC_SUPPLEMENT_ARMENIAN_TO_RELIGIOUS_SYMBOLS),
    IN_HEBREW(keys("InHebrew", "Hebrew"), "U+0590-U+05FF", CYRILLIC_SUPPLEMENT_HEBREW_TO_ADDITIONAL_PUNCTUATION),
    IN_ARABIC(keys("InArabic", "Arabic"), "U+0600-U+06FF", ARABIC_SUBTENDING_MARKS_TO_SIGNS_FOR_SINDHI),
    IN_SYRIAC(keys("InSyriac", "Syriac"), "U+0700-U+074F", SYRIAC_SYRIAC_PUNCTUATION_AND_SIGNS_TO_SOGDIAN_LETTERS),
    IN_THAANA(keys("InThaana", "Thaana"), "U+0780-U+07BF", THAANA_BASIC_CONSONANTS_TO_CONSONANT_FOR_ADDU_DIALECT),
    IN_DEVANAGARI(keys("InDevanagari", "Devanagari"), "U+0900-U+097F", DEVANAGARI_VARIOUS_SIGNS_TO_SINDHI_IMPLOSIVES),
    IN_BENGALI(keys("InBengali", "Bengali"), "U+0980-U+09FF", BENGALI_VARIOUS_SIGNS_TO_SIGNS),
    IN_GURMUKHI(keys("InGurmukhi", "Gurmukhi"), "U+0A00-U+0A7F", BENGALI_GURMUKHI_TO_SIGNS),
    IN_GUJARATI(keys("InGujarati", "Gujarati"), "U+0A80-U+0AFF", BENGALI_GUJARATI_TO_TRANSLITERATION_SIGNS),
    IN_ORIYA(keys("InOriya", "Oriya"), "U+0B00-U+0B7F", BENGALI_ORIYA_TO_FRACTION_SIGNS),
    IN_TAMIL(keys("InTamil", "Tamil"), "U+0B80-U+0BFF", BENGALI_TAMIL_TO_TAMIL_CLERICAL_SYMBOL),
    IN_TELUGU(keys("InTelugu", "Telugu"), "U+0C00-U+0C7F", TELUGU_VARIOUS_SIGNS_TO_TELUGU_FRACTIONS_AND_WEIGHTS),
    IN_KANNADA(keys("InKannada", "Kannada"), "U+0C80-U+0CFF", KANNADA_VARIOUS_SIGNS_TO_SIGNS_USED_IN_SANSKRIT),
    IN_MALAYALAM(keys("InMalayalam", "Malayalam"), "U+0D00-U+0D7F", MALAYALAM_VARIOUS_SIGNS_TO_CHILLU_LETTERS),
    IN_SINHALA(keys("InSinhala", "Sinhala"), "U+0D80-U+0DFF", MALAYALAM_SINHALA_TO_PUNCTUATION),
    IN_THAI(keys("InThai", "Thai"), "U+0E00-U+0E7F", MALAYALAM_THAI_TO_SIGNS),
    IN_LAO(keys("InLao", "Lao"), "U+0E80-U+0EFF", MALAYALAM_LAO_TO_CONSONANTS_FOR_KHMU),
    IN_TIBETAN(keys("InTibetan", "Tibetan"), "U+0F00-U+0FFF", TIBETAN_SYLLABLE_TO_ANNOTATION_MARKS),
    IN_MYANMAR(keys("InMyanmar", "Myanmar"), "U+1000-U+109F", MYANMAR_CONSONANTS_TO_SHAN_SYMBOLS),
    IN_GEORGIAN(keys("InGeorgian", "Georgian"), "U+10A0-U+10FF", GEORGIAN_CAPITAL_LETTERS_KHUTSURI_TO_ADDITIONAL_LETTERS_FOR_OSSETIAN_AND_ABKHAZ),
    IN_HANGUL_JAMO(keys("InHangul_Jamo", "Hangul"), "U+1100-U+11FF", HANGUL_JAMO_INITIAL_CONSONANTS_TO_OLD_FINAL_CONSONANTS),
    IN_ETHIOPIC(keys("InEthiopic", "Ethiopic"), "U+1200-U+137F", ETHIOPIC_SYLLABLES_TO_NUMBERS),
    IN_CHEROKEE(keys("InCherokee", "Cherokee"), "U+13A0-U+13FF", CHEROKEE_UPPERCASE_SYLLABLES_TO_ARCHAIC_LOWERCASE_SYLLABLE),
    IN_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS(keys("InUnified_Canadian_Aboriginal_Syllabics", "Canadian_Aboriginal"), "U+1400-U+167F", UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_PUNCTUATION_TO_SYLLABLES),
    IN_OGHAM(keys("InOgham", "Ogham"), "U+1680-U+169F", OGHAM_SPACE_TO_PUNCTUATION),
    IN_RUNIC(keys("InRunic", "Runic"), "U+16A0-U+16FF", RUNIC_LETTERS_TO_CRYPTOGRAMMIC_LETTERS),
    IN_TAGALOG(keys("InTagalog", "Tagalog"), "U+1700-U+171F", TAGALOG_INDEPENDENT_VOWELS_TO_VIRAMAS),
    IN_HANUNOO(keys("InHanunoo", "Hanunoo"), "U+1720-U+173F", HANUNOO_INDEPENDENT_VOWELS_TO_GENERIC_PUNCTUATION_FOR_PHILIPPINE_SCRIPTS),
    IN_BUHID(keys("InBuhid", "Buhid"), "U+1740-U+175F", BUHID_INDEPENDENT_VOWELS_TO_DEPENDENT_VOWEL_SIGNS),
    IN_TAGBANWA(keys("InTagbanwa", "Tagbanwa"), "U+1760-U+177F", TAGBANWA_INDEPENDENT_VOWELS_TO_DEPENDENT_VOWEL_SIGNS),
    IN_KHMER(keys("InKhmer", "Khmer"), "U+1780-U+17FF", KHMER_CONSONANTS_TO_NUMERIC_SYMBOLS_FOR_DIVINATION_LORE),
    IN_MONGOLIAN(keys("InMongolian", "Mongolian"), "U+1800-U+18AF", MONGOLIAN_PUNCTUATION_TO_EXTENSIONS_FOR_SANSKRIT_AND_TIBETAN),
    IN_LIMBU(keys("InLimbu", "Limbu"), "U+1900-U+194F", LIMBU_CONSONANTS_TO_DIGITS),
    IN_TAI_LE(keys("InTai_Le"), "U+1950-U+197F", TAI_LE_CONSONANTS_TO_TONE_LETTERS),
    IN_KHMER_SYMBOLS(keys("InKhmer_Symbols"), "U+19E0-U+19FF", KHMER_SYMBOLS_LUNAR_DATE_SYMBOLS),
    IN_PHONETIC_EXTENSIONS(keys("InPhonetic_Extensions"), "U+1D00-U+1D7F", PHONETIC_EXTENSIONS_LATIN_LETTERS_TO_OTHER_PHONETIC_SYMBOLS),
    IN_LATIN_EXTENDED_ADDITIONAL(keys("InLatin_Extended_Additional"), "U+1E00-U+1EFF", LATIN_EXTENDED_ADDITIONAL_LATIN_GENERAL_USE_EXTENSIONS_TO_MEDIEVALIST_ADDITIONS),
    IN_GREEK_EXTENDED(keys("InGreek_Extended", "Greek"), "U+1F00-U+1FFF", GREEK_EXTENDED_PRECOMPOSED_POLYTONIC_GREEK),
    // Symbol, punctuation and technical Unicode blocks. Each constant has a single "In<Block>" key
    // (only IN_BRAILLE_PATTERNS adds an alias, "Braille"), a description string with the block's
    // code-point bounds, and one named range constant.
    IN_GENERAL_PUNCTUATION(keys("InGeneral_Punctuation"), "U+2000-U+206F", GENERAL_PUNCTUATION_SPACES_TO_DEPRECATED),
    IN_SUPERSCRIPTS_AND_SUBSCRIPTS(keys("InSuperscripts_and_Subscripts"), "U+2070-U+209F", SUPERSCRIPTS_AND_SUBSCRIPTS_SUPERSCRIPTS_TO_SUBSCRIPTS_FOR_UPA),
    IN_CURRENCY_SYMBOLS(keys("InCurrency_Symbols"), "U+20A0-U+20CF", CURRENCY_SYMBOLS_CURRENCY_SYMBOLS),
    IN_LETTERLIKE_SYMBOLS(keys("InLetterlike_Symbols"), "U+2100-U+214F", LETTERLIKE_SYMBOLS_LETTERLIKE_SYMBOLS_TO_LOWERCASE_CLAUDIAN_LETTER),
    IN_NUMBER_FORMS(keys("InNumber_Forms"), "U+2150-U+218F", NUMBER_FORMS_FRACTIONS_TO_TURNED_DIGITS),
    IN_ARROWS(keys("InArrows"), "U+2190-U+21FF", ARROWS_SIMPLE_ARROWS_TO_MISCELLANEOUS_ARROWS),
    IN_MATHEMATICAL_OPERATORS(keys("InMathematical_Operators"), "U+2200-U+22FF", MATHEMATICAL_OPERATORS_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_TO_RELATIONS),
    IN_MISCELLANEOUS_TECHNICAL(keys("InMiscellaneous_Technical"), "U+2300-U+23FF", MISCELLANEOUS_TECHNICAL_MISCELLANEOUS_TECHNICAL_TO_POWER_SYMBOL_FROM_IEEE_1621_2004),
    IN_CONTROL_PICTURES(keys("InControl_Pictures"), "U+2400-U+243F", CONTROL_PICTURES_GRAPHIC_PICTURES_FOR_CONTROL_CODES_TO_SPECIFIC_SYMBOL_FOR_CONTROL_CODE),
    IN_OPTICAL_CHARACTER_RECOGNITION(keys("InOptical_Character_Recognition"), "U+2440-U+245F", OPTICAL_CHARACTER_RECOGNITION_OCR_A_TO_OCR),
    IN_ENCLOSED_ALPHANUMERICS(keys("InEnclosed_Alphanumerics"), "U+2460-U+24FF", ENCLOSED_ALPHANUMERICS_CIRCLED_NUMBERS_TO_DOUBLE_CIRCLED_NUMBERS),
    IN_BOX_DRAWING(keys("InBox_Drawing"), "U+2500-U+257F", BOX_DRAWING_LIGHT_AND_HEAVY_SOLID_LINES_TO_MIXED_LIGHT_AND_HEAVY_LINES),
    IN_BLOCK_ELEMENTS(keys("InBlock_Elements"), "U+2580-U+259F", BLOCK_ELEMENTS_BLOCK_ELEMENTS_TO_TERMINAL_GRAPHIC_CHARACTERS),
    IN_GEOMETRIC_SHAPES(keys("InGeometric_Shapes"), "U+25A0-U+25FF", GEOMETRIC_SHAPES_GEOMETRIC_SHAPES_TO_GEOMETRIC_SHAPES),
    IN_MISCELLANEOUS_SYMBOLS(keys("InMiscellaneous_Symbols"), "U+2600-U+26FF", MISCELLANEOUS_SYMBOLS_WEATHER_AND_ASTROLOGICAL_SYMBOLS_TO_MAP_SYMBOLS_FROM_ARIB_STD_B24),
    IN_DINGBATS(keys("InDingbats"), "U+2700-U+27BF", DINGBATS_MISCELLANEOUS_TO_DINGBAT_ARROWS),
    IN_BRAILLE_PATTERNS(keys("InBraille_Patterns", "Braille"), "U+2800-U+28FF", BRAILLE_PATTERNS_BRAILLE_PATTERNS),
    IN_SUPPLEMENTAL_MATHEMATICAL_OPERATORS(keys("InSupplemental_Mathematical_Operators"), "U+2A00-U+2AFF", SUPPLEMENTAL_MATHEMATICAL_OPERATORS_N_ARY_OPERATORS_TO_OPERATORS),
    IN_MISCELLANEOUS_SYMBOLS_AND_ARROWS(keys("InMiscellaneous_Symbols_and_Arrows"), "U+2B00-U+2BFF", MISCELLANEOUS_SYMBOLS_AND_ARROWS_WHITE_AND_BLACK_ARROWS_TO_SYMBOLS_USED_IN_CHESS_NOTATION),
    IN_CJK_RADICALS_SUPPLEMENT(keys("InCJK_Radicals_Supplement"), "U+2E80-U+2EFF", CJK_RADICALS_SUPPLEMENT_CJK_RADICALS_SUPPLEMENT),
    IN_KANGXI_RADICALS(keys("InKangxi_Radicals"), "U+2F00-U+2FDF", KANGXI_RADICALS_KANGXI_RADICALS_SUBSET_1),
    IN_IDEOGRAPHIC_DESCRIPTION_CHARACTERS(keys("InIdeographic_Description_Characters"), "U+2FF0-U+2FFF", IDEOGRAPHIC_DESCRIPTION_CHARACTERS_IDEOGRAPHIC_DESCRIPTION_CHARACTERS),
    IN_CJK_SYMBOLS_AND_PUNCTUATION(keys("InCJK_Symbols_and_Punctuation"), "U+3000-U+303F", CJK_SYMBOLS_AND_PUNCTUATION_CJK_SYMBOLS_AND_PUNCTUATION_TO_SPECIAL_CJK_INDICATORS),
    // East Asian Unicode blocks (kana, Bopomofo, Hangul, CJK ideographs, Yi). Each constant has an
    // "In<Block>" key, a description string with the block's code-point bounds, and one named range
    // constant.
    // NOTE(review): some range constants are named after a different block than the one they are
    // used for (e.g. IN_HIRAGANA uses a CJK_SYMBOLS_AND_PUNCTUATION_* constant, IN_HANGUL_SYLLABLES
    // a MEETEI_MAYEK_* constant) - presumably a naming artifact; confirm the actual bounds in
    // UnicodeCategoryConstants.
    IN_HIRAGANA(keys("InHiragana", "Hiragana"), "U+3040-U+309F", CJK_SYMBOLS_AND_PUNCTUATION_HIRAGANA_TO_ITERATION_MARKS),
    IN_KATAKANA(keys("InKatakana", "Katakana"), "U+30A0-U+30FF", KATAKANA_KATAKANA_PUNCTUATION_TO_ITERATION_MARKS),
    IN_BOPOMOFO(keys("InBopomofo", "Bopomofo"), "U+3100-U+312F", KATAKANA_BOPOMOFO_TO_MISCELLANEOUS_ADDITIONS),
    IN_HANGUL_COMPATIBILITY_JAMO(keys("InHangul_Compatibility_Jamo"), "U+3130-U+318F", KATAKANA_HANGUL_COMPATIBILITY_JAMO_TO_OLD_VOWEL_LETTERS),
    IN_KANBUN(keys("InKanbun"), "U+3190-U+319F", KANBUN_TATETEN_TO_KAERITEN),
    IN_BOPOMOFO_EXTENDED(keys("InBopomofo_Extended"), "U+31A0-U+31BF", BOPOMOFO_EXTENDED_EXTENDED_BOPOMOFO_FOR_MINNAN_AND_HAKKA_TO_EXTENDED_BOPOMOFO_FOR_CANTONESE),
    IN_KATAKANA_PHONETIC_EXTENSIONS(keys("InKatakana_Phonetic_Extensions"), "U+31F0-U+31FF", KATAKANA_PHONETIC_EXTENSIONS_PHONETIC_EXTENSIONS_FOR_AINU),
    IN_ENCLOSED_CJK_LETTERS_AND_MONTHS(keys("InEnclosed_CJK_Letters_and_Months"), "U+3200-U+32FF", ENCLOSED_CJK_LETTERS_AND_MONTHS_PARENTHESIZED_HANGUL_LETTERS_TO_CIRCLED_KATAKANA),
    IN_CJK_COMPATIBILITY(keys("InCJK_Compatibility"), "U+3300-U+33FF", CJK_COMPATIBILITY_SQUARED_KATAKANA_WORDS_TO_TELEGRAPH_SYMBOLS_FOR_DAYS),
    IN_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A(keys("InCJK_Unified_Ideographs_Extension_A"), "U+3400-U+4DBF", CJK_COMPATIBILITY_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A),
    // NOTE(review): the alias "Yi" is attached to the Yijing Hexagram Symbols block here rather than
    // to IN_YI_SYLLABLES / IN_YI_RADICALS below - verify that this is intended.
    IN_YIJING_HEXAGRAM_SYMBOLS(keys("InYijing_Hexagram_Symbols", "Yi"), "U+4DC0-U+4DFF", YIJING_HEXAGRAM_SYMBOLS_YIJING_HEXAGRAM_SYMBOLS),
    IN_CJK_UNIFIED_IDEOGRAPHS(keys("InCJK_Unified_Ideographs"), "U+4E00-U+9FFF", YIJING_HEXAGRAM_SYMBOLS_CJK_UNIFIED_IDEOGRAPHS),
    IN_YI_SYLLABLES(keys("InYi_Syllables"), "U+A000-U+A48F", YI_SYLLABLES_SYLLABLES_TO_SYLLABLES_1),
    IN_YI_RADICALS(keys("InYi_Radicals"), "U+A490-U+A4CF", YI_RADICALS_YI_RADICALS),
    IN_HANGUL_SYLLABLES(keys("InHangul_Syllables"), "U+AC00-U+D7AF", MEETEI_MAYEK_HANGUL_SYLLABLES),
    // Unicode blocks from U+E000 upwards. Unlike the block constants above, these build their range
    // inline from numeric bounds via the statically imported SymbolRange.range; in every entry the
    // numeric bounds match the bounds written in the description string.
    IN_PRIVATE_USE_AREA(keys("InPrivate_Use_Area"), "U+E000-U+F8FF", range(0xE000, 0xF8FF)),
    IN_CJK_COMPATIBILITY_IDEOGRAPHS(keys("InCJK_Compatibility_Ideographs"), "U+F900-U+FAFF", range(0xF900, 0xFAFF)),
    IN_ALPHABETIC_PRESENTATION_FORMS(keys("InAlphabetic_Presentation_Forms"), "U+FB00-U+FB4F", range(0xFB00, 0xFB4F)),
    IN_VARIATION_SELECTORS(keys("InVariation_Selectors"), "U+FE00-U+FE0F", range(0xFE00, 0xFE0F)),
    IN_COMBINING_HALF_MARKS(keys("InCombining_Half_Marks"), "U+FE20-U+FE2F", range(0xFE20, 0xFE2F)),
    IN_CJK_COMPATIBILITY_FORMS(keys("InCJK_Compatibility_Forms"), "U+FE30-U+FE4F", range(0xFE30, 0xFE4F)),
    IN_SMALL_FORM_VARIANTS(keys("InSmall_Form_Variants"), "U+FE50-U+FE6F", range(0xFE50, 0xFE6F)),
    IN_HALFWIDTH_AND_FULLWIDTH_FORMS(keys("InHalfwidth_and_Fullwidth_Forms"), "U+FF00-U+FFEF", range(0xFF00, 0xFFEF)),
    IN_SPECIALS(keys("InSpecials"), "U+FFF0-U+FFFF", range(0xFFF0, 0xFFFF)),

    // =============================================================================================
    // The categories below this line are disabled because I am having trouble generating their values.
    // Please open a feature ticket if any of them are needed - then I'll invest time in them.

    //IN_LATIN_1_SUPPLEMENT(keys("InLatin-1_Supplement"), "U+0080-U+00FF", range(0x0080, 0x00FF)),
    //IN_LATIN_EXTENDED_A(keys("InLatin_Extended-A"), "U+0100-U+017F", range(0x0100, 0x017F)),
    //IN_LATIN_EXTENDED_B(keys("InLatin_Extended-B"), "U+0180-U+024F", range(0x0180, 0x024F)),
    //IN_GREEK_AND_COPTIC(keys("InGreek_and_Coptic"), "U+0370-U+03FF", range(0x0370, 0x03FF)),

    //CASED_LETTER(keys("L&", "Cased_Letter"), "a letter that exists in lowercase and uppercase variants (combination of Ll, Lu and Lt).", asList(LATIN_UPPERCASE, LATIN_LOWERCASE, range('À', 'Ö'), range('Ø', 'ö'), range('ø', 'İ'), range('Ĳ', 'ķ'), range('Ĺ', 'ň'), range('Ŋ', 'ž'), range('ƀ', 'ƌ'), range('Ǝ', 'ƚ'), range('Ɯ', 'Ʃ'), range('Ƭ', 'ƹ'), range('Ƽ', 'ƽ'), range('ǆ', 'Ǉ'), range('ǉ', 'Ǌ'), range('ǌ', 'ǯ'), range('ǳ', 'Ƞ'), range('Ȣ', 'ȳ'), range('Ⱥ', 'ɔ'), range('ɖ', 'ɗ'), range('ɛ', 'ɜ'), range('ɠ', 'ɡ'), range('ɥ', 'ɦ'), range('ɨ', 'ɬ'), range('ɱ', 'ɲ'), range('ʇ', 'ʌ'), range('ʝ', 'ʞ'), range('Ͱ', 'ͳ'), range('Ͷ', 'ͷ'), range('ͻ', 'ͽ'), range('Έ', 'Ί'), range('Ύ', 'Ώ'), range('Α', 'Ρ'), range('Σ', 'ί'), range('α', 'ρ'), range('σ', 'Ϗ'), range('ϗ', 'ϯ'), range('ϲ', 'ϴ'), range('Ϸ', 'ϻ'), range('Ͻ', 'ҁ'), range('Ҋ', 'ԯ'), range('Ա', 'Ֆ'), range('ա', 'ֆ'), range('Ⴀ', 'Ⴥ'), range('Ꭰ', 'Ᏽ'), range('ᏸ', 'ᏽ'), range('Ḁ', 'ẕ'), range('Ạ', 'ἕ'), range('Ἐ', 'Ἕ'), range('ἠ', 'ὅ'), range('Ὀ', 'Ὅ'), range('Ὗ', 'ώ'), range('ᾰ', 'ᾱ'), range('Ᾰ', 'Ά'), range('Ὲ', 'Ή'), range('ῐ', 'ῑ'), range('Ῐ', 'Ί'), range('ῠ', 'ῡ'), range('Ῠ', 'Ῥ'), range('Ὸ', 'Ώ'), range('K', 'Å'), range('Ↄ', 'ↄ'), range('Ⰰ', 'Ⱞ'), range('ⰰ', 'ⱞ'), range('Ⱡ', 'Ɒ'), range('Ⱳ', 'ⱳ'), range('Ⱶ', 'ⱶ'), range('Ȿ', 'ⳣ'), range('Ⳬ', 'ⳮ'), range('Ⳳ', 'ⳳ'), range('ⴀ', 'ⴥ'), range('Ꙁ', 'ꙭ'), range('Ꚁ', 'ꚛ'), range('Ꜣ', 'ꜯ'), range('Ꜳ', 'ꝯ'), range('Ꝺ', 'ꞇ'), range('Ꞌ', 'Ɥ'), range('Ꞑ', 'ꞓ'), range('Ꞗ', 'Ɪ'), range('Ʞ', 'ꞷ'), range('ꭰ', 'ꮿ'), range('Ａ', 'Ｚ'), range('ａ', 'ｚ')), new char[]{'ƿ', 'Ǆ', 'Ǳ', 'ə', 'ɣ', 'ɯ', 'ɵ', 'ɽ', 'ʀ', 'ʃ', 'ʒ', 'Ϳ', 'Ά', 'Ό', 'Ⴧ', 'Ⴭ', 'ᵹ', 'ᵽ', 'ẞ', 'ὑ', 'ὓ', 'ὕ', 'ὗ', 'Ὑ', 'Ὓ', 'Ὕ', 'ῥ', 'Ω', 'Ⅎ', 'ⅎ', 'ⴧ', 'ⴭ', 'ꭓ'}),

    //IN_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS(keys("InCombining_Diacritical_Marks_for_Symbols"), "U+20D0-U+20FF", range(0x20D0, 0x20FF)),

    //IN_SUPPLEMENTAL_ARROWS_B(keys("InSupplemental_Arrows-B"), "U+2900-U+297F", range(0x2900, 0x297F)),
    //IN_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B(keys("InMiscellaneous_Mathematical_Symbols-B"), "U+2980-U+29FF", range(0x2980, 0x29FF)),

    //IN_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A(keys("InMiscellaneous_Mathematical_Symbols-A"), "U+27C0-U+27EF", range(0x27C0, 0x27EF)),
    //IN_SUPPLEMENTAL_ARROWS_A(keys("InSupplemental_Arrows-A"), "U+27F0-U+27FF", range(0x27F0, 0x27FF)),

    //IN_ARABIC_PRESENTATION_FORMS_A(keys("InArabic_Presentation_Forms-A"), "U+FB50-U+FDFF", range(0xFB50, 0xFDFF)),
    //IN_ARABIC_PRESENTATION_FORMS_B(keys("InArabic_Presentation_Forms-B"), "U+FE70-U+FEFF", range(0xFE70, 0xFEFF)),
//    IN_HIGH_SURROGATES(keys("InHigh_Surrogates"), "U+D800-U+DB7F", range(0xD800, 0xDB7F)),
//    IN_HIGH_PRIVATE_USE_SURROGATES(keys("InHigh_Private_Use_Surrogates"), "U+DB80-U+DBFF", range(0xDB80, 0xDBFF))
//    IN_LOW_SURROGATES(keys("InLow_Surrogates"), "U+DC00-U+DFFF", range(0xDC00, 0xDFFF)),

    // OTHER(keys("C", "Other"), "invisible control characters and unused code points.", asList(C1_CONTROLS, UNUSED_CODEPOINT_1, UNUSED_CODEPOINTS_2, UNUSED_CODEPOINTS_3, UNUSED_CODEPOINTS_4, UNUSED_CODEPOINTS_5, UNUSED_CODEPOINTS_6, range('׵', '؅'), range('؜', '؝'), range('܎', '܏'), UNUSED_CODEPOINTS_7, UNUSED_CODEPOINTS_8, UNUSED_CODEPOINTS_9, UNUSED_CODEPOINTS_10, UNUSED_CODEPOINTS_11, UNUSED_CODEPOINTS_12, UNUSED_CODEPOINTS_13, UNUSED_CODEPOINTS_14, UNUSED_CODEPOINTS_15, UNUSED_CODEPOINTS_16, UNUSED_CODEPOINTS_17, UNUSED_CODEPOINTS_18, UNUSED_CODEPOINTS_19, UNUSED_CODEPOINTS_20, UNUSED_CODEPOINTS_21, UNUSED_CODEPOINTS_22, RANGE_470, RANGE_16, RANGE_147, RANGE_296, RANGE_490, RANGE_610, RANGE_724, RANGE_80, RANGE_361, RANGE_122, RANGE_117, RANGE_542, RANGE_607, RANGE_280, RANGE_585, RANGE_428, RANGE_515, RANGE_659, RANGE_161, RANGE_256, RANGE_364, RANGE_579, RANGE_104, RANGE_531, RANGE_204, RANGE_439, RANGE_645, RANGE_25, RANGE_156, RANGE_479, RANGE_668, RANGE_174, RANGE_241, RANGE_390, RANGE_408, RANGE_297, RANGE_726, RANGE_276, RANGE_462, RANGE_725, RANGE_118, RANGE_546, RANGE_0, RANGE_281, RANGE_604, RANGE_400, RANGE_106, RANGE_700, RANGE_460, RANGE_567, RANGE_20, RANGE_119, RANGE_565, RANGE_169, RANGE_285, RANGE_322, RANGE_309, RANGE_442, RANGE_573, RANGE_632, RANGE_459, RANGE_202, RANGE_544, RANGE_70, RANGE_209, RANGE_292, RANGE_465, RANGE_414, RANGE_543, RANGE_728, RANGE_344, RANGE_633, RANGE_28, RANGE_369, RANGE_615, RANGE_634, RANGE_630, RANGE_523, RANGE_303, RANGE_472, RANGE_224, RANGE_732, RANGE_600, RANGE_595, RANGE_489, RANGE_448, RANGE_519, RANGE_45, RANGE_382, range('᠎', '᠏'), RANGE_349, RANGE_162, RANGE_525, RANGE_669, RANGE_363, RANGE_706, RANGE_79, RANGE_316, RANGE_463, RANGE_183, RANGE_94, RANGE_454, RANGE_380, RANGE_271, RANGE_550, RANGE_157, RANGE_590, RANGE_242, RANGE_320, RANGE_628, RANGE_267, RANGE_260, RANGE_636, RANGE_548, RANGE_416, RANGE_15, RANGE_614, RANGE_66, RANGE_192, RANGE_362, RANGE_652, RANGE_308, RANGE_172, RANGE_19, 
RANGE_686, range('⁠', '⁯'), RANGE_33, RANGE_223, RANGE_249, RANGE_568, RANGE_228, RANGE_625, RANGE_667, RANGE_438, RANGE_443, RANGE_480, RANGE_293, RANGE_176, RANGE_617, RANGE_279, RANGE_405, RANGE_187, RANGE_377, RANGE_464, RANGE_655, RANGE_627, RANGE_355, RANGE_431, RANGE_96, RANGE_148, RANGE_422, RANGE_510, RANGE_649, RANGE_397, RANGE_87, RANGE_597, RANGE_401, RANGE_721, RANGE_730, RANGE_447, RANGE_717, RANGE_284, RANGE_181, RANGE_381, RANGE_58, RANGE_103, RANGE_505, RANGE_613, RANGE_421, RANGE_245, RANGE_714, RANGE_240, RANGE_307, RANGE_709, RANGE_287, RANGE_478, RANGE_656, RANGE_152, RANGE_123, RANGE_373, RANGE_12, RANGE_44, range('퟼', ''), RANGE_676, RANGE_93, RANGE_321, RANGE_641, RANGE_646, RANGE_60, RANGE_315, RANGE_62, RANGE_467, RANGE_334, RANGE_622, range('﻽', '＀'), RANGE_589, RANGE_27, RANGE_227, RANGE_368, RANGE_497, range('￯', '￻')), new char[]{'­', '΋', '΍', '΢', '԰', 'ՠ', 'ֈ', '֐', '۝', '࠿', '࡟', 'ࢵ', '࣢', '঄', '঩', '঱', '৞', '਄', '਩', '਱', '਴', '਷', '਽', '੝', '઄', '઎', '઒', '઩', '઱', '઴', '૆', '૊', '଀', '଄', '଩', '଱', '଴', '୞', '஄', '஑', '஛', '஝', '௉', 'ఄ', '఍', '఑', '఩', '౅', '౉', '౗', '಄', '಍', '಑', '಩', '಴', '೅', '೉', '೟', '೰', 'ഄ', '഍', '഑', '൅', '൉', '඄', '඲', '඼', '෕', '෗', '຃', 'ຉ', 'ຘ', 'ຠ', '຤', '຦', 'ຬ', '຺', '໅', '໇', '཈', '྘', '྽', '࿍', '჆', '቉', '቗', '቙', '኉', '኱', '኿', '዁', '዗', '጑', 'ᜍ', '᝭', '᝱', '᤟', '᩟', '᷺', '὘', '὚', '὜', '὞', '᾵', '῅', '῜', '῵', '῿', '₏', '⯉', 'Ⱟ', 'ⱟ', '⴦', '⶧', '⶯', '⶷', '⶿', '⷇', '⷏', '⷗', '⷟', '⺚', '぀', '㆏', '㈟', 'ꞯ', '꧎', '꧿', '꬧', '꬯', '﬷', '﬽', '﬿', '﭂', '﭅', '﹓', '﹧', '﹵', '￧', '￾'}),
    // UNASSIGNED(keys("Cn", "Unassigned"), "any code point to which no character has been assigned.", asList(UNUSED_CODEPOINT_1, UNUSED_CODEPOINTS_2, UNUSED_CODEPOINTS_3, UNUSED_CODEPOINTS_4, UNUSED_CODEPOINTS_5, UNUSED_CODEPOINTS_6, range('׵', '׿'), UNUSED_CODEPOINTS_7, UNUSED_CODEPOINTS_8, UNUSED_CODEPOINTS_9, UNUSED_CODEPOINTS_10, UNUSED_CODEPOINTS_11, UNUSED_CODEPOINTS_12, UNUSED_CODEPOINTS_13, UNUSED_CODEPOINTS_14, UNUSED_CODEPOINTS_15, UNUSED_CODEPOINTS_16, UNUSED_CODEPOINTS_17, UNUSED_CODEPOINTS_18, UNUSED_CODEPOINTS_19, UNUSED_CODEPOINTS_20, UNUSED_CODEPOINTS_21, UNUSED_CODEPOINTS_22, RANGE_470, RANGE_16, RANGE_147, RANGE_296, RANGE_490, RANGE_610, RANGE_724, RANGE_80, RANGE_361, RANGE_122, RANGE_117, RANGE_542, RANGE_607, RANGE_280, RANGE_585, RANGE_428, RANGE_515, RANGE_659, RANGE_161, RANGE_256, RANGE_364, RANGE_579, RANGE_104, RANGE_531, RANGE_204, RANGE_439, RANGE_645, RANGE_25, RANGE_156, RANGE_479, RANGE_668, RANGE_174, RANGE_241, RANGE_390, RANGE_408, RANGE_297, RANGE_726, RANGE_276, RANGE_462, RANGE_725, RANGE_118, RANGE_546, RANGE_0, RANGE_281, RANGE_604, RANGE_400, RANGE_106, RANGE_700, RANGE_460, RANGE_567, RANGE_20, RANGE_119, RANGE_565, RANGE_169, RANGE_285, RANGE_322, RANGE_309, RANGE_442, RANGE_573, RANGE_632, RANGE_459, RANGE_202, RANGE_544, RANGE_70, RANGE_209, RANGE_292, RANGE_465, RANGE_414, RANGE_543, RANGE_728, RANGE_344, RANGE_633, RANGE_28, RANGE_369, RANGE_615, RANGE_634, RANGE_630, RANGE_523, RANGE_303, RANGE_472, RANGE_224, RANGE_732, RANGE_600, RANGE_595, RANGE_489, RANGE_448, RANGE_519, RANGE_45, RANGE_382, RANGE_349, RANGE_162, RANGE_525, RANGE_669, RANGE_363, RANGE_706, RANGE_79, RANGE_316, RANGE_463, RANGE_183, RANGE_94, RANGE_454, RANGE_380, RANGE_271, RANGE_550, RANGE_157, RANGE_590, RANGE_242, RANGE_320, RANGE_628, RANGE_267, RANGE_260, RANGE_636, RANGE_548, RANGE_416, RANGE_15, RANGE_614, RANGE_66, RANGE_192, RANGE_362, RANGE_652, RANGE_308, RANGE_172, RANGE_33, RANGE_223, RANGE_249, RANGE_568, RANGE_228, RANGE_625, 
RANGE_667, RANGE_438, RANGE_443, RANGE_480, RANGE_293, RANGE_176, RANGE_617, RANGE_279, RANGE_405, RANGE_187, RANGE_377, RANGE_464, RANGE_655, RANGE_627, RANGE_355, RANGE_431, RANGE_96, RANGE_148, RANGE_422, RANGE_510, RANGE_649, RANGE_397, RANGE_87, RANGE_597, RANGE_401, RANGE_721, RANGE_730, RANGE_447, RANGE_717, RANGE_284, RANGE_181, RANGE_381, RANGE_58, RANGE_103, RANGE_505, RANGE_613, RANGE_421, RANGE_245, RANGE_714, RANGE_240, RANGE_307, RANGE_709, RANGE_287, RANGE_478, RANGE_656, RANGE_152, RANGE_123, RANGE_373, RANGE_12, RANGE_44, range('퟼', '퟿'), RANGE_676, RANGE_93, RANGE_321, RANGE_641, RANGE_646, RANGE_60, RANGE_315, RANGE_62, RANGE_467, RANGE_334, RANGE_622, range('﻽', '﻾'), RANGE_589, RANGE_27, RANGE_227, RANGE_368, RANGE_497, range('￯', '￸')), new char[]{'΋', '΍', '΢', '԰', 'ՠ', 'ֈ', '֐', '؝', '܎', '࠿', '࡟', 'ࢵ', '঄', '঩', '঱', '৞', '਄', '਩', '਱', '਴', '਷', '਽', '੝', '઄', '઎', '઒', '઩', '઱', '઴', '૆', '૊', '଀', '଄', '଩', '଱', '଴', '୞', '஄', '஑', '஛', '஝', '௉', 'ఄ', '఍', '఑', '఩', '౅', '౉', '౗', '಄', '಍', '಑', '಩', '಴', '೅', '೉', '೟', '೰', 'ഄ', '഍', '഑', '൅', '൉', '඄', '඲', '඼', '෕', '෗', '຃', 'ຉ', 'ຘ', 'ຠ', '຤', '຦', 'ຬ', '຺', '໅', '໇', '཈', '྘', '྽', '࿍', '჆', '቉', '቗', '቙', '኉', '኱', '኿', '዁', '዗', '጑', 'ᜍ', '᝭', '᝱', '᠏', '᤟', '᩟', '᷺', '὘', '὚', '὜', '὞', '᾵', '῅', '῜', '῵', '῿', '⁥', '₏', '⯉', 'Ⱟ', 'ⱟ', '⴦', '⶧', '⶯', '⶷', '⶿', '⷇', '⷏', '⷗', '⷟', '⺚', '぀', '㆏', '㈟', 'ꞯ', '꧎', '꧿', '꬧', '꬯', '﬷', '﬽', '﬿', '﭂', '﭅', '﹓', '﹧', '﹵', '＀', '￧', '￾'}),

    // ===========================================================================================
    // Help needed: unable to find the exact list of code points in the COMMON category
    //COMMON("Common", "", null, null),
    ;

    /**
     * Unmodifiable lookup table from every accepted key spelling to its category.
     * <p>
     * Every enum constant contributes the key variations produced by
     * {@code allowUseOfHyphenOrSpacesOrUnderscores}. The collector has no merge function,
     * so a key variation that occurs more than once in the stream makes
     * {@code Collectors.toMap} throw {@link IllegalStateException} while this class initializes.
     */
    public static final Map<String, UnicodeCategory> ALL_CATEGORIES = Collections.unmodifiableMap(
            Stream.of(values())
                  .flatMap(category -> allowUseOfHyphenOrSpacesOrUnderscores(category))
                  .collect(Collectors.toMap(
                          (KeyValue pair) -> pair.getKey(),
                          (KeyValue pair) -> pair.getValue())));

    /**
     * Wraps the given key names in a list; a readability helper for the enum constant declarations.
     *
     * @param keys names under which a category can be referenced
     * @return fixed-size list view backed by the {@code keys} array
     */
    private static List<String> keys(String... keys) {
        final List<String> names = asList(keys);
        return names;
    }

    /**
     * Produces one {@link KeyValue} per key variation of the given category.
     * <p>
     * The variations come from {@code Util.makeVariations} called with the category keys and the
     * characters {@code '_'}, {@code ' '} and {@code '-'}; judging by this method's name the
     * helper presumably makes those separators interchangeable in a key (confirm against
     * {@code Util}).
     *
     * @param unicodeCategory category whose keys are expanded
     * @return stream pairing each key variation with {@code unicodeCategory}
     */
    private static Stream<KeyValue> allowUseOfHyphenOrSpacesOrUnderscores(UnicodeCategory unicodeCategory) {
        return Util.makeVariations(unicodeCategory.keys, '_', ' ', '-')
                   .stream()
                   .map(variation -> new KeyValue(variation, unicodeCategory));
    }

    // Names under which this category can be referenced; expanded into variations for ALL_CATEGORIES.
    private final List<String>      keys;
    // Human-readable description of the category.
    private final String            description;
    // Character ranges consulted by contains(Character).
    private final List<SymbolRange> symbolRanges;
    // Individual characters consulted by contains(Character) in addition to the ranges.
    private final char[]            symbols;

    /**
     * Primary constructor; the other constructors delegate to it.
     * <p>
     * All arguments are stored as given, without copying.
     *
     * @param keys         names under which the category can be referenced
     * @param description  human-readable description of the category
     * @param symbolRanges character ranges belonging to the category
     * @param symbols      individual characters belonging to the category
     */
    UnicodeCategory(List<String> keys, String description, List<SymbolRange> symbolRanges, char... symbols) {
        this.symbols = symbols;
        this.symbolRanges = symbolRanges;
        this.description = description;
        this.keys = keys;
    }

    /**
     * Creates a category made of exactly one range plus individual characters.
     *
     * @param keys        names under which the category can be referenced
     * @param description human-readable description of the category
     * @param symbolRange the single range, wrapped into a singleton list
     * @param symbols     individual characters belonging to the category
     */
    UnicodeCategory(List<String> keys, String description, SymbolRange symbolRange, char[] symbols) {
        this(keys, description, Collections.singletonList(symbolRange), symbols);
    }

    /**
     * Creates a category made only of individual characters; the range list is empty.
     *
     * @param keys        names under which the category can be referenced
     * @param description human-readable description of the category
     * @param symbols     individual characters belonging to the category
     */
    UnicodeCategory(List<String> keys, String description, char... symbols) {
        this(keys, description, Collections.<SymbolRange>emptyList(), symbols);
    }

    /**
     * Creates a category made of exactly one range; {@code ZERO_LENGTH_CHARACTER_ARRAY} is
     * passed as the individual characters.
     *
     * @param keys        names under which the category can be referenced
     * @param description human-readable description of the category
     * @param symbolRange the single range, wrapped into a singleton list
     */
    UnicodeCategory(List<String> keys, String description, SymbolRange symbolRange) {
        this(keys, description, Collections.singletonList(symbolRange), ZERO_LENGTH_CHARACTER_ARRAY);
    }

    /**
     * Returns the character ranges of this category.
     *
     * @return the list held by this constant (not a copy)
     */
    public List<SymbolRange> getSymbolRanges() {
        return this.symbolRanges;
    }

    /**
     * Returns the individual characters of this category.
     *
     * @return the array held by this constant (not a copy), so writes to it affect this category
     */
    public char[] getSymbols() {
        return this.symbols;
    }

    /**
     * Pairs one key spelling with the category it resolves to.
     * <p>
     * Used as the stream element type while {@code ALL_CATEGORIES} is being built; both
     * fields are final.
     */
    private static class KeyValue {
        private final String          key;
        private final UnicodeCategory value;

        /**
         * @param keyVariation key spelling
         * @param category     category the spelling maps to
         */
        KeyValue(String keyVariation, UnicodeCategory category) {
            this.value = category;
            this.key = keyVariation;
        }

        /**
         * @return the category this pair maps to
         */
        public UnicodeCategory getValue() {
            return this.value;
        }

        /**
         * @return the key spelling of this pair
         */
        public String getKey() {
            return this.key;
        }
    }

    /**
     * Returns the names under which this category can be referenced.
     *
     * @return the list held by this constant (not a copy)
     */
    public List<String> getKeys() {
        return this.keys;
    }

    /**
     * Returns the human-readable description of this category.
     *
     * @return the description text given to the constructor
     */
    public String getDescription() {
        return this.description;
    }

    /**
     * Tests whether the given character belongs to this category.
     * <p>
     * A character belongs to the category when any of the symbol ranges reports containing it
     * or when it equals one of the individual symbols.
     *
     * @param c character to test; {@code null} is never contained
     * @return {@code true} if {@code c} is covered by a range or listed as a symbol,
     *         {@code false} otherwise (including for {@code null})
     */
    public boolean contains(Character c) {
        // Answer null uniformly: previously the outcome depended on whether this category
        // happened to have ranges (null was handed to SymbolRange) or only symbols (false).
        if (c == null) {
            return false;
        }

        for (SymbolRange symbolRange : symbolRanges) {
            if (symbolRange.contains(c)) {
                return true;
            }
        }

        // Unbox once and compare primitives; iterating the char[] as Character would box
        // every element on every call.
        final char target = c;
        for (char symbol : symbols) {
            if (symbol == target) {
                return true;
            }
        }

        return false;
    }
}
