/*
 * Decompiled with CFR 0.152.
 */
package javatools.parsers;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javatools.administrative.Announce;
import javatools.administrative.D;
import javatools.administrative.NonsharedParameters;
import javatools.datatypes.FinalMap;
import javatools.filehandlers.FileLines;
import javatools.parsers.Char17;
import javatools.parsers.Language;

public class NameML {
    /** Directory that config files are read from; while null, they are loaded as classpath resources instead. */
    static File CONFIG_DIR = null;
    /** Classpath prefix used for config files when {@link #CONFIG_DIR} is null. */
    static final String PARSINGRESOURCES_PATH = "/javatools/resources/parsing/";
    public static final String ANYNAME = "NAME";
    /** Set to true at the end of {@link #init()} so that later calls return immediately. */
    protected static boolean hasBeenInitialized = false;
    /** Regex: a whole word made only of the letters X, I and V (Roman numeral). */
    public static String roman = "\\b(?:[XIV]++)\\b";
    /** Regex: the word "of". Note that a static method with the same name also exists. */
    public static String of = "\\bof\\b";
    // --- Regex building blocks -------------------------------------------------
    /** Regex: one uppercase letter. */
    public static final String U = "\\p{Lu}";
    /** Regex: one lowercase letter. */
    public static final String L = "\\p{Ll}";
    /** Regex: one letter of any case. */
    public static final String A = "\\p{L}";
    /** Regex: a run of one or more whitespace or underscore characters (possessive). */
    public static final String B = "(?:[\\s_]++)";
    /** Regex: word boundary. */
    public static final String BD = "\\b";
    /** Regex: a run of one or more commas, whitespace or underscore characters (possessive). */
    public static final String BC = "[,\\s_]++";
    /** Regex: one digit. */
    public static final String DG = "\\d";
    /** Regex: a literal hyphen. */
    public static final String H = "-";
    /** Regex alternation operator. Note that a static method with the same name also exists. */
    public static final String or = "|";
    // --- Person-name vocabulary ------------------------------------------------
    /** Regex: lowercase particles that may precede a family name (e.g. "von", "de la", "bin"). */
    public static final String familyNamePrefix = "(?:[aA]l|[dD][ea]|[dD]el|[dD]e las|[bB]in|[dD]e la|[dD]e los|[dD]i|[zZ]u[mr]|[aA]m|[vV][oa]n de[rnm]|[vV][oa][nm]|[dD]o|[dD]')";
    /** Compiled form of the same expression as {@link #familyNamePrefix}. */
    public static final Pattern familyNamePrefixPattern = Pattern.compile("(?:[aA]l|[dD][ea]|[dD]el|[dD]e las|[bB]in|[dD]e la|[dD]e los|[dD]i|[zZ]u[mr]|[aA]m|[vV][oa]n de[rnm]|[vV][oa][nm]|[dD]o|[dD]')");
    /** Regex: articles that may introduce an attribute ("the", "der", "die", "il", "la", "le"). */
    public static String attributePrefix = "(?:the|der|die|il|la|le)";
    /** Compiled once from the initial value of {@link #attributePrefix}. */
    public static Pattern attributePrefixPattern = Pattern.compile(attributePrefix);
    /** Regex: suffixes that may follow a family name (e.g. "Jr.", "PhD", "OBE", "hijo"). */
    public static final String familyNameSuffix = "(?:CBE|DBE|GBE|[jJ]r\\.?|[jJ]unior|hijo|hija|P[hH]\\.?[dD]\\.?|KBE|MBE|M\\.?D\\.|OBE|[sS]enior|[sS]r\\.?)";
    /** Compiled form of the same expression as {@link #familyNameSuffix}. */
    public static final Pattern familyNameSuffixPattern = Pattern.compile("(?:CBE|DBE|GBE|[jJ]r\\.?|[jJ]unior|hijo|hija|P[hH]\\.?[dD]\\.?|KBE|MBE|M\\.?D\\.|OBE|[sS]enior|[sS]r\\.?)");
    // --- Per-language resources; all remain null until init() has run ----------
    public static Pattern titlePatternEn = null;
    public static Pattern titlePatternDe = null;
    public static Pattern titlePatternFr = null;
    public static Pattern titlePatternEs = null;
    public static Pattern titlePatternIt = null;
    // Filled by init() from the same "titles.<language id>" files as the title patterns.
    protected static Set<String> titlesForGivenNamesEn = null;
    protected static Set<String> titlesForGivenNamesDe = null;
    protected static Set<String> titlesForGivenNamesEs = null;
    protected static Set<String> titlesForGivenNamesFr;
    protected static Set<String> titlesForGivenNamesIt;
    // Filled by init() from the "stopwords.<language id>" files.
    protected static Set<String> stopWordDE;
    protected static Set<String> stopWordFR;
    protected static Set<String> stopWordES;
    protected static Set<String> stopWordEN;
    protected static Set<String> stopWordIT;
    // --- Company / generic name expressions; the Pattern fields without an
    // --- initializer are assigned in the static initializer of this class -------
    /** Regex: legal-form suffixes of company names (e.g. "Co.", "Inc", "Ltd.", "GmbH", "S.A."). */
    public static final String companyNameSuffix = "(?:[cC][oO]\\.|[cC][oO]\\b|&(?:[\\s_]++)?[cC][oO]\\.|&(?:[\\s_]++)?[cC][oO]\\b|\\b[cC][oO][rR][pP]\\.|\\b[cC][oO][rR][pP]\\b|\\bR[cC]orporation\\b|\\b[iI][nN][cC]\\.|\\b[iI][nN][cC]\\b|\\b[iI]ncorporated\\b|\\b[iI]ncorporation\\b|\\b[iI]ncorp\\.?|\\b[iI]ncorp\\b|\\b[lL][tT][dD]\\.|\\b[lL][tT][dD]\\b|\\b[lL]imited\\b|\\bp\\.l\\.c\\.\\b|\\bPty\\.\\b|\\bLLC\\b|\\bAG\\b|\\bGmbH\\b|\\bKG\\b|\\bOHG\\b|\\bS\\.R\\.L\\.\\b|\\bS\\.p\\.A\\.\\b|\\bS\\.A\\.\\b)";
    public static final Pattern companyNameSuffixPattern;
    /** Regex: an uppercase letter followed by word characters, whitespace or dots. */
    public static final String teamName = "\\b\\p{Lu}[\\w\\s\\.]+\\b";
    public static final Pattern teamNamePattern;
    /** Regex: prepositions allowed between the words of a multi-word name. */
    public static final String prep = "(?:on|of|for)";
    /** Regex: anything that starts with an uppercase letter. */
    public static final String laxName = "\\b\\p{Lu}.*\\b";
    public static final Pattern laxNamePattern;
    /** Regex: an uppercase letter followed by at least two letters/digits (or hyphen + uppercase/digit pairs). */
    public static final String safeName = "\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b";
    public static final Pattern safeNamePattern;
    public static final Pattern safeNamesPattern;
    public static final Pattern safeNamesPatternNoPrep;
    /** The name exactly as passed to the constructor. */
    protected String original;
    /** Cache for {@link #normalize()}; null until first computed. */
    protected String normalized;
    public static final Pattern laxAbbreviationPattern;
    public static final Pattern safeAbbreviationPattern;
    public static final Pattern laxCompanyPattern;
    public static final Pattern safeCompanyPattern;
    /** Regex: prefixes written directly in front of a family name (e.g. "Mc", "Mac", "O'", "al-"). */
    public static final String directFamilyNamePrefix = "\\b(?:(?:al-|Mc|Di|De|Mac|O')(?:[\\s_]++)?)";
    /** Regex: one uppercase letter followed by one or more lowercase letters. */
    public static final String personNameComponent = "\\p{Lu}\\p{Ll}+";
    // The following four expressions are assembled in the static initializer.
    public static final String givenNameComponent;
    public static final String givenName;
    public static final String givenNames;
    public static final String familyName;
    /** Regex: a single character enclosed in apostrophes. */
    public static final String nickName = "(?:'[^']')";
    // --- Per-language person-name patterns; assigned by init() ------------------
    public static Pattern laxPersonNamePatternEn;
    public static Pattern laxPersonNamePatternDe;
    public static Pattern laxPersonNamePatternEs;
    public static Pattern laxPersonNamePatternFr;
    public static Pattern laxPersonNamePatternIt;
    public static Pattern safePersonNamePatternEn;
    public static Pattern safePersonNamePatternDe;
    public static Pattern safePersonNamePatternEs;
    public static Pattern safePersonNamePatternFr;
    public static Pattern safePersonNamePatternIt;
    // --- Lookup tables; assigned in the static initializer ----------------------
    /** Maps an uppercase state abbreviation to the state name. */
    public static Map<String, String> usStates;
    /** Maps a lowercase language code to the language name. */
    public static Map<String, String> languageCodes;
    /** Maps a nationality adjective to the name of the nation. */
    public static Map<String, String> nationality2country;

    /**
     * Initializes the name-parsing resources; equivalent to calling {@link #init()}.
     *
     * @param params accepted for signature compatibility; this method does not read it
     */
    public static final void init(NonsharedParameters params) {
        init();
    }

    /**
     * Points the parser at a configuration directory on disk and then runs {@link #init()}.
     *
     * <p>The directory is formed by plain string concatenation, so {@code configPath} has to
     * end with a path separator for "parsing/" to become a sub-directory of it.
     *
     * @param configPath path prefix in front of the "parsing/" directory
     */
    public static final void init(String configPath) {
        String parsingDirName = configPath + "parsing/";
        CONFIG_DIR = new File(parsingDirName);
        init();
    }

    /**
     * Loads the per-language title and stop-word resources and compiles the
     * per-language person-name patterns. Does nothing once a previous call has completed.
     */
    public static final void init() {
        if (hasBeenInitialized) {
            return;
        }

        // Title patterns, one per supported language.
        titlePatternEn = createTitlePattern(Language.ENGLISH);
        titlePatternDe = createTitlePattern(Language.GERMAN);
        titlePatternFr = createTitlePattern(Language.FRENCH);
        titlePatternEs = createTitlePattern(Language.SPANISH);
        titlePatternIt = createTitlePattern(Language.ITALIAN);

        // The same title files again, this time as plain sets of lines.
        titlesForGivenNamesEn = readTextFileLinesSet("titles." + Language.ENGLISH.getId());
        titlesForGivenNamesDe = readTextFileLinesSet("titles." + Language.GERMAN.getId());
        titlesForGivenNamesEs = readTextFileLinesSet("titles." + Language.SPANISH.getId());
        titlesForGivenNamesFr = readTextFileLinesSet("titles." + Language.FRENCH.getId());
        titlesForGivenNamesIt = readTextFileLinesSet("titles." + Language.ITALIAN.getId());

        // Stop-word lists.
        stopWordDE = readTextFileLinesSet("stopwords." + Language.GERMAN.getId());
        stopWordFR = readTextFileLinesSet("stopwords." + Language.FRENCH.getId());
        stopWordES = readTextFileLinesSet("stopwords." + Language.SPANISH.getId());
        stopWordEN = readTextFileLinesSet("stopwords." + Language.ENGLISH.getId());
        stopWordIT = readTextFileLinesSet("stopwords." + Language.ITALIAN.getId());

        // Permissive person-name patterns, built on top of the title patterns.
        laxPersonNamePatternEn = createLaxPersonNamePattern(titlePatternEn);
        laxPersonNamePatternDe = createLaxPersonNamePattern(titlePatternDe);
        laxPersonNamePatternEs = createLaxPersonNamePattern(titlePatternEs);
        laxPersonNamePatternFr = createLaxPersonNamePattern(titlePatternFr);
        laxPersonNamePatternIt = createLaxPersonNamePattern(titlePatternIt);

        // Strict person-name patterns.
        safePersonNamePatternEn = createSafePersonNamePattern(titlePatternEn);
        safePersonNamePatternDe = createSafePersonNamePattern(titlePatternDe);
        safePersonNamePatternEs = createSafePersonNamePattern(titlePatternEs);
        safePersonNamePatternFr = createSafePersonNamePattern(titlePatternFr);
        safePersonNamePatternIt = createSafePersonNamePattern(titlePatternIt);

        // Only reached when every step above succeeded.
        hasBeenInitialized = true;
    }

    /**
     * Builds a regex for one or more occurrences of {@code s} separated by blanks ({@link #B}).
     *
     * @param s regex of a single item
     * @return {@code (?:s B)*s}
     */
    public static String mul(String s) {
        StringBuilder regex = new StringBuilder("(?:");
        regex.append(s).append(B).append(")*").append(s);
        return regex.toString();
    }

    /**
     * Builds a regex for one or more occurrences of {@code s} joined by hyphens ({@link #H}).
     *
     * @param s regex of a single item
     * @return {@code (?:s-)*s}
     */
    public static String mulHyp(String s) {
        StringBuilder regex = new StringBuilder("(?:");
        regex.append(s).append(H).append(")*").append(s);
        return regex.toString();
    }

    /**
     * Wraps a regex in an optional non-capturing group.
     *
     * @param s regex to make optional
     * @return {@code (?:s)?}
     */
    public static String opt(String s) {
        StringBuilder regex = new StringBuilder("(?:");
        regex.append(s).append(")?");
        return regex.toString();
    }

    /**
     * Wraps a regex in a non-capturing group that may repeat zero or more times.
     *
     * @param s regex to repeat
     * @return {@code (?:s)*}
     */
    public static String optMul(String s) {
        StringBuilder regex = new StringBuilder("(?:");
        regex.append(s).append(")*");
        return regex.toString();
    }

    /**
     * Builds a non-capturing alternation of two regexes.
     *
     * @param s1 first alternative
     * @param s2 second alternative
     * @return {@code (?:s1|s2)}
     */
    public static String or(String s1, String s2) {
        StringBuilder regex = new StringBuilder("(?:");
        // "or" here is the String constant holding the alternation operator.
        regex.append(s1).append(or).append(s2).append(")");
        return regex.toString();
    }

    /**
     * Wraps a regex in a capturing group.
     *
     * @param s regex to capture
     * @return {@code (s)}
     */
    public static String c(String s) {
        StringBuilder regex = new StringBuilder("(");
        regex.append(s).append(")");
        return regex.toString();
    }

    /**
     * Tells whether the whole string is a family-name prefix such as "von" or "de la".
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #familyNamePrefixPattern} entirely
     */
    public static boolean isFamilyNamePrefix(String s) {
        Matcher prefixMatcher = familyNamePrefixPattern.matcher(s);
        return prefixMatcher.matches();
    }

    /**
     * Tells whether the whole string is an attribute prefix such as "the" or "der".
     *
     * <p>The expression is compiled from the current value of the mutable
     * {@link #attributePrefix} field on every call.
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #attributePrefix} entirely
     */
    public static boolean isAttributePrefix(String s) {
        return Pattern.matches(attributePrefix, s);
    }

    /**
     * Tells whether the whole string is a person-name suffix such as "Jr." or "PhD".
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #familyNameSuffixPattern} entirely
     */
    public static boolean isPersonNameSuffix(String s) {
        Matcher suffixMatcher = familyNameSuffixPattern.matcher(s);
        return suffixMatcher.matches();
    }

    /**
     * Compiles the title pattern of one language from its "titles.&lt;language id&gt;" config file.
     *
     * <p>Every non-empty line that does not start with "##" is trimmed and inserted verbatim
     * as one alternative of {@code \b(?:...)}; the lines are therefore treated as regex
     * fragments, not as literals.
     *
     * @param lang language whose title file is read
     * @return the compiled pattern, or null if reading the file failed with an IOException
     */
    private static Pattern createTitlePattern(Language lang) {
        List<String> rawLines;
        try {
            rawLines = readTextFileLines("titles." + lang.getId(), "UTF-8");
        }
        catch (IOException e) {
            return null;
        }
        StringBuilder alternation = new StringBuilder("\\b(?:");
        String separator = "";
        for (String rawLine : rawLines) {
            String entry = rawLine.trim();
            if (entry.length() <= 0 || entry.startsWith("##")) {
                // blank line or comment line
                continue;
            }
            alternation.append(separator).append(entry);
            separator = "|";
        }
        alternation.append(")");
        return Pattern.compile(alternation.toString());
    }

    /**
     * Tells whether the whole string is a title in the given language.
     *
     * @param s candidate string
     * @param lang one of ENGLISH, FRENCH, GERMAN, SPANISH or ITALIAN
     * @return true if {@code s} matches the title pattern of {@code lang} entirely
     * @throws IllegalArgumentException if {@code lang} is none of the five languages above
     */
    public static boolean isTitle(String s, Language lang) {
        Pattern titles;
        if (lang.equals(Language.ENGLISH)) {
            titles = titlePatternEn;
        } else if (lang.equals(Language.FRENCH)) {
            titles = titlePatternFr;
        } else if (lang.equals(Language.GERMAN)) {
            titles = titlePatternDe;
        } else if (lang.equals(Language.SPANISH)) {
            titles = titlePatternEs;
        } else if (lang.equals(Language.ITALIAN)) {
            titles = titlePatternIt;
        } else {
            throw new IllegalArgumentException("Unsupported Language");
        }
        return titles.matcher(s).matches();
    }

    /**
     * Tells whether the whole string is a company suffix such as "Inc." or "GmbH".
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #companyNameSuffixPattern} entirely
     */
    public static boolean isCompanyNameSuffix(String s) {
        Matcher suffixMatcher = companyNameSuffixPattern.matcher(s);
        return suffixMatcher.matches();
    }

    /**
     * Tells whether the whole string is a single name according to the strict pattern.
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #safeNamePattern} entirely
     */
    public static boolean isName(String s) {
        Matcher nameMatcher = safeNamePattern.matcher(s);
        return nameMatcher.matches();
    }

    /**
     * Tells whether the whole string is a sequence of names according to the strict pattern.
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #safeNamesPattern} entirely
     */
    public static boolean isNames(String s) {
        Matcher namesMatcher = safeNamesPattern.matcher(s);
        return namesMatcher.matches();
    }

    /**
     * Tells whether the whole string could be a name according to the permissive pattern.
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #laxNamePattern} entirely
     */
    public static boolean couldBeName(String s) {
        Matcher nameMatcher = laxNamePattern.matcher(s);
        return nameMatcher.matches();
    }

    /**
     * Tells whether a word is in the stop-word list of a language.
     *
     * @param w word to look up; null counts as a stop word
     * @param l language whose list is consulted; any language other than FRENCH, GERMAN,
     *          ITALIAN or SPANISH falls back to the English list
     * @return true if {@code w} is null or contained in the selected list
     */
    public static boolean isStopWord(String w, Language l) {
        if (w == null) {
            return true;
        }
        // English doubles as the fallback list.
        Set<String> stopWords = stopWordEN;
        if (l == Language.ENGLISH) {
            stopWords = stopWordEN;
        } else if (l == Language.FRENCH) {
            stopWords = stopWordFR;
        } else if (l == Language.GERMAN) {
            stopWords = stopWordDE;
        } else if (l == Language.ITALIAN) {
            stopWords = stopWordIT;
        } else if (l == Language.SPANISH) {
            stopWords = stopWordES;
        }
        return stopWords.contains(w);
    }

    /** Returns the name exactly as it was passed to the constructor. */
    @Override
    public String toString() {
        return original;
    }

    /**
     * Returns the normalized form of the name, computing and caching it on first use.
     *
     * <p>Every run of whitespace/underscores becomes a single underscore; afterwards every
     * character that is neither a letter, nor a digit ({@code \d}), nor an underscore is removed.
     *
     * @return the cached normalized name
     */
    public String normalize() {
        if (normalized != null) {
            return normalized;
        }
        String underscored = original.replaceAll(B, "_");
        normalized = underscored.replaceAll("([\\P{L}&&[^\\d]&&[^_]])", "");
        return normalized;
    }

    /**
     * Creates a name that keeps {@code s} as its original text.
     *
     * @param s the name as written
     */
    protected NameML(String s) {
        original = s;
    }

    /**
     * Returns a multi-line description showing the original and the normalized name.
     *
     * @return the description text
     */
    public String describe() {
        StringBuilder text = new StringBuilder("Name\n  Original: ");
        text.append(original);
        text.append("\n  Normalized: ");
        text.append(normalize());
        return text.toString();
    }

    /** Returns the name exactly as it was passed to the constructor. */
    public String original() {
        return original;
    }

    /**
     * Reads all lines of a config file.
     *
     * <p>The underlying stream is always closed, including when reading fails part-way.
     *
     * @param configFile file name, resolved by {@link #getConfigFileStream(String)}
     * @param encoding charset name; null selects "UTF-8"
     * @return the lines of the file in order, without line terminators
     * @throws FileNotFoundException if the file (or classpath resource) does not exist
     * @throws IOException if the encoding is unsupported or reading fails
     */
    public static List<String> readTextFileLines(String configFile, String encoding) throws IOException {
        if (encoding == null) {
            encoding = "UTF-8";
        }
        InputStream in = NameML.getConfigFileStream(configFile);
        if (in == null) {
            // A missing classpath resource is reported as null rather than as an exception.
            throw new FileNotFoundException("Config file not found: " + configFile);
        }
        List<String> lines = new ArrayList<String>();
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new InputStreamReader(in, encoding));
            String line;
            while ((line = reader.readLine()) != null) {
                lines.add(line);
            }
        }
        finally {
            if (reader != null) {
                reader.close();
            } else {
                // The reader could not be created (e.g. unsupported encoding); close the raw stream.
                in.close();
            }
        }
        return lines;
    }

    /**
     * Opens a config file for reading.
     *
     * <p>While {@link #CONFIG_DIR} is null the file is looked up as a classpath resource under
     * {@link #PARSINGRESOURCES_PATH}; otherwise it is opened from that directory on disk.
     *
     * @param configfile file name relative to the config location
     * @return the open stream; the classpath lookup yields null when the resource is missing
     * @throws FileNotFoundException if the file cannot be opened from {@link #CONFIG_DIR}
     */
    public static InputStream getConfigFileStream(String configfile) throws FileNotFoundException {
        if (CONFIG_DIR == null) {
            String resourceName = PARSINGRESOURCES_PATH + configfile;
            return NameML.class.getResourceAsStream(resourceName);
        }
        File onDisk = new File(CONFIG_DIR, configfile);
        return new FileInputStream(onDisk);
    }

    /**
     * Reads a UTF-8 config file into a set of its trimmed lines, skipping empty lines and
     * lines that start with "##".
     *
     * @param configFile file name, resolved like in {@link #readTextFileLines(String, String)}
     * @return the set of entries, or null if reading failed with an IOException (the stack
     *         trace is printed in that case)
     */
    public static Set<String> readTextFileLinesSet(String configFile) {
        List<String> rawLines;
        try {
            rawLines = readTextFileLines(configFile, "UTF-8");
        }
        catch (IOException e) {
            e.printStackTrace();
            return null;
        }
        // Sized from the line count and a 0.75 load factor.
        int capacity = (int)((double)rawLines.size() / 0.75);
        HashSet<String> entries = new HashSet<String>(capacity);
        for (String rawLine : rawLines) {
            String entry = rawLine.trim();
            if (entry.length() > 0 && !entry.startsWith("##")) {
                entries.add(entry);
            }
        }
        return entries;
    }

    /**
     * Tells whether the whole word is an abbreviation according to the strict pattern.
     *
     * @param word candidate string
     * @return true if {@code word} matches {@link #safeAbbreviationPattern} entirely
     */
    public static boolean isAbbreviation(String word) {
        Matcher abbreviationMatcher = safeAbbreviationPattern.matcher(word);
        return abbreviationMatcher.matches();
    }

    /**
     * Tells whether the whole word could be an abbreviation according to the permissive pattern.
     *
     * @param word candidate string
     * @return true if {@code word} matches {@link #laxAbbreviationPattern} entirely
     */
    public static boolean couldBeAbbreviation(String word) {
        Matcher abbreviationMatcher = laxAbbreviationPattern.matcher(word);
        return abbreviationMatcher.matches();
    }

    /**
     * Tells whether the whole string is a company name according to the strict pattern.
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #safeCompanyPattern} entirely
     */
    public static boolean isCompanyName(String s) {
        Matcher companyMatcher = safeCompanyPattern.matcher(s);
        return companyMatcher.matches();
    }

    /**
     * Tells whether the whole string could be a company name according to the permissive pattern.
     *
     * @param s candidate string
     * @return true if {@code s} matches {@link #laxCompanyPattern} entirely
     */
    public static boolean couldBeCompanyName(String s) {
        Matcher companyMatcher = laxCompanyPattern.matcher(s);
        return companyMatcher.matches();
    }

    /**
     * Builds the permissive person-name pattern for one language.
     *
     * <p>The pattern is the concatenation of the parts appended below, each in its own
     * capturing group: titles, given names, nickname, attribute prefix, family-name prefix,
     * family name, family-name suffix, Roman numeral, "of" + name component, trailing nickname.
     * All parts except the family name are optional.
     *
     * @param titlePattern title pattern of the language
     * @return the compiled pattern
     */
    private static Pattern createLaxPersonNamePattern(Pattern titlePattern) {
        StringBuilder regex = new StringBuilder();
        regex.append(c(optMul(titlePattern.pattern() + B)));
        regex.append(c(optMul(givenName + B)));
        regex.append(opt(c(nickName) + B));
        regex.append(opt(c(attributePrefix) + B));
        regex.append(opt(c(familyNamePrefix) + B));
        regex.append(c(familyName));
        regex.append(opt(BC + c(familyNameSuffix)));
        regex.append(opt(B + c(roman)));
        // "of" is the String field holding the regex for the word "of".
        regex.append(opt(B + of + B + c(personNameComponent)));
        regex.append(opt(B + c(nickName)));
        return Pattern.compile(regex.toString());
    }

    /**
     * Builds the strict person-name pattern for one language.
     *
     * <p>The pattern is a top-level alternation of seven shapes, listed in the array below.
     * The optional family-name prefix and suffix parts are composed from the class constants
     * {@link #familyNamePrefix}, {@link #B}, {@link #BC} and {@link #familyNameSuffix}.
     *
     * @param titlePattern title pattern of the language
     * @return the compiled pattern
     */
    private static Pattern createSafePersonNamePattern(Pattern titlePattern) {
        String title = titlePattern.pattern();
        String optPrefix = opt(familyNamePrefix + B);
        String optSuffix = opt(BC + familyNameSuffix);
        // One initial: an uppercase letter, a dot, and a blank.
        String initial = U + "\\." + B;

        String[] alternatives = {
            // title, given names, family name, optional suffix
            title + B + givenNames + B + optPrefix + familyName + optSuffix,
            // title, family name, optional suffix
            title + B + optPrefix + familyName + optSuffix,
            // given name, Roman numeral
            givenName + B + roman,
            // given names, family name, mandatory suffix
            givenNames + B + optPrefix + familyName + BC + familyNameSuffix,
            // family name, mandatory suffix
            optPrefix + familyName + BC + familyNameSuffix,
            // given name, one initial, family name, optional suffix
            givenName + B + initial + optPrefix + familyName + optSuffix,
            // given name, two initials, family name, optional suffix
            givenName + B + initial + initial + optPrefix + familyName + optSuffix
        };

        StringBuilder regex = new StringBuilder();
        for (int i = 0; i < alternatives.length; i++) {
            if (i > 0) {
                // "or" is the String constant holding the alternation operator.
                regex.append(or);
            }
            regex.append(alternatives[i]);
        }
        return Pattern.compile(regex.toString());
    }

    /**
     * Tells whether a string could be a person name in the given language.
     *
     * <p>Strings that are company names are rejected first, before the language is examined.
     *
     * @param s candidate string
     * @param lang one of ENGLISH, GERMAN, SPANISH, FRENCH or ITALIAN
     * @return false for company names; otherwise whether {@code s} matches the permissive
     *         person-name pattern of {@code lang} entirely
     * @throws IllegalArgumentException if {@code s} is not a company name and {@code lang}
     *         is none of the five languages above
     */
    public static boolean couldBePersonName(String s, Language lang) {
        if (isCompanyName(s)) {
            return false;
        }
        Pattern lax;
        if (lang.equals(Language.ENGLISH)) {
            lax = laxPersonNamePatternEn;
        } else if (lang.equals(Language.GERMAN)) {
            lax = laxPersonNamePatternDe;
        } else if (lang.equals(Language.SPANISH)) {
            lax = laxPersonNamePatternEs;
        } else if (lang.equals(Language.FRENCH)) {
            lax = laxPersonNamePatternFr;
        } else if (lang.equals(Language.ITALIAN)) {
            lax = laxPersonNamePatternIt;
        } else {
            throw new IllegalArgumentException("Unsupported language.");
        }
        return lax.matcher(s).matches();
    }

    /**
     * Tells whether a string is a person name in the given language according to the
     * strict pattern.
     *
     * @param m candidate string
     * @param lang one of ENGLISH, GERMAN, SPANISH, FRENCH or ITALIAN
     * @return true if {@code m} matches the strict person-name pattern of {@code lang} entirely
     * @throws IllegalArgumentException if {@code lang} is none of the five languages above
     */
    public static boolean isPersonName(String m, Language lang) {
        Pattern safe;
        if (lang.equals(Language.ENGLISH)) {
            safe = safePersonNamePatternEn;
        } else if (lang.equals(Language.GERMAN)) {
            safe = safePersonNamePatternDe;
        } else if (lang.equals(Language.SPANISH)) {
            safe = safePersonNamePatternEs;
        } else if (lang.equals(Language.FRENCH)) {
            safe = safePersonNamePatternFr;
        } else if (lang.equals(Language.ITALIAN)) {
            safe = safePersonNamePatternIt;
        } else {
            throw new IllegalArgumentException("Unsupported language.");
        }
        return safe.matcher(m).matches();
    }

    /**
     * Tells whether a string is the full name of an entry in {@link #usStates}.
     * Underscores in the input are treated as spaces; the comparison is case-sensitive.
     *
     * @param s candidate name, e.g. "New_York"
     * @return true if the name is one of the values of {@link #usStates}
     */
    public static boolean isUSState(String s) {
        String spaced = s.replace('_', ' ');
        return usStates.values().contains(spaced);
    }

    /**
     * Tells whether a string is a key of {@link #usStates}, ignoring case and one trailing dot.
     *
     * <p>Upper-casing uses {@link Locale#ROOT} so that the result does not depend on the
     * default locale (under a Turkish locale "hi" would otherwise become "H\u0130").
     *
     * @param s candidate abbreviation, e.g. "ny" or "Calif."
     * @return true if the abbreviation is known
     */
    public static boolean isUSStateAbbreviation(String s) {
        String code = s.endsWith(".") ? Char17.cutLast(s) : s;
        return usStates.containsKey(code.toUpperCase(Locale.ROOT));
    }

    /**
     * Looks up the value of {@link #usStates} for an abbreviation, ignoring case and one
     * trailing dot.
     *
     * <p>Upper-casing uses {@link Locale#ROOT} so that the result does not depend on the
     * default locale (under a Turkish locale "hi" would otherwise become "H\u0130").
     *
     * @param s abbreviation, e.g. "ny" or "Calif."
     * @return the mapped name, or null if the abbreviation is unknown
     */
    public static String unabbreviateUSState(String s) {
        String code = s.endsWith(".") ? Char17.cutLast(s) : s;
        return usStates.get(code.toUpperCase(Locale.ROOT));
    }

    /**
     * Tells whether a string, after being passed through {@code Char17.upCaseFirst}, is one
     * of the language names in {@link #languageCodes}.
     *
     * @param s candidate language name
     * @return true if the converted string is one of the values of {@link #languageCodes}
     */
    public static boolean isLanguage(String s) {
        String candidate = Char17.upCaseFirst(s);
        return languageCodes.values().contains(candidate);
    }

    /**
     * Tells whether a string is a key of {@link #languageCodes}, ignoring case.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so that the result does not depend on the
     * default locale (under a Turkish locale "ID" would otherwise become "\u0131d").
     *
     * @param s candidate code, e.g. "EN"
     * @return true if the code is known
     */
    public static boolean isLanguageCode(String s) {
        return languageCodes.containsKey(s.toLowerCase(Locale.ROOT));
    }

    /**
     * Looks up the value of {@link #languageCodes} for a code, ignoring case.
     *
     * <p>Lower-casing uses {@link Locale#ROOT} so that the result does not depend on the
     * default locale (under a Turkish locale "ID" would otherwise become "\u0131d").
     *
     * @param s language code, e.g. "EN"
     * @return the mapped language name, or null if the code is unknown
     */
    public static String languageForCode(String s) {
        return languageCodes.get(s.toLowerCase(Locale.ROOT));
    }

    /**
     * Tells whether a string is one of the values of {@link #nationality2country}.
     *
     * @param s candidate nation name (exact, case-sensitive)
     * @return true if the name is known
     */
    public static boolean isNation(String s) {
        boolean known = nationality2country.values().contains(s);
        return known;
    }

    /**
     * Tells whether a string is one of the keys of {@link #nationality2country}.
     *
     * @param s candidate nationality (exact, case-sensitive)
     * @return true if the nationality is known
     */
    public static boolean isNationality(String s) {
        boolean known = nationality2country.containsKey(s);
        return known;
    }

    /**
     * Looks up the value of {@link #nationality2country} for a nationality.
     *
     * @param s nationality (exact, case-sensitive)
     * @return the mapped nation name, or null if the nationality is unknown
     */
    public static String nationForNationality(String s) {
        String nation = nationality2country.get(s);
        return nation;
    }

    /**
     * Creates the most specific name object for a string. The checks run in this order:
     * company name, possible person name, abbreviation; anything else becomes a plain name.
     *
     * @param s the name as written
     * @param lang language used for the person-name check
     * @return a company, person, abbreviation or plain name object
     */
    public static NameML of(String s, Language lang) {
        NameML parsed;
        if (isCompanyName(s)) {
            parsed = new CompanyNameML(s);
        } else if (couldBePersonName(s, lang)) {
            parsed = new PersonNameML(s, lang);
        } else if (isAbbreviation(s)) {
            parsed = new AbbreviationML(s);
        } else {
            parsed = new NameML(s);
        }
        return parsed;
    }

    /**
     * Test driver: parses every line of the English and the German test file and prints
     * the description of the resulting name object.
     *
     * @param argv not used
     * @throws Exception if initialization or reading a test file fails
     */
    public static void main(String[] argv) throws Exception {
        init();
        String[] announcements = {"Testing for English", "Testing for German"};
        String[] testFiles = {"./testdata/NameParserTest.txt", "./testdata/NameParserTestDe.txt"};
        Language[] languages = {Language.ENGLISH, Language.GERMAN};
        for (int run = 0; run < announcements.length; run++) {
            Announce.doing(announcements[run]);
            for (String line : new FileLines(testFiles[run])) {
                D.p(of(line, languages[run]).describe());
            }
            Announce.done();
        }
    }

    // Builds the patterns and lookup tables that do not depend on any config file.
    static {
        // Plain compilations of the string constants declared above.
        companyNameSuffixPattern = Pattern.compile(companyNameSuffix);
        teamNamePattern = Pattern.compile(teamName);
        laxNamePattern = Pattern.compile(laxName);
        safeNamePattern = Pattern.compile(safeName);
        // A safe name followed by further safe names, each optionally introduced by "on"/"of"/"for".
        safeNamesPattern = Pattern.compile(safeName + NameML.optMul(B + NameML.opt("(?:on|of|for)(?:[\\s_]++)") + safeName));
        // Same, but without the optional preposition.
        safeNamesPatternNoPrep = Pattern.compile(safeName + NameML.optMul("(?:[\\s_]++)\\b\\p{Lu}(-[\\p{Lu}\\d]|[\\p{Lu}\\p{Ll}\\d]){2,}\\b"));
        // NOTE(review): the blank expression "(?:[\\s_]++)" sits inside a character class here, so
        // its punctuation characters become literal members of the class - confirm this is intended.
        laxAbbreviationPattern = Pattern.compile("\\b\\p{Lu}[\\p{Lu}\\d(?:[\\s_]++)-]++\\b");
        safeAbbreviationPattern = Pattern.compile("\\b\\p{Lu}[\\p{Lu}\\d-\\.]++\\b");
        // Company = name part, comma/blank separator, company suffix; name part and suffix are captured.
        laxCompanyPattern = Pattern.compile("(" + laxNamePattern + ")" + BC + "(" + companyNameSuffix + ")");
        // The strict variant additionally allows "Name & Name" in front of the suffix.
        safeCompanyPattern = Pattern.compile("(" + safeNamesPatternNoPrep + NameML.opt(NameML.opt(B) + "&" + NameML.opt(B) + safeNamesPatternNoPrep) + ")" + BC + "(" + companyNameSuffix + ")");
        // A given-name component is a capitalized word, a (possibly one-letter) abbreviation
        // ending in a dot, or a single uppercase letter.
        givenNameComponent = NameML.or(NameML.or("\\p{Lu}\\p{Ll}+\\b", "\\p{Lu}\\p{Ll}*+\\."), "\\p{Lu}\\b");
        // Components may be joined by hyphens; several given names are separated by blanks.
        givenName = BD + NameML.mulHyp(givenNameComponent);
        givenNames = NameML.mul(givenName);
        // Hyphen-joined name components, each optionally preceded by a direct prefix such as "Mc".
        familyName = BD + NameML.mulHyp(NameML.opt(directFamilyNamePrefix) + personNameComponent) + BD;
        // Uppercase abbreviation -> name. Besides the two-letter codes there is a "CALIF" entry.
        usStates = new FinalMap<String, String>("AL", "Alabama", "AK", "Alaska", "AS", "American Samoa", "AZ", "Arizona", "AR", "Arkansas", "CA", "California", "CALIF", "California", "CO", "Colorado", "CT", "Connecticut", "DE", "Delaware", "DC", "District of Columbia", "FM", "Federated States of Micronesia", "FL", "Florida", "GA", "Georgia", "GU", "Guam", "HI", "Hawaii", "ID", "Idaho", "IL", "Illinois", "IN", "Indiana", "IA", "Iowa", "KS", "Kansas", "KY", "Kentucky", "LA", "Louisiana", "ME", "Maine", "MH", "Marshall Islands", "MD", "Maryland", "MA", "Massachusetts", "MI", "Michigan", "MN", "Minnesota", "MS", "Mississippi", "MO", "Missouri", "MT", "Montana", "NE", "Nebraska", "NV", "Nevada", "NH", "New Hampshire", "NJ", "New Jersey", "NM", "New Mexico", "NY", "New York", "NC", "North Carolina", "ND", "North Dakota", "MP", "Northern Mariana Islands", "OH", "Ohio", "OK", "Oklahoma", "OR", "Oregon", "PW", "Palau", "PA", "Pennsylvania", "PR", "Puerto Rico", "RI", "Rhode Island", "SC", "South Carolina", "SD", "South Dakota", "TN", "Tennessee", "TX", "Texas", "UT", "Utah", "VT", "Vermont", "VI", "Virgin Islands", "VA", "Virginia", "WA", "Washington", "WV", "West Virginia", "WI", "Wisconsin", "WY", "Wyoming");
        // Lowercase two-letter code -> language name.
        // NOTE(review): the value for "gn" ends in U+FFFD (replacement character), and several
        // values are a single word ("North", "South", "Northern", "Church") - these look like
        // truncated or mis-encoded entries; verify against the original data.
        languageCodes = new FinalMap<String, String>("aa", "Afar", "ab", "Abkhazian", "ae", "Avestan", "af", "Afrikaans", "ak", "Akan", "am", "Amharic", "an", "Aragonese", "ar", "Arabic", "as", "Assamese", "av", "Avaric", "ay", "Aymara", "az", "Azerbaijani", "ba", "Bashkir", "be", "Belarusian", "bg", "Bulgarian", "bh", "Bihari", "bi", "Bislama", "bm", "Bambara", "bn", "Bengali", "bo", "Tibetan", "br", "Breton", "bs", "Bosnian", "ca", "Catalan", "ce", "Chechen", "ch", "Chamorro", "co", "Corsican", "cr", "Cree", "cs", "Czech", "cu", "Church", "cv", "Chuvash", "cy", "Welsh", "da", "Danish", "de", "German", "dv", "Divehi", "dz", "Dzongkha", "ee", "Ewe", "el", "Greek", "en", "English", "eo", "Esperanto", "es", "Spanish", "et", "Estonian", "eu", "Basque", "fa", "Persian", "ff", "Fulah", "fi", "Finnish", "fj", "Fijian", "fo", "Faroese", "fr", "French", "fy", "Western Frisian", "ga", "Irish", "gd", "Scottish", "gl", "Galician", "gn", "Guaran\ufffd", "gu", "Gujarati", "gv", "Manx", "ha", "Hausa", "he", "Hebrew", "hi", "Hindi", "ho", "Hiri", "hr", "Croatian", "ht", "Haitian", "hu", "Hungarian", "hy", "Armenian", "hz", "Herero", "ia", "Interlingua", "id", "Indonesian", "ie", "Interlingue", "ig", "Igbo", "ii", "Sichuan", "ik", "Inupiaq", "io", "Ido", "is", "Icelandic", "it", "Italian", "iu", "Inuktitut", "ja", "Japanese", "jv", "Javanese", "ka", "Georgian", "kg", "Kongo", "ki", "Kikuyu", "kj", "Kwanyama", "kk", "Kazakh", "kl", "Kalaallisut", "km", "Khmer", "kn", "Kannada", "ko", "Korean", "kr", "Kanuri", "ks", "Kashmiri", "ku", "Kurdish", "kv", "Komi", "kw", "Cornish", "ky", "Kirghiz", "la", "Latin", "lb", "Luxembourgish", "lg", "Ganda", "li", "Limburgish", "ln", "Lingala", "lo", "Lao", "lt", "Lithuanian", "lu", "Luba-Katanga", "lv", "Latvian", "mg", "Malagasy", "mh", "Marshallese", "mi", "Maori", "mk", "Macedonian", "ml", "Malayalam", "mn", "Mongolian", "mo", "Moldavian", "mr", "Marathi", "ms", "Malay", "mt", "Maltese", "my", "Burmese", "na", "Nauru", "nb", "Norwegian", "nd", 
"North", "ne", "Nepali", "ng", "Ndonga", "nl", "Dutch", "nn", "Norwegian", "no", "Norwegian", "nr", "South", "nv", "Navajo", "ny", "Chichewa", "oc", "Occitan", "oj", "Ojibwa", "om", "Oromo", "or", "Oriya", "os", "Ossetian", "pa", "Panjabi", "pi", "Pali", "pl", "Polish", "ps", "Pashto", "pt", "Portuguese", "qu", "Quechua", "rm", "Raeto-Romance", "rn", "Kirundi", "ro", "Romanian", "ru", "Russian", "rw", "Kinyarwanda", "ry", "Rusyn", "sa", "Sanskrit", "sc", "Sardinian", "sd", "Sindhi", "se", "Northern", "sg", "Sango", "sh", "Serbo-Croatian", "si", "Sinhalese", "sk", "Slovak", "sl", "Slovenian", "sm", "Samoan", "sn", "Shona", "so", "Somali", "sq", "Albanian", "sr", "Serbian", "ss", "Swati", "st", "Sotho", "su", "Sundanese", "sv", "Swedish", "sw", "Swahili", "ta", "Tamil", "te", "Telugu", "tg", "Tajik", "th", "Thai", "ti", "Tigrinya", "tk", "Turkmen", "tl", "Tagalog", "tn", "Tswana", "to", "Tonga", "tr", "Turkish", "ts", "Tsonga", "tt", "Tatar", "tw", "Twi", "ty", "Tahitian", "ug", "Uighur", "uk", "Ukrainian", "ur", "Urdu", "uz", "Uzbek", "ve", "Venda", "vi", "Vietnamese", "vo", "Volap\u00fck", "wa", "Walloon", "wo", "Wolof", "xh", "Xhosa", "yi", "Yiddish", "yo", "Yoruba", "za", "Zhuang", "zh", "Chinese", "zu", "Zulu");
        // Nationality adjective -> nation name.
        // NOTE(review): some entries carry what look like leaked footnote markers or table
        // residue ("Burkina Fasoa", "Burmab", "Dominicand", "Dominicane", "Kazakhstaniz",
        // "See Taiwan"); verify against the original data before relying on them.
        nationality2country = new FinalMap<String, String>("African", "Africa", "Antarctic", "Antarctica", "Americana", "Americas", "Asian", "Asia", "Middle Eastern", "Middle East", "Australasian", "Australasia", "Australian", "Australia", "Eurasian", "Eurasia", "European", "Europe", "North American", "North America", "Oceanian", "Oceania", "South American", "South America", "Afghan", "Afghanistan", "Albanian", "Albania", "Algerian", "Algeria", "American Samoan", "American Samoa", "Andorran", "Andorra", "Angolan", "Angola", "Anguillan", "Anguilla", "Antiguan", "Antigua and Barbuda", "Argentine", "Argentina", "Argentinean", "Argentina", "Argentinian", "Argentina", "Armenian", "Armenia", "Aruban", "Aruba", "Austrian", "Austria", "Azerbaijani", "Azerbaijan", "Azeri", "Azerbaijan", "Bahamian", "Bahamas", "Bahraini", "Bahrain", "Bangladeshi", "Bangladesh", "Barbadian", "Barbados", "Bajan", "Barbados", "Belarusian", "Belarus", "Belgian", "Belgium", "Belizean", "Belize", "Beninese", "Benin", "Bermudian", "Bermuda", "Bermudan", "Bermuda", "Bhutanese", "Bhutan", "Bolivian", "Bolivia", "Bosnian", "Bosnia and Herzegovina", "Bosniak", "Bosnia and Herzegovina", "Herzegovinian", "Bosnia and Herzegovina", "Botswanan", "Botswana", "Brazilian", "Brazil", "British Virgin Island", "British Virgin Islands", "Bruneian", "Brunei", "Bulgarian", "Bulgaria", "Burkinabe", "Burkina Fasoa", "Burmese", "Burmab", "Burundian", "Burundi", "Cambodian", "Cambodia", "Cameroonian", "Cameroon", "Canadian", "Canada", "Cape Verdean", "Cape Verde", "Caymanian", "Cayman Islands", "Central African", "Central African Republic", "Chadian", "Chad", "Chilean", "Chile", "Chinese", "People's Republic of China", "See Taiwan", "Republic of China", "Christmas Island", "Christmas Island", "Cocos Island", "Cocos (Keeling) Islands", "Colombian", "Colombia", "Comorian", "Comoros", "Congolese", "Democratic Republic of the Congo", "Cook Island", "Cook Islands", "Costa Rican", "Costa Rica", "Ivorian", "C\u00f4te 
d'Ivoire", "Croatian", "Croatia", "Cuban", "Cuba", "Cypriot", "Cyprus", "Czech", "Czech Republic", "Danish", "Denmark", "Djiboutian", "Djibouti", "Dominicand", "Dominica", "Dominicane", "Dominican Republic", "Timorese", "East Timor", "Ecuadorian", "Ecuador", "Egyptian", "Egypt", "Salvadoran", "El Salvador", "English", "England", "Equatorial Guinean", "Equatorial Guinea", "Eritrean", "Eritrea", "Estonian", "Estonia", "Ethiopian", "Ethiopia", "Falkland Island", "Falkland Islands", "Faroese", "Faroe Islands", "Fijian", "Fiji", "Finnish", "Finland", "French", "France", "French Guianese", "French Guiana", "French Polynesian", "French Polynesia", "Gabonese", "Gabon", "Gambian", "Gambia", "Georgian", "Georgia", "German", "Germany", "Ghanaian", "Ghana", "Gibraltar", "Gibraltar", "Greek", "Greece", "Greenlandic", "Greenland", "Grenadian", "Grenada", "Guadeloupe", "Guadeloupe", "Guamanian", "Guam", "Guatemalan", "Guatemala", "Guinean", "Guinea", "Guyanese", "Guyana", "Haitian", "Haiti", "Honduran", "Honduras", "Hong Kong", "Hong Kong", "Hungarian", "Hungary", "Icelandic", "Iceland", "Indian", "India", "Indonesian", "Indonesia", "Iranian", "Iran", "Iraqi", "Iraq", "Manx", "Isle of Man", "Israeli", "Israel", "Italian", "Italy", "Jamaican", "Jamaica", "Japanese", "Japan", "Jordanian", "Jordan", "Kazakhstaniz", "Kazakhstan", "Kenyan", "Kenya", "I-Kiribati", "Kiribati", "North Korean", "North Korea", "South Korean", "South Korea", "Kosovar", "Kosovo", "Kuwaiti", "Kuwait", "Kyrgyzstani", "Kyrgyzstan", "Laotian", "Laos", "Latvian", "Latvia", "Lebanese", "Lebanon", "Basotho", "Lesotho", "Liberian", "Liberia", "Libyan", "Libya", "Liechtenstein", "Liechtenstein", "Lithuanian", "Lithuania", "Luxembourg", "Luxembourg", "Macanese", "Macau", "Macedonian", "Republic of Macedonia", "Malagasy", "Madagascar", "Malawian", "Malawi", "Malaysian", "Malaysia", "Maldivian", "Maldives", "Malian", "Mali", "Maltese", "Malta", "Marshallese", "Marshall Islands", "Martiniquais", "Martinique", 
"Mauritanian", "Mauritania", "Mauritian", "Mauritius", "Mahoran", "Mayotte", "Mexican", "Mexico", "Micronesian", "Micronesia", "Moldovan", "Moldova", "Mon\u00e9gasque", "Monaco", "Mongolian", "Mongolia", "Montenegrin", "Montenegro", "Montserratian", "Montserrat", "Moroccan", "Morocco", "Mozambican", "Mozambique", "Namibian", "Namibia", "Nauruan", "Nauru", "Nepali", "Nepal", "Dutch", "Netherlands", "Dutch Antillean", "Netherlands Antilles", "New Caledonian", "New Caledonia", "New Zealand", "New Zealand", "Nicaraguan", "Nicaragua", "Niuean", "Niue", "Nigerien", "Niger", "Nigerian", "Nigeria", "Norwegian", "Norway", "Northern Irish", "Northern Ireland", "Northern Marianan", "Northern Marianas", "Omani", "Oman", "Pakistani", "Pakistan", "Palestinian", "Palestinian territories", "Palauan", "Palau", "Panamanian", "Panama", "Papua New Guinean", "Papua New Guinea", "Paraguayan", "Paraguay", "Peruvian", "Peru", "Philippine", "Philippines", "Filipino", "Philippines", "Pitcairn Island", "Pitcairn Island", "Polish", "Poland", "Portuguese", "Portugal", "Puerto Rican", "Puerto Rico", "Qatari", "Qatar", "Irish", "Republic of Ireland", "R\u00e9unionese", "R\u00e9union", "Romanian", "Romania", "Russian", "Russia", "Rwandan", "Rwanda", "St. Helenian", "St. Helena", "Kittitian", "St. Kitts and Nevis", "St. Lucian", "St. Lucia", "Saint-Pierrais", "Saint-Pierre and Miquelon", "St. Vincentian", "St. 
Vincent and the Grenadines", "Samoan", "Samoa", "Sammarinese", "San Marino", "S\u00e3o Tom\u00e9an", "S\u00e3o Tom\u00e9 and Pr\u00edncipe", "Saudi", "Saudi Arabia", "Scottish", "Scotland", "Senegalese", "Senegal", "Serbian", "Serbia", "Seychellois", "Seychelles", "Sierra Leonean", "Sierra Leone", "Singaporean", "Singapore", "Slovak", "Slovakia", "Slovene", "Slovenia", "Slovenian", "Slovenia", "Solomon Island", "Solomon Islands", "Somali", "Somalia", "Somaliland", "Somaliland", "South African", "South Africa", "Spanish", "Spain", "Sri Lankan", "Sri Lanka", "Sudanese", "Sudan", "Surinamese", "Surinam", "Swazi", "Swaziland", "Swedish", "Sweden", "Swiss", "Switzerland", "Syrian", "Syria", "Taiwanese", "Taiwan", "Tajikistani", "Tajikistan", "Tanzanian", "Tanzania", "Thai", "Thailand", "Togolese", "Togo", "Tongan", "Tonga", "Trinidadian", "Trinidad and Tobago", "Tunisian", "Tunisia", "Turkish", "Turkey", "Turkmen", "Turkmenistan", "Tuvaluan", "Tuvalu", "Ugandan", "Uganda", "Ukrainian", "Ukraine", "Emirati", "United Arab Emirates", "British", "United Kingdom", "American", "United States of America", "Uruguayan", "Uruguay", "Uzbekistani", "Uzbekistan", "Uzbek", "Uzbekistan", "Vanuatuan", "Vanuatu", "Venezuelan", "Venezuela", "Vietnamese", "Vietnam", "Virgin Island", "Virgin Islands", "Welsh", "Wales", "Wallisian", "Wallis and Futuna", "Sahrawi", "Western Sahara", "Yemeni", "Yemen", "Zambian", "Zambia", "Zimbabwean", "Zimbabwe");
    }

    /**
     * A person name split into its components: titles, given names, nickname, family name
     * (with optional prefix and suffix), attribute (with optional prefix), roman numeral and
     * city.
     *
     * <p>The components are read from the capturing groups of the language-specific "lax person
     * name" pattern declared in the enclosing class. If the pattern does not match the input,
     * every component stays {@code null} and {@link #normalize()} falls back to the original
     * string.
     */
    public static class PersonNameML
    extends NameML {
        /** Separator characters (whitespace or underscore) at the very start or end of a string. */
        private static final Pattern EDGE_SEPARATORS = Pattern.compile("\\A[\\s_]+|[\\s_]+\\z");

        protected String myTitles;
        protected String myGivenNames;
        protected String myFamilyNamePrefix;
        protected String myAttributePrefix;
        protected String myFamilyName;
        protected String myAttribute;
        protected String myFamilyNameSuffix;
        protected String myRoman;
        protected String myCity;
        protected String myNickname;

        /**
         * Returns capturing group {@code n} of {@code m} without surrounding separators.
         *
         * <p>All leading and trailing whitespace/underscore characters are removed (previously
         * only a single character on a single side was dropped, so groups such as
         * {@code " von "} kept a stray blank).
         *
         * @param m a matcher on which a match has already succeeded
         * @param n index of the capturing group to read
         * @return the cleaned group text, or {@code null} if the group did not participate in
         *         the match, is empty, or consists of separators only
         */
        protected static String getComponent(Matcher m, int n) {
            String raw = m.group(n);
            if (raw == null || raw.length() == 0) {
                return null;
            }
            return PersonNameML.stripSeparators(raw);
        }

        /** Removes leading/trailing separators from {@code text}; yields {@code null} if nothing is left. */
        private static String stripSeparators(String text) {
            String stripped = EDGE_SEPARATORS.matcher(text).replaceAll("");
            return stripped.isEmpty() ? null : stripped;
        }

        /** Creates the matcher of the lax person-name pattern that belongs to {@code lang}. */
        private static Matcher matcherFor(String text, Language lang) {
            if (lang.equals(Language.ENGLISH)) {
                return laxPersonNamePatternEn.matcher(text);
            }
            if (lang.equals(Language.FRENCH)) {
                return laxPersonNamePatternFr.matcher(text);
            }
            if (lang.equals(Language.SPANISH)) {
                return laxPersonNamePatternEs.matcher(text);
            }
            if (lang.equals(Language.GERMAN)) {
                return laxPersonNamePatternDe.matcher(text);
            }
            if (lang.equals(Language.ITALIAN)) {
                return laxPersonNamePatternIt.matcher(text);
            }
            throw new IllegalArgumentException("Unsupported language");
        }

        /** Selects the set of titles that are followed by a given name for {@code lang}. */
        private static Set<?> givenNameTitlesFor(Language lang) {
            if (lang.equals(Language.ENGLISH)) {
                return titlesForGivenNamesEn;
            }
            if (lang.equals(Language.FRENCH)) {
                return titlesForGivenNamesFr;
            }
            if (lang.equals(Language.SPANISH)) {
                return titlesForGivenNamesEs;
            }
            if (lang.equals(Language.GERMAN)) {
                return titlesForGivenNamesDe;
            }
            if (lang.equals(Language.ITALIAN)) {
                return titlesForGivenNamesIt;
            }
            throw new IllegalArgumentException("Unsupported language");
        }

        /**
         * Parses {@code s} as a person name of the given language.
         *
         * <p>Underscores in {@code s} are treated as spaces for parsing; the original string
         * handed to the superclass is left untouched.
         *
         * @param s    the name to parse
         * @param lang the language whose pattern and title list are used; English, French,
         *             Spanish, German and Italian are supported
         * @throws IllegalArgumentException if {@code lang} is none of the supported languages
         */
        public PersonNameML(String s, Language lang) {
            super(s);
            Matcher m = PersonNameML.matcherFor(s.replace('_', ' '), lang);
            if (!m.matches()) {
                // Not recognizable as a person name: leave all components null.
                return;
            }
            this.myTitles = PersonNameML.getComponent(m, 1);
            this.myGivenNames = PersonNameML.getComponent(m, 2);
            this.myNickname = PersonNameML.getComponent(m, 3);
            this.myFamilyNamePrefix = PersonNameML.getComponent(m, 5);
            this.myFamilyName = PersonNameML.getComponent(m, 6);
            String attr = PersonNameML.getComponent(m, 4);
            if (attr != null) {
                // An attribute prefix was matched, so what was captured in the family-name
                // group is stored as the attribute instead.
                this.myAttributePrefix = attr;
                this.myAttribute = this.myFamilyName;
                this.myFamilyName = null;
            }
            this.myFamilyNameSuffix = PersonNameML.getComponent(m, 7);
            this.myRoman = PersonNameML.getComponent(m, 8);
            this.myCity = PersonNameML.getComponent(m, 9);
            if (this.myNickname == null) {
                // Group 10 is a second capture position for the nickname.
                this.myNickname = PersonNameML.getComponent(m, 10);
            }

            Set<?> titlesForGivenNames = PersonNameML.givenNameTitlesFor(lang);
            if (this.myGivenNames == null && this.myTitles != null
                    && titlesForGivenNames.contains(this.myTitles.toLowerCase())) {
                // The title is one that precedes a given name: reinterpret the lone name.
                this.myGivenNames = this.myFamilyName;
                this.myFamilyName = null;
            }
            if (this.myGivenNames == null && this.myRoman != null) {
                // A lone name followed by a roman numeral is stored as a given name.
                this.myGivenNames = this.myFamilyName;
                this.myFamilyName = null;
            }
            if (this.myFamilyName != null && this.myGivenNames != null
                    && familyNameSuffixPattern.matcher(this.myFamilyName).matches()) {
                // The "family name" is really a suffix, so the last given name is the actual
                // family name. myGivenNames is already free of edge separators here, hence it
                // ends with its last token.
                String[] tokens = this.myGivenNames.split(NameML.B);
                String lastToken = tokens[tokens.length - 1];
                this.myFamilyNameSuffix = this.myFamilyName;
                this.myFamilyName = lastToken;
                if (tokens.length == 1) {
                    this.myGivenNames = null;
                } else {
                    // Strip the separator that preceded the moved token; otherwise the given
                    // names end with a blank and normalize() produces a double space.
                    String remaining = this.myGivenNames.substring(0, this.myGivenNames.length() - lastToken.length());
                    this.myGivenNames = PersonNameML.stripSeparators(remaining);
                }
            }
        }

        /**
         * Returns the first given name.
         *
         * @return the given names up to (excluding) the first space, or {@code null} if there
         *         are no given names
         */
        public String givenName() {
            if (this.myGivenNames == null) {
                return null;
            }
            int firstSpace = this.myGivenNames.indexOf(' ');
            return firstSpace == -1 ? this.myGivenNames : this.myGivenNames.substring(0, firstSpace);
        }

        /** Returns the attribute, or {@code null} if none was recognized. */
        public String attribute() {
            return this.myAttribute;
        }

        /** Returns the attribute prefix, or {@code null} if none was recognized. */
        public String attributePrefix() {
            return this.myAttributePrefix;
        }

        /** Returns the city, or {@code null} if none was recognized. */
        public String city() {
            return this.myCity;
        }

        /** Returns the nickname, or {@code null} if none was recognized. */
        public String nickname() {
            return this.myNickname;
        }

        /** Returns the family name, or {@code null} if none was recognized. */
        public String familyName() {
            return this.myFamilyName;
        }

        /** Returns the family name prefix, or {@code null} if none was recognized. */
        public String familyNamePrefix() {
            return this.myFamilyNamePrefix;
        }

        /** Returns the family name suffix, or {@code null} if none was recognized. */
        public String familyNameSuffix() {
            return this.myFamilyNameSuffix;
        }

        /** Returns all given names as one string, or {@code null} if none were recognized. */
        public String givenNames() {
            return this.myGivenNames;
        }

        /** Returns the roman numeral, or {@code null} if none was recognized. */
        public String roman() {
            return this.myRoman;
        }

        /** Returns the titles, or {@code null} if none were recognized. */
        public String titles() {
            return this.myTitles;
        }

        /**
         * Builds the normalized form of the name.
         *
         * <ul>
         * <li>With a family name: given names (if any), the family name, and {@code ", Jr."} or
         *     {@code ", Sr."} when the suffix starts with j/J or s/S respectively.</li>
         * <li>Without a family name but with given names: the given names followed by the
         *     roman numeral and the attribute, where present.</li>
         * <li>Otherwise: the original string.</li>
         * </ul>
         *
         * @return the normalized name
         */
        @Override
        public String normalize() {
            String given = this.givenNames();
            if (this.myFamilyName != null) {
                String family = this.myFamilyName;
                if (this.myFamilyNameSuffix != null) {
                    if (this.myFamilyNameSuffix.matches("[jJ].*")) {
                        family = family + ", Jr.";
                    } else if (this.myFamilyNameSuffix.matches("[sS].*")) {
                        family = family + ", Sr.";
                    }
                }
                return given == null ? family : given + ' ' + family;
            }
            if (given == null) {
                return this.original();
            }
            if (this.myRoman != null) {
                given = given + ' ' + this.myRoman;
            }
            if (this.myAttribute != null) {
                given = given + ' ' + this.myAttribute;
            }
            return given;
        }

        /**
         * Returns a multi-line, human-readable listing of the original string, every component
         * and the normalized form.
         */
        @Override
        public String describe() {
            return "PersonName\n  Original: " + this.original
                    + "\n  Titles: " + this.titles()
                    + "\n  Given Name: " + this.givenName()
                    + "\n  Given Names: " + this.givenNames()
                    + "\n  Nickname: " + this.nickname()
                    + "\n  Family Name Prefix: " + this.familyNamePrefix()
                    + "\n  Attribute Prefix: " + this.attributePrefix()
                    + "\n  Family Name: " + this.familyName()
                    + "\n  Attribute: " + this.attribute()
                    + "\n  Family Name Suffix: " + this.familyNameSuffix()
                    + "\n  Roman: " + this.roman()
                    + "\n  City: " + this.city()
                    + "\n  Normalized: " + this.normalize();
        }
    }

    /**
     * A company name split into the name proper and a suffix, as captured by groups 1 and 2
     * of the enclosing class's lax company pattern. When the pattern does not match, both
     * parts remain {@code null}.
     */
    public static class CompanyNameML
    extends NameML {
        protected String name;
        protected String suffix;

        /**
         * Parses {@code s} as a company name.
         *
         * @param s the string to parse
         */
        public CompanyNameML(String s) {
            super(s);
            Matcher parsed = laxCompanyPattern.matcher(s);
            if (parsed.matches()) {
                this.name = parsed.group(1);
                this.suffix = parsed.group(2);
            }
        }

        /** Returns the company name without its suffix, or {@code null} if parsing failed. */
        public String name() {
            return this.name;
        }

        /** Returns the captured suffix; may be {@code null}. */
        public String suffix() {
            return this.suffix;
        }

        /** Returns the company name without its suffix (same value as {@link #name()}). */
        @Override
        public String normalize() {
            return this.name;
        }

        /** Returns a multi-line listing of the original string, both parts and the normalized form. */
        @Override
        public String describe() {
            String header = "CompanyName\n  Original: " + this.original;
            String parts = "\n  Name: " + this.name + "\n  Suffix: " + this.suffix;
            String footer = "\n  Normalized: " + this.normalize();
            return header + parts + footer;
        }
    }

    /**
     * An abbreviation. Its normalized form is the superclass normalization converted to
     * upper case.
     */
    public static class AbbreviationML
    extends NameML {
        /**
         * Wraps {@code s} as an abbreviation.
         *
         * @param s the abbreviation string
         */
        public AbbreviationML(String s) {
            super(s);
            // The match outcome is not used for anything; the call is retained so that any
            // exception it raises for the given argument still surfaces from the constructor.
            laxAbbreviationPattern.matcher(s).matches();
        }

        /**
         * Returns the upper-cased superclass normalization. The value is computed on first use
         * and stored in {@code normalized} for later calls.
         */
        @Override
        public String normalize() {
            if (this.normalized != null) {
                return this.normalized;
            }
            this.normalized = super.normalize().toUpperCase();
            return this.normalized;
        }

        /** Returns a multi-line listing of the original string and the normalized form. */
        @Override
        public String describe() {
            String header = "Abbreviation\n  Original: " + this.original;
            return header + "\n  Normalized: " + this.normalize();
        }
    }
}

