001/*
002 * Copyright 2011 Atteo.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
005 * in compliance with the License. You may obtain a copy of the License at
006 *
007 * http://www.apache.org/licenses/LICENSE-2.0
008 *
009 * Unless required by applicable law or agreed to in writing, software distributed under the License
010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
011 * or implied. See the License for the specific language governing permissions and limitations under
012 * the License.
013 */
014package org.atteo.evo.inflector;
015
016/**
017 * Transforms English words from singular to plural form.
018 * <p>
019 * Examples:
020 * <pre>
021 *    English.plural("word") = "words";
022 *
023 *    English.plural("cat", 1) = "cat";
024 *    English.plural("cat", 2) = "cats";
025 * </pre>
026 * </p>
027 * <p>
028 * Based on <a href="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html">
029 * An Algorithmic Approach to English Pluralization</a> by Damian Conway.
030 * </p>
031 */
032public class English extends TwoFormInflector {
033    public static enum MODE {
034        ENGLISH_ANGLICIZED, ENGLISH_CLASSICAL
035    }
036
037    private static final String[] CATEGORY_EX_ICES = { "codex", "murex",
038            "silex", };
039
040    private static final String[] CATEGORY_IX_ICES = { "radix", "helix", };
041
042    private static final String[] CATEGORY_UM_A = { "bacterium",
043            "agendum", "desideratum", "erratum", "stratum", "datum", "ovum",
044            "extremum", "candelabrum", };
045
046    // Always us -> i
047    private static final String[] CATEGORY_US_I = { "alumnus", "alveolus",
048            "bacillus", "bronchus", "locus", "nucleus", "stimulus", "meniscus",
049            "thesaurus", };
050
051    private static final String[] CATEGORY_ON_A = { "criterion",
052            "perihelion", "aphelion", "phenomenon", "prolegomenon", "noumenon",
053            "organon", "asyndeton", "hyperbaton", };
054
055    private static final String[] CATEGORY_A_AE = { "alumna", "alga",
056            "vertebra", "persona" };
057
058    // Always o -> os
059    private static final String[] CATEGORY_O_OS = { "albino",
060            "archipelago", "armadillo", "commando", "crescendo", "fiasco",
061            "ditto", "dynamo", "embryo", "ghetto", "guano", "inferno", "jumbo",
062            "lumbago", "magneto", "manifesto", "medico", "octavo", "photo",
063            "pro", "quarto", "canto", "lingo", "generalissimo", "stylo",
064            "rhino", "casino", "auto", "macro", "zero",
065    };
066
067    // Classical o -> i  (normally -> os)
068    private static final String[] CATEGORY_O_I = {
069            "solo", "soprano", "basso", "alto", "contralto", "tempo", "piano",
070            "virtuoso", };
071
072    private static final String[] CATEGORY_EN_INA = {
073            "stamen", "foramen", "lumen"
074    };
075
076    // -a to -as (anglicized) or -ata (classical)
077    private static final String[] CATEGORY_A_ATA = {
078            "anathema", "enema", "oedema", "bema", "enigma", "sarcoma",
079            "carcinoma", "gumma", "schema", "charisma", "lemma", "soma",
080            "diploma", "lymphoma", "stigma", "dogma", "magma", "stoma",
081            "drama", "melisma", "trauma", "edema", "miasma"
082    };
083
084    private static final String[] CATEGORY_IS_IDES = {
085            "iris", "clitoris"
086    };
087
088    // -us to -uses (anglicized) or -us (classical)
089    private static final String[] CATEGORY_US_US = {
090            "apparatus", "impetus", "prospectus", "cantus", "nexus", "sinus", "coitus",
091            "plexus", "status", "hiatus"
092    };
093
094    private static final String[] CATEGORY_NONE_I = {
095        "afreet", "afrit", "efreet"
096    };
097
098    private static final String[] CATEGORY_NONE_IM = {
099        "cherub", "goy", "seraph"
100    };
101
102    private static final String[] CATEGORY_EX_EXES = {
103        "apex", "latex", "vertex", "cortex", "pontifex", "vortex", "index", "simplex"
104    };
105
106    private static final String[] CATEGORY_IX_IXES = {
107        "appendix"
108    };
109
110    private static final String[] CATEGORY_S_ES = {
111        "acropolis", "chaos", "lens", "aegis",
112        "cosmos", "mantis", "alias", "dais", "marquis", "asbestos",
113        "digitalis", "metropolis", "atlas", "epidermis", "pathos",
114        "bathos", "ethos", "pelvis", "bias", "gas", "polis", "caddis",
115        "glottis", "rhinoceros", "cannabis", "glottis", "sassafras",
116        "canvas", "ibis", "trellis"
117    };
118
119    private static final String[] CATEGORY_MAN_MANS = {
120        "human", "Alabaman", "Bahaman", "Burman", "German", "Hiroshiman", "Liman", "Nakayaman", "Oklahoman",
121        "Panaman", "Selman", "Sonaman", "Tacoman", "Yakiman", "Yokohaman", "Yuman"
122    };
123
124    private static English inflector = new English();
125
126
127    public English() {
128        this(MODE.ENGLISH_ANGLICIZED);
129    }
130
131    public English(MODE mode) {
132        // 2. Handle words that do not inflect in the plural (such as fish, travois, chassis, nationalities ending
133        // in -ese etc.
134
135        rule("(fish|ois|sheep|deer|pox|itis)$", "$1");
136
137        uncountable(new String[] { "bison", "flounder", "pliers", "bream",
138                "gallows", "proceedings", "breeches", "graffiti", "rabies",
139                "britches", "headquarters", "salmon", "carp", "herpes",
140                "scissors", "chassis", "high-jinks", "sea-bass", "clippers",
141                "homework", "series", "cod", "innings", "shears",
142                "contretemps", "jackanapes", "species", "corps", "mackerel",
143                "swine", "debris", "measles", "trout", "diabetes", "mews",
144                "tuna", "djinn", "mumps", "whiting", "eland", "news",
145                "wildebeest", "elk", "pincers", "sugar" });
146
147        // 4. Handle standard irregular plurals (mongooses, oxen, etc.)
148
149        irregular(new String[][] {
150                { "child", "children" }, // classical
151                { "ephemeris", "ephemerides" }, // classical
152                { "mongoose", "mongoose" }, // anglicized
153                { "mythos", "mythoi" }, // classical
154                // TODO: handle entire word correctly
155                //{ "ox", "oxen" }, // classical
156                { "soliloquy", "soliloquies" }, // anglicized
157                { "trilby", "trilbys" }, // anglicized
158                { "genus", "genera" }, // classical
159                { "quiz", "quizzes" },
160        });
161
162        if (mode == MODE.ENGLISH_ANGLICIZED) {
163            // Anglicized plural
164            irregular(new String[][] {
165                    { "beef", "beefs" },
166                    { "brother", "brothers" },
167                    { "cow", "cows" },
168                    { "genie", "genies" },
169                    { "money", "moneys" },
170                    { "octopus", "octopuses" },
171                    { "opus", "opuses" },
172                });
173        } else if (mode == MODE.ENGLISH_CLASSICAL) {
174            // Classical plural
175            irregular(new String[][] { { "beef", "beeves"},
176                    { "brother", "brethren" },
177                    { "cos", "kine" }, { "genie", "genii"},
178                    { "money", "monies" },
179                    { "octopus", "octopodes" },
180                    { "opus", "opera" },
181            });
182        }
183
184        categoryRule(CATEGORY_MAN_MANS, "(.*)$", "$1s");
185
186        // questionable
187        /*
188         rule(new String[][] {
189                { "(ness)$", "$1" },
190                { "(ality)$", "$1" }
191                { "(icity)$", "$1" },
192                { "(ivity)$", "$1" },
193        });
194         */
195        // 5. Handle irregular inflections for common suffixes
196        rule(new String[][] {
197                { "man$", "men" },
198                { "([lm])ouse$", "$1ice" },
199                { "tooth$", "teeth" },
200                { "goose$", "geese" },
201                { "foot$", "feet" },
202                { "zoon$", "zoa" },
203                { "([csx])is$", "$1es" },
204        });
205
206        // 6. Handle fully assimilated classical inflections
207        categoryRule(CATEGORY_EX_ICES, "(.*)ex$", "$1ices");
208        categoryRule(CATEGORY_IX_ICES, "(.*)ix$", "$1ices");
209        categoryRule(CATEGORY_UM_A, "(.*)um$", "$1a");
210        categoryRule(CATEGORY_ON_A, "(.*)on$", "$1a");
211        categoryRule(CATEGORY_A_AE, "(.*)a$", "$1ae");
212
213        // 7. Handle classical variants of modern inflections
214        if (mode == MODE.ENGLISH_CLASSICAL) {
215            rule(new String[][]{
216                    { "trix$", "trices" },
217                    { "eau$", "eaux" },
218                    { "ieu$", "ieux" },
219                    { "(..[iay])nx$", "$1nges" },
220            });
221            categoryRule(CATEGORY_EN_INA, "(.*)en$", "$1ina");
222            categoryRule(CATEGORY_A_ATA, "(.*)a$", "$1ata");
223            categoryRule(CATEGORY_IS_IDES, "(.*)is$", "$1ides");
224            categoryRule(CATEGORY_US_US, "", "");
225            categoryRule(CATEGORY_O_I, "(.*)o$", "$1i");
226            categoryRule(CATEGORY_NONE_I, "(.*)$", "$1i");
227            categoryRule(CATEGORY_NONE_IM, "(.*)$", "$1im");
228            categoryRule(CATEGORY_EX_EXES, "(.*)ex$", "$1ices");
229            categoryRule(CATEGORY_IX_IXES, "(.*)ix$", "$1ices");
230        }
231
232        categoryRule(CATEGORY_US_I, "(.*)us$", "$1i");
233
234        rule("([cs]h|[zx])$", "$1es");
235        categoryRule(CATEGORY_S_ES, "(.*)$", "$1es");
236        categoryRule(CATEGORY_IS_IDES, "(.*)$", "$1es");
237        categoryRule(CATEGORY_US_US, "(.*)$", "$1es");
238        rule("(us)$", "$1es");
239        categoryRule(CATEGORY_A_ATA, "(.*)$", "$1s");
240
241        // The suffixes -ch, -sh, and -ss all take -es in the plural (churches,
242        // classes, etc)...
243        rule(new String[][] { { "([cs])h$", "$1hes" }, { "ss$", "sses" } });
244
245        // Certain words ending in -f or -fe take -ves in the plural (lives,
246        // wolves, etc)...
247        rule(new String[][] {
248                { "([aeo]l)f$", "$1ves" },
249                { "([^d]ea)f$", "$1ves" },
250                { "(ar)f$", "$1ves" },
251                { "([nlw]i)fe$", "$1ves" }
252        });
253
254        // Words ending in -y take -ys
255        rule(new String[][] { { "([aeiou])y$", "$1ys" }, { "y$", "ies" }, });
256
257        // Some words ending in -o take -os (including does preceded by a vowel)
258        categoryRule(CATEGORY_O_I, "(.*)o$", "$1os");
259        categoryRule(CATEGORY_O_OS, "(.*)o$", "$1os");
260        rule("([aeiou])o$", "$1os");
261        // The rest take -oes
262        rule("o$", "oes");
263
264        categoryRule(CATEGORY_A_ATA, "(.*)$", "$1es");
265
266        // Otherwise, assume that the plural just adds -s
267        rule("(.*)$", "$1s");
268    }
269
270    /**
271     * Returns plural form of the given word.
272     *
273     * @param word word in singular form
274     * @return plural form of the word
275     */
276    @Override
277    public String getPlural(String word) {
278        return super.getPlural(word);
279    }
280
281    /**
282     * Returns singular or plural form of the word based on count.
283     *
284     * @param word word in singular form
285     * @param count word count
286     * @return form of the word correct for given count
287     */
288    public String getPlural(String word, int count) {
289        if (count == 1) {
290            return word;
291        }
292        return getPlural(word);
293    }
294
295    /**
296     * Returns plural form of the given word.
297     * <p>
298     * For instance:
299     * <pre>
300     * {@code
301     * English.plural("cat") == "cats";
302     * }
303     * </pre>
304     * </p>
305     * @param word word in singular form
306     * @return plural form of given word
307     */
308    public static String plural(String word) {
309        return inflector.getPlural(word);
310    }
311
312    /**
313     * Returns singular or plural form of the word based on count.
314     * <p>
315     * For instance:
316     * <pre>
317     * {@code
318     * English.plural("cat", 1) == "cat";
319     * English.plural("cat", 2) == "cats";
320     * }
321     * </pre>
322     * </p>
323     * @param word word in singular form
324     * @param count word count
325     * @return form of the word correct for given count
326     */
327    public static String plural(String word, int count) {
328        return inflector.getPlural(word, count);
329    }
330
331    public static void setMode(MODE mode) {
332        English newInflector = new English(mode);
333        inflector = newInflector;
334    }
335}