001/* 002 * Copyright 2011 Atteo. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014package org.atteo.evo.inflector; 015 016/** 017 * Transforms English words from singular to plural form. 018 * <p> 019 * Examples: 020 * <pre> 021 * English.plural("word") = "words"; 022 * 023 * English.plural("cat", 1) = "cat"; 024 * English.plural("cat", 2) = "cats"; 025 * </pre> 026 * </p> 027 * <p> 028 * Based on <a href="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html"> 029 * An Algorithmic Approach to English Pluralization</a> by Damian Conway. 030 * </p> 031 */ 032public class English extends TwoFormInflector { 033 public static enum MODE { 034 ENGLISH_ANGLICIZED, ENGLISH_CLASSICAL 035 } 036 037 private static final String[] CATEGORY_EX_ICES = { "codex", "murex", 038 "silex", }; 039 040 private static final String[] CATEGORY_IX_ICES = { "radix", "helix", }; 041 042 private static final String[] CATEGORY_UM_A = { "bacterium", 043 "agendum", "desideratum", "erratum", "stratum", "datum", "ovum", 044 "extremum", "candelabrum", }; 045 046 // Always us -> i 047 private static final String[] CATEGORY_US_I = { "alumnus", "alveolus", 048 "bacillus", "bronchus", "locus", "nucleus", "stimulus", "meniscus", 049 "thesaurus", }; 050 051 private static final String[] CATEGORY_ON_A = { "criterion", 052 "perihelion", "aphelion", "phenomenon", "prolegomenon", "noumenon", 053 "organon", "asyndeton", "hyperbaton", }; 054 055 private static final String[] CATEGORY_A_AE = { "alumna", "alga", 056 "vertebra", "persona" }; 057 058 // Always o -> os 059 private static final String[] CATEGORY_O_OS = { "albino", 060 "archipelago", "armadillo", "commando", "crescendo", "fiasco", 061 "ditto", "dynamo", "embryo", "ghetto", "guano", "inferno", "jumbo", 062 "lumbago", "magneto", "manifesto", "medico", "octavo", "photo", 063 "pro", "quarto", "canto", "lingo", "generalissimo", "stylo", 064 "rhino", "casino", "auto", "macro", "zero", 065 }; 066 067 // Classical o -> i (normally -> os) 068 private static final String[] CATEGORY_O_I = { 069 "solo", "soprano", "basso", "alto", "contralto", "tempo", "piano", 070 "virtuoso", }; 071 072 private static final String[] CATEGORY_EN_INA = { 073 "stamen", "foramen", "lumen" 074 }; 075 076 // -a to -as (anglicized) or -ata (classical) 077 private static final String[] CATEGORY_A_ATA = { 078 "anathema", "enema", "oedema", "bema", "enigma", "sarcoma", 079 "carcinoma", "gumma", "schema", "charisma", "lemma", "soma", 080 "diploma", "lymphoma", "stigma", "dogma", "magma", "stoma", 081 "drama", "melisma", "trauma", "edema", "miasma" 082 }; 083 084 private static final String[] CATEGORY_IS_IDES = { 085 "iris", "clitoris" 086 }; 087 088 // -us to -uses (anglicized) or -us (classical) 089 private static final String[] CATEGORY_US_US = { 090 "apparatus", "impetus", "prospectus", "cantus", "nexus", "sinus", "coitus", 091 "plexus", "status", "hiatus" 092 }; 093 094 private static final String[] CATEGORY_NONE_I = { 095 "afreet", "afrit", "efreet" 096 }; 097 098 private static final String[] CATEGORY_NONE_IM = { 099 "cherub", "goy", "seraph" 100 }; 101 102 private static final String[] CATEGORY_EX_EXES = { 103 "apex", "latex", "vertex", "cortex", "pontifex", "vortex", "index", "simplex" 104 }; 105 106 private static final String[] CATEGORY_IX_IXES = { 107 "appendix" 108 }; 109 110 private static final String[] CATEGORY_S_ES = { 111 "acropolis", "chaos", "lens", "aegis", 112 "cosmos", "mantis", "alias", "dais", "marquis", "asbestos", 113 "digitalis", "metropolis", "atlas", "epidermis", "pathos", 114 "bathos", "ethos", "pelvis", "bias", "gas", "polis", "caddis", 115 "glottis", "rhinoceros", "cannabis", "glottis", "sassafras", 116 "canvas", "ibis", "trellis" 117 }; 118 119 private static final String[] CATEGORY_MAN_MANS = { 120 "human", "Alabaman", "Bahaman", "Burman", "German", "Hiroshiman", "Liman", "Nakayaman", "Oklahoman", 121 "Panaman", "Selman", "Sonaman", "Tacoman", "Yakiman", "Yokohaman", "Yuman" 122 }; 123 124 private static English inflector = new English(); 125 126 127 public English() { 128 this(MODE.ENGLISH_ANGLICIZED); 129 } 130 131 public English(MODE mode) { 132 // 2. Handle words that do not inflect in the plural (such as fish, travois, chassis, nationalities ending 133 // in -ese etc. 134 135 rule("(fish|ois|sheep|deer|pox|itis)$", "$1"); 136 137 uncountable(new String[] { "bison", "flounder", "pliers", "bream", 138 "gallows", "proceedings", "breeches", "graffiti", "rabies", 139 "britches", "headquarters", "salmon", "carp", "herpes", 140 "scissors", "chassis", "high-jinks", "sea-bass", "clippers", 141 "homework", "series", "cod", "innings", "shears", 142 "contretemps", "jackanapes", "species", "corps", "mackerel", 143 "swine", "debris", "measles", "trout", "diabetes", "mews", 144 "tuna", "djinn", "mumps", "whiting", "eland", "news", 145 "wildebeest", "elk", "pincers", "sugar" }); 146 147 // 4. Handle standard irregular plurals (mongooses, oxen, etc.) 148 149 irregular(new String[][] { 150 { "child", "children" }, // classical 151 { "ephemeris", "ephemerides" }, // classical 152 { "mongoose", "mongoose" }, // anglicized 153 { "mythos", "mythoi" }, // classical 154 // TODO: handle entire word correctly 155 //{ "ox", "oxen" }, // classical 156 { "soliloquy", "soliloquies" }, // anglicized 157 { "trilby", "trilbys" }, // anglicized 158 { "genus", "genera" }, // classical 159 { "quiz", "quizzes" }, 160 }); 161 162 if (mode == MODE.ENGLISH_ANGLICIZED) { 163 // Anglicized plural 164 irregular(new String[][] { 165 { "beef", "beefs" }, 166 { "brother", "brothers" }, 167 { "cow", "cows" }, 168 { "genie", "genies" }, 169 { "money", "moneys" }, 170 { "octopus", "octopuses" }, 171 { "opus", "opuses" }, 172 }); 173 } else if (mode == MODE.ENGLISH_CLASSICAL) { 174 // Classical plural 175 irregular(new String[][] { { "beef", "beeves"}, 176 { "brother", "brethren" }, 177 { "cos", "kine" }, { "genie", "genii"}, 178 { "money", "monies" }, 179 { "octopus", "octopodes" }, 180 { "opus", "opera" }, 181 }); 182 } 183 184 categoryRule(CATEGORY_MAN_MANS, "(.*)$", "$1s"); 185 186 // questionable 187 /* 188 rule(new String[][] { 189 { "(ness)$", "$1" }, 190 { "(ality)$", "$1" } 191 { "(icity)$", "$1" }, 192 { "(ivity)$", "$1" }, 193 }); 194 */ 195 // 5. Handle irregular inflections for common suffixes 196 rule(new String[][] { 197 { "man$", "men" }, 198 { "([lm])ouse$", "$1ice" }, 199 { "tooth$", "teeth" }, 200 { "goose$", "geese" }, 201 { "foot$", "feet" }, 202 { "zoon$", "zoa" }, 203 { "([csx])is$", "$1es" }, 204 }); 205 206 // 6. Handle fully assimilated classical inflections 207 categoryRule(CATEGORY_EX_ICES, "(.*)ex$", "$1ices"); 208 categoryRule(CATEGORY_IX_ICES, "(.*)ix$", "$1ices"); 209 categoryRule(CATEGORY_UM_A, "(.*)um$", "$1a"); 210 categoryRule(CATEGORY_ON_A, "(.*)on$", "$1a"); 211 categoryRule(CATEGORY_A_AE, "(.*)a$", "$1ae"); 212 213 // 7. Handle classical variants of modern inflections 214 if (mode == MODE.ENGLISH_CLASSICAL) { 215 rule(new String[][]{ 216 { "trix$", "trices" }, 217 { "eau$", "eaux" }, 218 { "ieu$", "ieux" }, 219 { "(..[iay])nx$", "$1nges" }, 220 }); 221 categoryRule(CATEGORY_EN_INA, "(.*)en$", "$1ina"); 222 categoryRule(CATEGORY_A_ATA, "(.*)a$", "$1ata"); 223 categoryRule(CATEGORY_IS_IDES, "(.*)is$", "$1ides"); 224 categoryRule(CATEGORY_US_US, "", ""); 225 categoryRule(CATEGORY_O_I, "(.*)o$", "$1i"); 226 categoryRule(CATEGORY_NONE_I, "(.*)$", "$1i"); 227 categoryRule(CATEGORY_NONE_IM, "(.*)$", "$1im"); 228 categoryRule(CATEGORY_EX_EXES, "(.*)ex$", "$1ices"); 229 categoryRule(CATEGORY_IX_IXES, "(.*)ix$", "$1ices"); 230 } 231 232 categoryRule(CATEGORY_US_I, "(.*)us$", "$1i"); 233 234 rule("([cs]h|[zx])$", "$1es"); 235 categoryRule(CATEGORY_S_ES, "(.*)$", "$1es"); 236 categoryRule(CATEGORY_IS_IDES, "(.*)$", "$1es"); 237 categoryRule(CATEGORY_US_US, "(.*)$", "$1es"); 238 rule("(us)$", "$1es"); 239 categoryRule(CATEGORY_A_ATA, "(.*)$", "$1s"); 240 241 // The suffixes -ch, -sh, and -ss all take -es in the plural (churches, 242 // classes, etc)... 243 rule(new String[][] { { "([cs])h$", "$1hes" }, { "ss$", "sses" } }); 244 245 // Certain words ending in -f or -fe take -ves in the plural (lives, 246 // wolves, etc)... 247 rule(new String[][] { 248 { "([aeo]l)f$", "$1ves" }, 249 { "([^d]ea)f$", "$1ves" }, 250 { "(ar)f$", "$1ves" }, 251 { "([nlw]i)fe$", "$1ves" } 252 }); 253 254 // Words ending in -y take -ys 255 rule(new String[][] { { "([aeiou])y$", "$1ys" }, { "y$", "ies" }, }); 256 257 // Some words ending in -o take -os (including does preceded by a vowel) 258 categoryRule(CATEGORY_O_I, "(.*)o$", "$1os"); 259 categoryRule(CATEGORY_O_OS, "(.*)o$", "$1os"); 260 rule("([aeiou])o$", "$1os"); 261 // The rest take -oes 262 rule("o$", "oes"); 263 264 categoryRule(CATEGORY_A_ATA, "(.*)$", "$1es"); 265 266 // Otherwise, assume that the plural just adds -s 267 rule("(.*)$", "$1s"); 268 } 269 270 /** 271 * Returns plural form of the given word. 272 * 273 * @param word word in singular form 274 * @return plural form of the word 275 */ 276 @Override 277 public String getPlural(String word) { 278 return super.getPlural(word); 279 } 280 281 /** 282 * Returns singular or plural form of the word based on count. 283 * 284 * @param word word in singular form 285 * @param count word count 286 * @return form of the word correct for given count 287 */ 288 public String getPlural(String word, int count) { 289 if (count == 1) { 290 return word; 291 } 292 return getPlural(word); 293 } 294 295 /** 296 * Returns plural form of the given word. 297 * <p> 298 * For instance: 299 * <pre> 300 * {@code 301 * English.plural("cat") == "cats"; 302 * } 303 * </pre> 304 * </p> 305 * @param word word in singular form 306 * @return plural form of given word 307 */ 308 public static String plural(String word) { 309 return inflector.getPlural(word); 310 } 311 312 /** 313 * Returns singular or plural form of the word based on count. 314 * <p> 315 * For instance: 316 * <pre> 317 * {@code 318 * English.plural("cat", 1) == "cat"; 319 * English.plural("cat", 2) == "cats"; 320 * } 321 * </pre> 322 * </p> 323 * @param word word in singular form 324 * @param count word count 325 * @return form of the word correct for given count 326 */ 327 public static String plural(String word, int count) { 328 return inflector.getPlural(word, count); 329 } 330 331 public static void setMode(MODE mode) { 332 English newInflector = new English(mode); 333 inflector = newInflector; 334 } 335}