001/* 002 * Copyright 2011 Atteo. 003 * 004 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except 005 * in compliance with the License. You may obtain a copy of the License at 006 * 007 * http://www.apache.org/licenses/LICENSE-2.0 008 * 009 * Unless required by applicable law or agreed to in writing, software distributed under the License 010 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 011 * or implied. See the License for the specific language governing permissions and limitations under 012 * the License. 013 */ 014package org.atteo.evo.inflector; 015 016/** 017 * Transforms English words from singular to plural form. 018 * <p> 019 * Examples: 020 * <pre> 021 * English.plural("word") = "words"; 022 * 023 * English.plural("cat", 1) = "cat"; 024 * English.plural("cat", 2) = "cats"; 025 * </pre> 026 * </p> 027 * <p> 028 * Based on <a href="http://www.csse.monash.edu.au/~damian/papers/HTML/Plurals.html"> 029 * An Algorithmic Approach to English Pluralization</a> by Damian Conway. 030 * </p> 031 */ 032public class English extends TwoFormInflector { 033 public static enum MODE { 034 ENGLISH_ANGLICIZED, ENGLISH_CLASSICAL 035 } 036 037 private static final String[] CATEGORY_EX_ICES = { "codex", "murex", 038 "silex", }; 039 040 private static final String[] CATEGORY_IX_ICES = { "radix", "helix", }; 041 042 private static final String[] CATEGORY_UM_A = { "bacterium", 043 "agendum", "desideratum", "erratum", "stratum", "datum", "ovum", 044 "extremum", "candelabrum", }; 045 046 // Always us -> i 047 private static final String[] CATEGORY_US_I = { "alumnus", "alveolus", 048 "bacillus", "bronchus", "locus", "nucleus", "stimulus", "meniscus", 049 "thesaurus", }; 050 051 private static final String[] CATEGORY_ON_A = { "criterion", 052 "perihelion", "aphelion", "phenomenon", "prolegomenon", "noumenon", 053 "organon", "asyndeton", "hyperbaton", }; 054 055 private static final String[] CATEGORY_A_AE = { "alumna", "alga", 056 "vertebra", "persona" }; 057 058 // Always o -> os 059 private static final String[] CATEGORY_O_OS = { "albino", 060 "archipelago", "armadillo", "commando", "crescendo", "fiasco", 061 "ditto", "dynamo", "embryo", "ghetto", "guano", "inferno", "jumbo", 062 "lumbago", "magneto", "manifesto", "medico", "octavo", "photo", 063 "pro", "quarto", "canto", "lingo", "generalissimo", "stylo", 064 "rhino", "casino", "auto", "macro", "zero", 065 }; 066 067 // Classical o -> i (normally -> os) 068 private static final String[] CATEGORY_O_I = { 069 "solo", "soprano", "basso", "alto", "contralto", "tempo", "piano", 070 "virtuoso", }; 071 072 private static final String[] CATEGORY_EN_INA = { 073 "stamen", "foramen", "lumen" 074 }; 075 076 // -a to -as (anglicized) or -ata (classical) 077 private static final String[] CATEGORY_A_ATA = { 078 "anathema", "enema", "oedema", "bema", "enigma", "sarcoma", 079 "carcinoma", "gumma", "schema", "charisma", "lemma", "soma", 080 "diploma", "lymphoma", "stigma", "dogma", "magma", "stoma", 081 "drama", "melisma", "trauma", "edema", "miasma" 082 }; 083 084 private static final String[] CATEGORY_IS_IDES = { 085 "iris", "clitoris" 086 }; 087 088 // -us to -uses (anglicized) or -us (classical) 089 private static final String[] CATEGORY_US_US = { 090 "apparatus", "impetus", "prospectus", "cantus", "nexus", "sinus", "coitus", 091 "plexus", "status", "hiatus" 092 }; 093 094 private static final String[] CATEGORY_NONE_I = { 095 "afreet", "afrit", "efreet" 096 }; 097 098 private static final String[] CATEGORY_NONE_IM = { 099 "cherub", "goy", "seraph" 100 }; 101 102 private static final String[] CATEGORY_EX_EXES = { 103 "apex", "latex", "vertex", "cortex", "pontifex", "vortex", "index", "simplex" 104 }; 105 106 private static final String[] CATEGORY_IX_IXES = { 107 "appendix" 108 }; 109 110 private static final String[] CATEGORY_S_ES = { 111 "acropolis", "chaos", "lens", "aegis", 112 "cosmos", "mantis", "alias", "dais", "marquis", "asbestos", 113 "digitalis", "metropolis", "atlas", "epidermis", "pathos", 114 "bathos", "ethos", "pelvis", "bias", "gas", "polis", "caddis", 115 "glottis", "rhinoceros", "cannabis", "glottis", "sassafras", 116 "canvas", "ibis", "trellis" 117 }; 118 119 private static final String[] CATEGORY_MAN_MANS = { 120 "human", "Alabaman", "Bahaman", "Burman", "German", "Hiroshiman", "Liman", "Nakayaman", "Oklahoman", 121 "Panaman", "Selman", "Sonaman", "Tacoman", "Yakiman", "Yokohaman", "Yuman" 122 }; 123 124 private static English inflector = new English(); 125 126 127 public English() { 128 this(MODE.ENGLISH_ANGLICIZED); 129 } 130 131 public English(MODE mode) { 132 133 uncountable(new String[] { 134 // 2. Handle words that do not inflect in the plural (such as fish, travois, chassis, nationalities ending 135 // endings 136 "fish", "ois", "sheep", "deer", "pox", "itis", 137 138 // words 139 "bison", "flounder", "pliers", "bream", 140 "gallows", "proceedings", "breeches", "graffiti", "rabies", 141 "britches", "headquarters", "salmon", "carp", "herpes", 142 "scissors", "chassis", "high-jinks", "sea-bass", "clippers", 143 "homework", "series", "cod", "innings", "shears", 144 "contretemps", "jackanapes", "species", "corps", "mackerel", 145 "swine", "debris", "measles", "trout", "diabetes", "mews", 146 "tuna", "djinn", "mumps", "whiting", "eland", "news", 147 "wildebeest", "elk", "pincers", "sugar" }); 148 149 // 4. Handle standard irregular plurals (mongooses, oxen, etc.) 150 151 irregular(new String[][] { 152 { "child", "children" }, // classical 153 { "ephemeris", "ephemerides" }, // classical 154 { "mongoose", "mongoose" }, // anglicized 155 { "mythos", "mythoi" }, // classical 156 // TODO: handle entire word correctly 157 //{ "ox", "oxen" }, // classical 158 { "soliloquy", "soliloquies" }, // anglicized 159 { "trilby", "trilbys" }, // anglicized 160 { "genus", "genera" }, // classical 161 { "quiz", "quizzes" }, 162 }); 163 164 if (mode == MODE.ENGLISH_ANGLICIZED) { 165 // Anglicized plural 166 irregular(new String[][] { 167 { "beef", "beefs" }, 168 { "brother", "brothers" }, 169 { "cow", "cows" }, 170 { "genie", "genies" }, 171 { "money", "moneys" }, 172 { "octopus", "octopuses" }, 173 { "opus", "opuses" }, 174 }); 175 } else if (mode == MODE.ENGLISH_CLASSICAL) { 176 // Classical plural 177 irregular(new String[][] { { "beef", "beeves"}, 178 { "brother", "brethren" }, 179 { "cos", "kine" }, { "genie", "genii"}, 180 { "money", "monies" }, 181 { "octopus", "octopodes" }, 182 { "opus", "opera" }, 183 }); 184 } 185 186 categoryRule(CATEGORY_MAN_MANS, "", "s"); 187 188 // questionable 189 /* 190 rule(new String[][] { 191 { "(ness)$", "$1" }, 192 { "(ality)$", "$1" } 193 { "(icity)$", "$1" }, 194 { "(ivity)$", "$1" }, 195 }); 196 */ 197 // 5. Handle irregular inflections for common suffixes 198 rule(new String[][] { 199 { "man$", "men" }, 200 { "([lm])ouse$", "$1ice" }, 201 { "tooth$", "teeth" }, 202 { "goose$", "geese" }, 203 { "foot$", "feet" }, 204 { "zoon$", "zoa" }, 205 { "([csx])is$", "$1es" }, 206 }); 207 208 // 6. Handle fully assimilated classical inflections 209 categoryRule(CATEGORY_EX_ICES, "ex", "ices"); 210 categoryRule(CATEGORY_IX_ICES, "ix", "ices"); 211 categoryRule(CATEGORY_UM_A, "um", "a"); 212 categoryRule(CATEGORY_ON_A, "on", "a"); 213 categoryRule(CATEGORY_A_AE, "a", "ae"); 214 215 // 7. Handle classical variants of modern inflections 216 if (mode == MODE.ENGLISH_CLASSICAL) { 217 rule(new String[][]{ 218 { "trix$", "trices" }, 219 { "eau$", "eaux" }, 220 { "ieu$", "ieux" }, 221 { "(..[iay])nx$", "$1nges" }, 222 }); 223 categoryRule(CATEGORY_EN_INA, "en", "ina"); 224 categoryRule(CATEGORY_A_ATA, "a", "ata"); 225 categoryRule(CATEGORY_IS_IDES, "is", "ides"); 226 categoryRule(CATEGORY_US_US, "", ""); 227 categoryRule(CATEGORY_O_I, "o", "i"); 228 categoryRule(CATEGORY_NONE_I, "", "i"); 229 categoryRule(CATEGORY_NONE_IM, "", "im"); 230 categoryRule(CATEGORY_EX_EXES, "ex", "ices"); 231 categoryRule(CATEGORY_IX_IXES, "ix", "ices"); 232 } 233 234 categoryRule(CATEGORY_US_I, "us", "i"); 235 236 rule("([cs]h|[zx])$", "$1es"); 237 categoryRule(CATEGORY_S_ES, "", "es"); 238 categoryRule(CATEGORY_IS_IDES, "", "es"); 239 categoryRule(CATEGORY_US_US, "", "es"); 240 rule("(us)$", "$1es"); 241 categoryRule(CATEGORY_A_ATA, "", "s"); 242 243 // The suffixes -ch, -sh, and -ss all take -es in the plural (churches, 244 // classes, etc)... 245 rule(new String[][] { { "([cs])h$", "$1hes" }, { "ss$", "sses" } }); 246 247 // Certain words ending in -f or -fe take -ves in the plural (lives, 248 // wolves, etc)... 249 rule(new String[][] { 250 { "([aeo]l)f$", "$1ves" }, 251 { "([^d]ea)f$", "$1ves" }, 252 { "(ar)f$", "$1ves" }, 253 { "([nlw]i)fe$", "$1ves" } 254 }); 255 256 // Words ending in -y take -ys 257 rule(new String[][] { { "([aeiou])y$", "$1ys" }, { "y$", "ies" }, }); 258 259 // Some words ending in -o take -os (including does preceded by a vowel) 260 categoryRule(CATEGORY_O_I, "o", "os"); 261 categoryRule(CATEGORY_O_OS, "o", "os"); 262 rule("([aeiou])o$", "$1os"); 263 // The rest take -oes 264 rule("o$", "oes"); 265 266 rule("ulum", "ula"); 267 268 categoryRule(CATEGORY_A_ATA, "", "es"); 269 270 rule("s$", "ses"); 271 // Otherwise, assume that the plural just adds -s 272 rule("$", "s"); 273 } 274 275 /** 276 * Returns plural form of the given word. 277 * 278 * @param word word in singular form 279 * @return plural form of the word 280 */ 281 @Override 282 public String getPlural(String word) { 283 return super.getPlural(word); 284 } 285 286 /** 287 * Returns singular or plural form of the word based on count. 288 * 289 * @param word word in singular form 290 * @param count word count 291 * @return form of the word correct for given count 292 */ 293 public String getPlural(String word, int count) { 294 if (count == 1) { 295 return word; 296 } 297 return getPlural(word); 298 } 299 300 /** 301 * Returns plural form of the given word. 302 * <p> 303 * For instance: 304 * <pre> 305 * {@code 306 * English.plural("cat") == "cats"; 307 * } 308 * </pre> 309 * </p> 310 * @param word word in singular form 311 * @return plural form of given word 312 */ 313 public static String plural(String word) { 314 return inflector.getPlural(word); 315 } 316 317 /** 318 * Returns singular or plural form of the word based on count. 319 * <p> 320 * For instance: 321 * <pre> 322 * {@code 323 * English.plural("cat", 1) == "cat"; 324 * English.plural("cat", 2) == "cats"; 325 * } 326 * </pre> 327 * </p> 328 * @param word word in singular form 329 * @param count word count 330 * @return form of the word correct for given count 331 */ 332 public static String plural(String word, int count) { 333 return inflector.getPlural(word, count); 334 } 335 336 public static void setMode(MODE mode) { 337 English newInflector = new English(mode); 338 inflector = newInflector; 339 } 340}