////////////////////////////////////////////////////////////////////////////////
// checkstyle: Checks Java source code for adherence to a set of rules.
// Copyright (C) 2001-2022 the original author or authors.
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
////////////////////////////////////////////////////////////////////////////////

package com.puppycrawl.tools.checkstyle.checks.coding;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
import com.puppycrawl.tools.checkstyle.PropertyType;
import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
import com.puppycrawl.tools.checkstyle.api.DetailAST;
import com.puppycrawl.tools.checkstyle.api.TokenTypes;
import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
import com.puppycrawl.tools.checkstyle.utils.TokenUtil;

/**
 * <p>
 * Checks for multiple occurrences of the same string literal within a single file.
 * </p>
 * <p>
 * Rationale: Code duplication makes maintenance more difficult, so it can be better
 * to replace the multiple occurrences with a constant.
 * </p>
 * <ul>
 * <li>
 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
 * to allow without generating a warning.
 * Type is {@code int}.
 * Default value is {@code 1}.
 * </li>
 * <li>
 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
 * Type is {@code java.util.regex.Pattern}.
 * Default value is {@code "^""$"}.
 * </li>
 * <li>
 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
 * strings are ignored even if they don't match ignoreStringsRegexp. This allows you to
 * exclude syntactical contexts like annotations or static initializers from the check.
 * Type is {@code java.lang.String[]}.
 * Validation type is {@code tokenTypesSet}.
 * Default value is {@code ANNOTATION}.
 * </li>
 * </ul>
 * <p>
 * To configure the check:
 * </p>
 * <pre>
 * &lt;module name=&quot;MultipleStringLiterals&quot;/&gt;
 * </pre>
 * <p>
 * Example:
 * </p>
 * <pre>
 * public class MyClass {
 *   String a = "StringContents";
 *   String a1 = "unchecked";
 *   &#64;SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations
 *   public void myTest() {
 *     String a2 = "StringContents"; // violation, "StringContents" occurs twice
 *     String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice
 *     String a4 = "SingleString"; // OK
 *     String a5 = ", " + ", " + ", "; // violation, ", " occurs three times
 *   }
 * }
 * </pre>
 * <p>
 * To configure the check so that it allows two occurrences of each string:
 * </p>
 * <pre>
 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
 *   &lt;property name=&quot;allowedDuplicates&quot; value=&quot;2&quot;/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * Example:
 * </p>
 * <pre>
 * public class MyClass {
 *   String a = "StringContents";
 *   String a1 = "unchecked";
 *   &#64;SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations
 *   public void myTest() {
 *     String a2 = "StringContents"; // OK, two occurrences are allowed
 *     String a3 = "DoubleString" + "DoubleString"; // OK, two occurrences are allowed
 *     String a4 = "SingleString"; // OK
 *     String a5 = ", " + ", " + ", "; // violation, three occurrences are NOT allowed
 *   }
 * }
 * </pre>
 * <p>
 * To configure the check so that it ignores ", " and empty strings:
 * </p>
 * <pre>
 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
 *   &lt;property name=&quot;ignoreStringsRegexp&quot;
 *     value='^((&quot;&quot;)|(&quot;, &quot;))$'/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * Example:
 * </p>
 * <pre>
 * public class MyClass {
 *   String a = "StringContents";
 *   String a1 = "unchecked";
 *   &#64;SuppressWarnings("unchecked") // OK, duplicate strings are ignored in annotations
 *   public void myTest() {
 *     String a2 = "StringContents"; // violation, "StringContents" occurs twice
 *     String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice
 *     String a4 = "SingleString"; // OK
 *     String a5 = ", " + ", " + ", "; // OK, multiple occurrences of ", " are allowed
 *   }
 * }
 * </pre>
 * <p>
 * To configure the check so that it flags duplicate strings in all syntactical contexts,
 * even in annotations like {@code @SuppressWarnings("unchecked")}:
 * </p>
 * <pre>
 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
 *   &lt;property name=&quot;ignoreOccurrenceContext&quot; value=&quot;&quot;/&gt;
 * &lt;/module&gt;
 * </pre>
 * <p>
 * Example:
 * </p>
 * <pre>
 * public class MyClass {
 *   String a = "StringContents";
 *   String a1 = "unchecked";
 *   &#64;SuppressWarnings("unchecked") // violation, "unchecked" occurs twice
 *   public void myTest() {
 *     String a2 = "StringContents"; // violation, "StringContents" occurs twice
 *     String a3 = "DoubleString" + "DoubleString"; // violation, "DoubleString" occurs twice
 *     String a4 = "SingleString"; // OK
 *     String a5 = ", " + ", " + ", "; // violation, ", " occurs three times
 *   }
 * }
 * </pre>
 * <p>
 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
 * </p>
 * <p>
 * Violation Message Keys:
 * </p>
 * <ul>
 * <li>
 * {@code multiple.string.literal}
 * </li>
 * </ul>
 *
 * @since 3.5
 */
@FileStatefulCheck
public class MultipleStringLiteralsCheck extends AbstractCheck {

    /**
     * A key is pointing to the warning message text in "messages.properties"
     * file.
     */
    public static final String MSG_KEY = "multiple.string.literal";

    /**
     * Compiled pattern for all system newlines.
     */
    private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");

    /**
     * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
     */
    private static final String QUOTE = "\"";

    /**
     * The found strings and their tokens.
     */
    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();

    /**
     * Specify token type names where duplicate strings are ignored even if they
     * don't match ignoreStringsRegexp. This allows you to exclude syntactical
     * contexts like annotations or static initializers from the check.
     */
    @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
    private final BitSet ignoreOccurrenceContext = new BitSet();

    /**
     * Specify the maximum number of occurrences to allow without generating a warning.
     */
    private int allowedDuplicates = 1;

    /**
     * Specify RegExp for ignored strings (with quotation marks).
     */
    private Pattern ignoreStringsRegexp;

    /**
     * Construct an instance with default values: literals inside
     * {@code ANNOTATION} tokens are skipped and the ignore pattern is
     * {@code ^""$}.
     */
    public MultipleStringLiteralsCheck() {
        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
    }

    /**
     * Setter to specify the maximum number of occurrences to allow without generating a warning.
     *
     * @param allowedDuplicates The maximum number of duplicates.
     */
    public void setAllowedDuplicates(int allowedDuplicates) {
        this.allowedDuplicates = allowedDuplicates;
    }

    /**
     * Setter to specify RegExp for ignored strings (with quotation marks).
     * A {@code null} pattern, or a pattern whose source text is empty, switches
     * the regexp filter off: the field is then stored as {@code null}.
     *
     * @param ignoreStringsRegexp
     *        regular expression pattern for ignored strings
     * @noinspection WeakerAccess
     */
    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
        Pattern effectivePattern = ignoreStringsRegexp;
        if (effectivePattern != null && effectivePattern.pattern().isEmpty()) {
            // An empty expression is stored as "no filter".
            effectivePattern = null;
        }
        this.ignoreStringsRegexp = effectivePattern;
    }

    /**
     * Setter to specify token type names where duplicate strings are ignored even
     * if they don't match ignoreStringsRegexp. This allows you to exclude
     * syntactical contexts like annotations or static initializers from the check.
     * Any previously configured contexts are dropped before the new names are applied;
     * each name is resolved to a token id through {@code TokenUtil.getTokenId}.
     *
     * @param strRep the string representation of the tokens interested in
     */
    public final void setIgnoreOccurrenceContext(String... strRep) {
        ignoreOccurrenceContext.clear();
        for (int index = 0; index < strRep.length; index++) {
            ignoreOccurrenceContext.set(TokenUtil.getTokenId(strRep[index]));
        }
    }

    @Override
    public int[] getDefaultTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getAcceptableTokens() {
        return getRequiredTokens();
    }

    @Override
    public int[] getRequiredTokens() {
        return new int[] {
            TokenTypes.STRING_LITERAL,
            TokenTypes.TEXT_BLOCK_CONTENT,
        };
    }

    /**
     * Records the visited literal under its text, unless the literal sits in an
     * ignored context or its text is matched by the ignore pattern.
     *
     * @param ast the {@code STRING_LITERAL} or {@code TEXT_BLOCK_CONTENT} token
     */
    @Override
    public void visitToken(DetailAST ast) {
        if (isInIgnoreOccurrenceContext(ast)) {
            return;
        }
        final String literal = extractLiteralText(ast);
        final boolean isIgnoredByRegexp = ignoreStringsRegexp != null
                && ignoreStringsRegexp.matcher(literal).find();
        if (!isIgnoredByRegexp) {
            stringMap.computeIfAbsent(literal, key -> new ArrayList<>()).add(ast);
        }
    }

    /**
     * Produces the text under which a literal is counted. The text of a string
     * literal is used unchanged. Text block content is passed through
     * {@code CheckUtil.stripIndentAndInitialNewLineFromTextBlock} and then wrapped
     * in quotes so that it is consistent with STRING_LITERAL text.
     *
     * @param ast the literal token
     * @return the text used as the key for counting occurrences
     */
    private static String extractLiteralText(DetailAST ast) {
        final String rawText = ast.getText();
        final String result;
        if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
            result = QUOTE
                + CheckUtil.stripIndentAndInitialNewLineFromTextBlock(rawText)
                + QUOTE;
        }
        else {
            result = rawText;
        }
        return result;
    }

    /**
     * Walks from the given node towards the root and looks for a token whose
     * type is listed in {@link #ignoreOccurrenceContext}. The given node itself
     * is examined; the topmost node (the one without a parent) is not.
     *
     * @param ast the node from where to start searching towards the root node
     * @return whether the path from the root node to ast contains one of the
     *     token type in {@link #ignoreOccurrenceContext}.
     */
    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
        for (DetailAST node = ast; node.getParent() != null; node = node.getParent()) {
            if (ignoreOccurrenceContext.get(node.getType())) {
                return true;
            }
        }
        return false;
    }

    /**
     * Discards the literals collected so far before a new tree is walked.
     *
     * @param rootAST the root of the tree about to be processed
     */
    @Override
    public void beginTree(DetailAST rootAST) {
        stringMap.clear();
    }

    /**
     * Logs one violation for every collected string that occurs more often than
     * {@code allowedDuplicates}. The violation is attached to the first recorded
     * occurrence, and line terminators in the reported text are replaced by a
     * backslash followed by {@code n}.
     *
     * @param rootAST the root of the tree that has just been processed
     */
    @Override
    public void finishTree(DetailAST rootAST) {
        stringMap.forEach((literal, occurrences) -> {
            final int count = occurrences.size();
            if (count > allowedDuplicates) {
                final String printable =
                    ALL_NEW_LINES.matcher(literal).replaceAll("\\\\n");
                log(occurrences.get(0), MSG_KEY, printable, count);
            }
        });
    }

}