001//////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code for adherence to a set of rules. 003// Copyright (C) 2001-2022 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018//////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.checks.coding; 021 022import java.util.ArrayList; 023import java.util.BitSet; 024import java.util.HashMap; 025import java.util.List; 026import java.util.Map; 027import java.util.regex.Pattern; 028 029import com.puppycrawl.tools.checkstyle.FileStatefulCheck; 030import com.puppycrawl.tools.checkstyle.PropertyType; 031import com.puppycrawl.tools.checkstyle.XdocsPropertyType; 032import com.puppycrawl.tools.checkstyle.api.AbstractCheck; 033import com.puppycrawl.tools.checkstyle.api.DetailAST; 034import com.puppycrawl.tools.checkstyle.api.TokenTypes; 035import com.puppycrawl.tools.checkstyle.utils.CheckUtil; 036import com.puppycrawl.tools.checkstyle.utils.TokenUtil; 037 038/** 039 * <p> 040 * Checks for multiple occurrences of the same string literal within a single file. 041 * </p> 042 * <p> 043 * Rationale: Code duplication makes maintenance more difficult, so it can be better 044 * to replace the multiple occurrences with a constant. 045 * </p> 046 * <ul> 047 * <li> 048 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences 049 * to allow without generating a warning. 050 * Type is {@code int}. 051 * Default value is {@code 1}. 052 * </li> 053 * <li> 054 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks). 055 * Type is {@code java.util.regex.Pattern}. 056 * Default value is {@code "^""$"}. 057 * </li> 058 * <li> 059 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate 060 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to 061 * exclude syntactical contexts like annotations or static initializers from the check. 062 * Type is {@code java.lang.String[]}. 063 * Validation type is {@code tokenTypesSet}. 064 * Default value is {@code ANNOTATION}. 065 * </li> 066 * </ul> 067 * <p> 068 * To configure the check: 069 * </p> 070 * <pre> 071 * <module name="MultipleStringLiterals"/> 072 * </pre> 073 * <p> 074 * To configure the check so that it allows two occurrences of each string: 075 * </p> 076 * <pre> 077 * <module name="MultipleStringLiterals"> 078 * <property name="allowedDuplicates" value="2"/> 079 * </module> 080 * </pre> 081 * <p> 082 * To configure the check so that it ignores ", " and empty strings: 083 * </p> 084 * <pre> 085 * <module name="MultipleStringLiterals"> 086 * <property name="ignoreStringsRegexp" 087 * value='^(("")|(", "))$'/> 088 * </module> 089 * </pre> 090 * <p> 091 * To configure the check so that it flags duplicate strings in all syntactical contexts, 092 * even in annotations like {@code @SuppressWarnings("unchecked")}: 093 * </p> 094 * <pre> 095 * <module name="MultipleStringLiterals"> 096 * <property name="ignoreOccurrenceContext" value=""/> 097 * </module> 098 * </pre> 099 * <p> 100 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker} 101 * </p> 102 * <p> 103 * Violation Message Keys: 104 * </p> 105 * <ul> 106 * <li> 107 * {@code multiple.string.literal} 108 * </li> 109 * </ul> 110 * 111 * @since 3.5 112 */ 113@FileStatefulCheck 114public class MultipleStringLiteralsCheck extends AbstractCheck { 115 116 /** 117 * A key is pointing to the warning message text in "messages.properties" 118 * file. 119 */ 120 public static final String MSG_KEY = "multiple.string.literal"; 121 122 /** 123 * Compiled pattern for all system newlines. 124 */ 125 private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R"); 126 127 /** 128 * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL. 129 */ 130 private static final String QUOTE = "\""; 131 132 /** 133 * The found strings and their tokens. 134 */ 135 private final Map<String, List<DetailAST>> stringMap = new HashMap<>(); 136 137 /** 138 * Specify token type names where duplicate strings are ignored even if they 139 * don't match ignoredStringsRegexp. This allows you to exclude syntactical 140 * contexts like annotations or static initializers from the check. 141 */ 142 @XdocsPropertyType(PropertyType.TOKEN_ARRAY) 143 private final BitSet ignoreOccurrenceContext = new BitSet(); 144 145 /** 146 * Specify the maximum number of occurrences to allow without generating a warning. 147 */ 148 private int allowedDuplicates = 1; 149 150 /** 151 * Specify RegExp for ignored strings (with quotation marks). 152 */ 153 private Pattern ignoreStringsRegexp; 154 155 /** 156 * Construct an instance with default values. 157 */ 158 public MultipleStringLiteralsCheck() { 159 setIgnoreStringsRegexp(Pattern.compile("^\"\"$")); 160 ignoreOccurrenceContext.set(TokenTypes.ANNOTATION); 161 } 162 163 /** 164 * Setter to specify the maximum number of occurrences to allow without generating a warning. 165 * 166 * @param allowedDuplicates The maximum number of duplicates. 167 */ 168 public void setAllowedDuplicates(int allowedDuplicates) { 169 this.allowedDuplicates = allowedDuplicates; 170 } 171 172 /** 173 * Setter to specify RegExp for ignored strings (with quotation marks). 174 * 175 * @param ignoreStringsRegexp 176 * regular expression pattern for ignored strings 177 * @noinspection WeakerAccess 178 */ 179 public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) { 180 if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) { 181 this.ignoreStringsRegexp = null; 182 } 183 else { 184 this.ignoreStringsRegexp = ignoreStringsRegexp; 185 } 186 } 187 188 /** 189 * Setter to specify token type names where duplicate strings are ignored even 190 * if they don't match ignoredStringsRegexp. This allows you to exclude 191 * syntactical contexts like annotations or static initializers from the check. 192 * 193 * @param strRep the string representation of the tokens interested in 194 */ 195 public final void setIgnoreOccurrenceContext(String... strRep) { 196 ignoreOccurrenceContext.clear(); 197 for (final String s : strRep) { 198 final int type = TokenUtil.getTokenId(s); 199 ignoreOccurrenceContext.set(type); 200 } 201 } 202 203 @Override 204 public int[] getDefaultTokens() { 205 return getRequiredTokens(); 206 } 207 208 @Override 209 public int[] getAcceptableTokens() { 210 return getRequiredTokens(); 211 } 212 213 @Override 214 public int[] getRequiredTokens() { 215 return new int[] { 216 TokenTypes.STRING_LITERAL, 217 TokenTypes.TEXT_BLOCK_CONTENT, 218 }; 219 } 220 221 @Override 222 public void visitToken(DetailAST ast) { 223 if (!isInIgnoreOccurrenceContext(ast)) { 224 final String currentString; 225 if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) { 226 final String strippedString = 227 CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText()); 228 // We need to add quotes here to be consistent with STRING_LITERAL text. 229 currentString = QUOTE + strippedString + QUOTE; 230 } 231 else { 232 currentString = ast.getText(); 233 } 234 if (ignoreStringsRegexp == null 235 || !ignoreStringsRegexp.matcher(currentString).find()) { 236 stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast); 237 } 238 } 239 } 240 241 /** 242 * Analyses the path from the AST root to a given AST for occurrences 243 * of the token types in {@link #ignoreOccurrenceContext}. 244 * 245 * @param ast the node from where to start searching towards the root node 246 * @return whether the path from the root node to ast contains one of the 247 * token type in {@link #ignoreOccurrenceContext}. 248 */ 249 private boolean isInIgnoreOccurrenceContext(DetailAST ast) { 250 boolean isInIgnoreOccurrenceContext = false; 251 for (DetailAST token = ast; 252 token.getParent() != null; 253 token = token.getParent()) { 254 final int type = token.getType(); 255 if (ignoreOccurrenceContext.get(type)) { 256 isInIgnoreOccurrenceContext = true; 257 break; 258 } 259 } 260 return isInIgnoreOccurrenceContext; 261 } 262 263 @Override 264 public void beginTree(DetailAST rootAST) { 265 stringMap.clear(); 266 } 267 268 @Override 269 public void finishTree(DetailAST rootAST) { 270 for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) { 271 final List<DetailAST> hits = stringListEntry.getValue(); 272 if (hits.size() > allowedDuplicates) { 273 final DetailAST firstFinding = hits.get(0); 274 final String recurringString = 275 ALL_NEW_LINES.matcher( 276 stringListEntry.getKey()).replaceAll("\\\\n"); 277 log(firstFinding, MSG_KEY, recurringString, hits.size()); 278 } 279 } 280 } 281} 282