001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2022 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.coding;
021
022import java.util.ArrayList;
023import java.util.BitSet;
024import java.util.HashMap;
025import java.util.List;
026import java.util.Map;
027import java.util.regex.Pattern;
028
029import com.puppycrawl.tools.checkstyle.FileStatefulCheck;
030import com.puppycrawl.tools.checkstyle.PropertyType;
031import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
032import com.puppycrawl.tools.checkstyle.api.AbstractCheck;
033import com.puppycrawl.tools.checkstyle.api.DetailAST;
034import com.puppycrawl.tools.checkstyle.api.TokenTypes;
035import com.puppycrawl.tools.checkstyle.utils.CheckUtil;
036import com.puppycrawl.tools.checkstyle.utils.TokenUtil;
037
038/**
039 * <p>
040 * Checks for multiple occurrences of the same string literal within a single file.
041 * </p>
042 * <p>
043 * Rationale: Code duplication makes maintenance more difficult, so it can be better
044 * to replace the multiple occurrences with a constant.
045 * </p>
046 * <ul>
047 * <li>
048 * Property {@code allowedDuplicates} - Specify the maximum number of occurrences
049 * to allow without generating a warning.
050 * Type is {@code int}.
051 * Default value is {@code 1}.
052 * </li>
053 * <li>
054 * Property {@code ignoreStringsRegexp} - Specify RegExp for ignored strings (with quotation marks).
055 * Type is {@code java.util.regex.Pattern}.
056 * Default value is {@code "^""$"}.
057 * </li>
058 * <li>
059 * Property {@code ignoreOccurrenceContext} - Specify token type names where duplicate
060 * strings are ignored even if they don't match ignoredStringsRegexp. This allows you to
061 * exclude syntactical contexts like annotations or static initializers from the check.
062 * Type is {@code java.lang.String[]}.
063 * Validation type is {@code tokenTypesSet}.
064 * Default value is {@code ANNOTATION}.
065 * </li>
066 * </ul>
067 * <p>
068 * To configure the check:
069 * </p>
070 * <pre>
071 * &lt;module name=&quot;MultipleStringLiterals&quot;/&gt;
072 * </pre>
073 * <p>
074 * To configure the check so that it allows two occurrences of each string:
075 * </p>
076 * <pre>
077 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
078 *   &lt;property name=&quot;allowedDuplicates&quot; value=&quot;2&quot;/&gt;
079 * &lt;/module&gt;
080 * </pre>
081 * <p>
082 * To configure the check so that it ignores ", " and empty strings:
083 * </p>
084 * <pre>
085 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
086 *   &lt;property name=&quot;ignoreStringsRegexp&quot;
087 *     value='^((&quot;&quot;)|(&quot;, &quot;))$'/&gt;
088 * &lt;/module&gt;
089 * </pre>
090 * <p>
091 * To configure the check so that it flags duplicate strings in all syntactical contexts,
092 * even in annotations like {@code @SuppressWarnings("unchecked")}:
093 * </p>
094 * <pre>
095 * &lt;module name=&quot;MultipleStringLiterals&quot;&gt;
096 *   &lt;property name=&quot;ignoreOccurrenceContext&quot; value=&quot;&quot;/&gt;
097 * &lt;/module&gt;
098 * </pre>
099 * <p>
100 * Parent is {@code com.puppycrawl.tools.checkstyle.TreeWalker}
101 * </p>
102 * <p>
103 * Violation Message Keys:
104 * </p>
105 * <ul>
106 * <li>
107 * {@code multiple.string.literal}
108 * </li>
109 * </ul>
110 *
111 * @since 3.5
112 */
113@FileStatefulCheck
114public class MultipleStringLiteralsCheck extends AbstractCheck {
115
116    /**
117     * A key is pointing to the warning message text in "messages.properties"
118     * file.
119     */
120    public static final String MSG_KEY = "multiple.string.literal";
121
122    /**
123     * Compiled pattern for all system newlines.
124     */
125    private static final Pattern ALL_NEW_LINES = Pattern.compile("\\R");
126
127    /**
128     * String used to amend TEXT_BLOCK_CONTENT so that it matches STRING_LITERAL.
129     */
130    private static final String QUOTE = "\"";
131
132    /**
133     * The found strings and their tokens.
134     */
135    private final Map<String, List<DetailAST>> stringMap = new HashMap<>();
136
137    /**
138     * Specify token type names where duplicate strings are ignored even if they
139     * don't match ignoredStringsRegexp. This allows you to exclude syntactical
140     * contexts like annotations or static initializers from the check.
141     */
142    @XdocsPropertyType(PropertyType.TOKEN_ARRAY)
143    private final BitSet ignoreOccurrenceContext = new BitSet();
144
145    /**
146     * Specify the maximum number of occurrences to allow without generating a warning.
147     */
148    private int allowedDuplicates = 1;
149
150    /**
151     * Specify RegExp for ignored strings (with quotation marks).
152     */
153    private Pattern ignoreStringsRegexp;
154
155    /**
156     * Construct an instance with default values.
157     */
158    public MultipleStringLiteralsCheck() {
159        setIgnoreStringsRegexp(Pattern.compile("^\"\"$"));
160        ignoreOccurrenceContext.set(TokenTypes.ANNOTATION);
161    }
162
163    /**
164     * Setter to specify the maximum number of occurrences to allow without generating a warning.
165     *
166     * @param allowedDuplicates The maximum number of duplicates.
167     */
168    public void setAllowedDuplicates(int allowedDuplicates) {
169        this.allowedDuplicates = allowedDuplicates;
170    }
171
172    /**
173     * Setter to specify RegExp for ignored strings (with quotation marks).
174     *
175     * @param ignoreStringsRegexp
176     *        regular expression pattern for ignored strings
177     * @noinspection WeakerAccess
178     */
179    public final void setIgnoreStringsRegexp(Pattern ignoreStringsRegexp) {
180        if (ignoreStringsRegexp == null || ignoreStringsRegexp.pattern().isEmpty()) {
181            this.ignoreStringsRegexp = null;
182        }
183        else {
184            this.ignoreStringsRegexp = ignoreStringsRegexp;
185        }
186    }
187
188    /**
189     * Setter to specify token type names where duplicate strings are ignored even
190     * if they don't match ignoredStringsRegexp. This allows you to exclude
191     * syntactical contexts like annotations or static initializers from the check.
192     *
193     * @param strRep the string representation of the tokens interested in
194     */
195    public final void setIgnoreOccurrenceContext(String... strRep) {
196        ignoreOccurrenceContext.clear();
197        for (final String s : strRep) {
198            final int type = TokenUtil.getTokenId(s);
199            ignoreOccurrenceContext.set(type);
200        }
201    }
202
203    @Override
204    public int[] getDefaultTokens() {
205        return getRequiredTokens();
206    }
207
208    @Override
209    public int[] getAcceptableTokens() {
210        return getRequiredTokens();
211    }
212
213    @Override
214    public int[] getRequiredTokens() {
215        return new int[] {
216            TokenTypes.STRING_LITERAL,
217            TokenTypes.TEXT_BLOCK_CONTENT,
218        };
219    }
220
221    @Override
222    public void visitToken(DetailAST ast) {
223        if (!isInIgnoreOccurrenceContext(ast)) {
224            final String currentString;
225            if (ast.getType() == TokenTypes.TEXT_BLOCK_CONTENT) {
226                final String strippedString =
227                    CheckUtil.stripIndentAndInitialNewLineFromTextBlock(ast.getText());
228                // We need to add quotes here to be consistent with STRING_LITERAL text.
229                currentString = QUOTE + strippedString + QUOTE;
230            }
231            else {
232                currentString = ast.getText();
233            }
234            if (ignoreStringsRegexp == null
235                    || !ignoreStringsRegexp.matcher(currentString).find()) {
236                stringMap.computeIfAbsent(currentString, key -> new ArrayList<>()).add(ast);
237            }
238        }
239    }
240
241    /**
242     * Analyses the path from the AST root to a given AST for occurrences
243     * of the token types in {@link #ignoreOccurrenceContext}.
244     *
245     * @param ast the node from where to start searching towards the root node
246     * @return whether the path from the root node to ast contains one of the
247     *     token type in {@link #ignoreOccurrenceContext}.
248     */
249    private boolean isInIgnoreOccurrenceContext(DetailAST ast) {
250        boolean isInIgnoreOccurrenceContext = false;
251        for (DetailAST token = ast;
252             token.getParent() != null;
253             token = token.getParent()) {
254            final int type = token.getType();
255            if (ignoreOccurrenceContext.get(type)) {
256                isInIgnoreOccurrenceContext = true;
257                break;
258            }
259        }
260        return isInIgnoreOccurrenceContext;
261    }
262
263    @Override
264    public void beginTree(DetailAST rootAST) {
265        stringMap.clear();
266    }
267
268    @Override
269    public void finishTree(DetailAST rootAST) {
270        for (Map.Entry<String, List<DetailAST>> stringListEntry : stringMap.entrySet()) {
271            final List<DetailAST> hits = stringListEntry.getValue();
272            if (hits.size() > allowedDuplicates) {
273                final DetailAST firstFinding = hits.get(0);
274                final String recurringString =
275                    ALL_NEW_LINES.matcher(
276                        stringListEntry.getKey()).replaceAll("\\\\n");
277                log(firstFinding, MSG_KEY, recurringString, hits.size());
278            }
279        }
280    }
281}
282