001////////////////////////////////////////////////////////////////////////////////
002// checkstyle: Checks Java source code for adherence to a set of rules.
003// Copyright (C) 2001-2022 the original author or authors.
004//
005// This library is free software; you can redistribute it and/or
006// modify it under the terms of the GNU Lesser General Public
007// License as published by the Free Software Foundation; either
008// version 2.1 of the License, or (at your option) any later version.
009//
010// This library is distributed in the hope that it will be useful,
011// but WITHOUT ANY WARRANTY; without even the implied warranty of
012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
013// Lesser General Public License for more details.
014//
015// You should have received a copy of the GNU Lesser General Public
016// License along with this library; if not, write to the Free Software
017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
018////////////////////////////////////////////////////////////////////////////////
019
020package com.puppycrawl.tools.checkstyle.checks.regexp;
021
022import java.io.File;
023import java.util.regex.Pattern;
024
025import com.puppycrawl.tools.checkstyle.PropertyType;
026import com.puppycrawl.tools.checkstyle.StatelessCheck;
027import com.puppycrawl.tools.checkstyle.XdocsPropertyType;
028import com.puppycrawl.tools.checkstyle.api.AbstractFileSetCheck;
029import com.puppycrawl.tools.checkstyle.api.FileText;
030
031/**
032 * <p>
033 * Checks that a specified pattern matches across multiple lines in any file type.
034 * </p>
035 * <p>
 * Rationale: This check can be used when the regular expression can span multiple lines.
037 * </p>
038 * <ul>
039 * <li>
040 * Property {@code format} - Specify the format of the regular expression to match.
041 * Type is {@code java.util.regex.Pattern}.
042 * Default value is {@code "$."}.
043 * </li>
044 * <li>
045 * Property {@code message} - Specify the message which is used to notify about
046 * violations, if empty then default (hard-coded) message is used.
047 * Type is {@code java.lang.String}.
048 * Default value is {@code null}.
049 * </li>
050 * <li>
051 * Property {@code ignoreCase} - Control whether to ignore case when searching.
052 * Type is {@code boolean}.
053 * Default value is {@code false}.
054 * </li>
055 * <li>
056 * Property {@code minimum} - Specify the minimum number of matches required in each file.
057 * Type is {@code int}.
058 * Default value is {@code 0}.
059 * </li>
060 * <li>
061 * Property {@code maximum} - Specify the maximum number of matches required in each file.
062 * Type is {@code int}.
063 * Default value is {@code 0}.
064 * </li>
065 * <li>
066 * Property {@code matchAcrossLines} - Control whether to match expressions
067 * across multiple lines.
068 * Type is {@code boolean}.
069 * Default value is {@code false}.
070 * </li>
071 * <li>
072 * Property {@code fileExtensions} - Specify the file type extension of files to process.
073 * Type is {@code java.lang.String[]}.
074 * Default value is {@code ""}.
075 * </li>
076 * </ul>
077 * <p>
 * To run the check with its default configuration (no matches will be found):
079 * </p>
080 * <pre>
081 * &lt;module name=&quot;RegexpMultiline&quot;/&gt;
082 * </pre>
083 * <p>Example: </p>
084 * <pre>
085 * void method() {
086 *   int i = 5; // OK
087 *   System.out.println(i); // OK
088 * }
089 * </pre>
090 * <p>
091 * To configure the check to find calls to print to the console:
092 * </p>
093 * <pre>
094 * &lt;module name="RegexpMultiline"&gt;
095 *   &lt;property name="format" value="System\.(out)|(err)\.print(ln)?\("/&gt;
096 * &lt;/module&gt;
097 * </pre>
098 * <p>
099 * Example:
100 * </p>
101 * <pre>
102 * void method() {
103 *   System.out.print("Example");   // violation
104 *   System.err.println("Example"); // violation
105 *   System.out.print
106 *     ("Example");                 // violation
107 *   System.err.println
108 *     ("Example");          // OK
109 *   System
110 *   .out.print("Example");  // OK
111 *   System
112 *   .err.println("Example");       // violation
113 *   System.
114 *   out.print("Example");   // OK
115 *   System.
116 *   err.println("Example");        // violation
117 * }
118 * </pre>
119 * <p>
120 * To configure the check to match text that spans multiple lines,
121 * like normal code in a Java file:
122 * </p>
123 * <pre>
124 * &lt;module name="RegexpMultiline"&gt;
125 *   &lt;property name="matchAcrossLines" value="true"/&gt;
126 *   &lt;property name="format" value="System\.out.*?print\("/&gt;
127 * &lt;/module&gt;
128 * </pre>
129 * <p>
130 * Example:
131 * </p>
132 * <pre>
133 * void method() {
134 *   System.out.print("Example");  // violation
135 *   System.err.println("Example");
136 *   System.out.print              // violation
137 *     ("Example");
138 *   System.err.println
139 *     ("Example");
140 *   System
141 *   .out.print("Example");
142 *   System
143 *   .err.println("Example");
144 *   System.
145 *   out.print("Example");
146 *   System.
147 *   err.println("Example");
148 * }
149 * </pre>
150 * <p>
 * Note: Beware of greedy regular expressions when matching across lines.
 * A greedy {@code .*} will match as much as possible and not produce multiple violations
 * in the file if multiple groups of lines could match the expression. To prevent
 * an expression being too greedy, avoid overusing matching all text or allow it
 * to be optional, like {@code .*?}. The example expression above uses the reluctant
 * {@code .*?}, which allows multiple violations to be found in the same file.
157 * </p>
158 * <p>
159 * To configure the check to match a maximum of three test strings:
160 * </p>
161 * <pre>
162 * &lt;module name=&quot;RegexpMultiline&quot;&gt;
163 *   &lt;property name=&quot;format&quot; value=&quot;Test #[0-9]+:[A-Za-z ]+&quot;/&gt;
164 *   &lt;property name=&quot;ignoreCase&quot; value=&quot;true&quot;/&gt;
165 *   &lt;property name=&quot;maximum&quot; value=&quot;3&quot;/&gt;
166 * &lt;/module&gt;
167 * </pre>
168 * <p>
169 * Example:
170 * </p>
171 * <pre>
172 * void method() {
173 *   System.out.println("Test #1: this is a test string"); // OK
174 *   System.out.println("TeSt #2: This is a test string"); // OK
175 *   System.out.println("TEST #3: This is a test string"); // OK
176 *   int i = 5;
177 *   System.out.println("Value of i: " + i);
178 *   System.out.println("Test #4: This is a test string"); // violation
179 *   System.out.println("TEst #5: This is a test string"); // violation
180 * }
181 * </pre>
182 * <p>
183 * To configure the check to match a minimum of two test strings:
184 * </p>
185 * <pre>
186 * &lt;module name=&quot;RegexpMultiline&quot;&gt;
187 *   &lt;property name=&quot;format&quot; value=&quot;Test #[0-9]+:[A-Za-z ]+&quot;/&gt;
188 *   &lt;property name=&quot;minimum&quot; value=&quot;2&quot;/&gt;
189 * &lt;/module&gt;
190 * </pre>
191 * <p>
192 * Example:
193 * </p>
194 * <pre>
195 * void method() {
196 *   System.out.println("Test #1: this is a test string"); // violation
197 *   System.out.println("TEST #2: This is a test string"); // OK, "ignoreCase" is false by default
198 *   int i = 5;
199 *   System.out.println("Value of i: " + i);
200 *   System.out.println("Test #3: This is a test string"); // violation
201 *   System.out.println("Test #4: This is a test string"); // violation
202 * }
203 * </pre>
204 * <p>
205 * To configure the check to restrict an empty file:
206 * </p>
207 * <pre>
208 * &lt;module name=&quot;RegexpMultiline&quot;&gt;
209 *     &lt;property name=&quot;format&quot; value=&quot;^\s*$&quot; /&gt;
210 *     &lt;property name=&quot;matchAcrossLines&quot; value=&quot;true&quot; /&gt;
211 *     &lt;property name=&quot;message&quot; value=&quot;Empty file is not allowed&quot; /&gt;
212 * &lt;/module&gt;
213 * </pre>
214 * <p>
215 * Example of violation from the above config:
216 * </p>
217 * <pre>
218 * /var/tmp$ cat -n Test.java
219 * 1
220 * 2
221 * 3
222 * 4
223 * </pre>
224 * <p>Result:</p>
225 * <pre>
226 * /var/tmp/Test.java // violation, a file must not be empty.
227 * </pre>
228 * <p>
229 * Parent is {@code com.puppycrawl.tools.checkstyle.Checker}
230 * </p>
231 * <p>
232 * Violation Message Keys:
233 * </p>
234 * <ul>
235 * <li>
236 * {@code regexp.StackOverflowError}
237 * </li>
238 * <li>
239 * {@code regexp.empty}
240 * </li>
241 * <li>
242 * {@code regexp.exceeded}
243 * </li>
244 * <li>
245 * {@code regexp.minimum}
246 * </li>
247 * </ul>
248 *
249 * @since 5.0
250 */
251@StatelessCheck
252public class RegexpMultilineCheck extends AbstractFileSetCheck {
253
254    /** Specify the format of the regular expression to match. */
255    @XdocsPropertyType(PropertyType.PATTERN)
256    private String format = "$.";
257    /**
258     * Specify the message which is used to notify about violations,
259     * if empty then default (hard-coded) message is used.
260     */
261    private String message;
262    /** Specify the minimum number of matches required in each file. */
263    private int minimum;
264    /** Specify the maximum number of matches required in each file. */
265    private int maximum;
266    /** Control whether to ignore case when searching. */
267    private boolean ignoreCase;
268    /** Control whether to match expressions across multiple lines. */
269    private boolean matchAcrossLines;
270
271    /** The detector to use. */
272    private MultilineDetector detector;
273
274    @Override
275    public void beginProcessing(String charset) {
276        final DetectorOptions options = DetectorOptions.newBuilder()
277            .reporter(this)
278            .compileFlags(getRegexCompileFlags())
279            .format(format)
280            .message(message)
281            .minimum(minimum)
282            .maximum(maximum)
283            .ignoreCase(ignoreCase)
284            .build();
285        detector = new MultilineDetector(options);
286    }
287
288    @Override
289    protected void processFiltered(File file, FileText fileText) {
290        detector.processLines(fileText);
291    }
292
293    /**
294     * Retrieves the compile flags for the regular expression being built based
295     * on {@code matchAcrossLines}.
296     *
297     * @return The compile flags.
298     */
299    private int getRegexCompileFlags() {
300        final int result;
301
302        if (matchAcrossLines) {
303            result = Pattern.DOTALL;
304        }
305        else {
306            result = Pattern.MULTILINE;
307        }
308
309        return result;
310    }
311
312    /**
313     * Setter to specify the format of the regular expression to match.
314     *
315     * @param format the format of the regular expression to match.
316     */
317    public void setFormat(String format) {
318        this.format = format;
319    }
320
321    /**
322     * Setter to specify the message which is used to notify about violations,
323     * if empty then default (hard-coded) message is used.
324     *
325     * @param message the message to report for a match.
326     */
327    public void setMessage(String message) {
328        this.message = message;
329    }
330
331    /**
332     * Setter to specify the minimum number of matches required in each file.
333     *
334     * @param minimum the minimum number of matches required in each file.
335     */
336    public void setMinimum(int minimum) {
337        this.minimum = minimum;
338    }
339
340    /**
341     * Setter to specify the maximum number of matches required in each file.
342     *
343     * @param maximum the maximum number of matches required in each file.
344     */
345    public void setMaximum(int maximum) {
346        this.maximum = maximum;
347    }
348
349    /**
350     * Setter to control whether to ignore case when searching.
351     *
352     * @param ignoreCase whether to ignore case when searching.
353     */
354    public void setIgnoreCase(boolean ignoreCase) {
355        this.ignoreCase = ignoreCase;
356    }
357
358    /**
359     * Setter to control whether to match expressions across multiple lines.
360     *
361     * @param matchAcrossLines whether to match expressions across multiple lines.
362     */
363    public void setMatchAcrossLines(boolean matchAcrossLines) {
364        this.matchAcrossLines = matchAcrossLines;
365    }
366
367}