001//////////////////////////////////////////////////////////////////////////////// 002// checkstyle: Checks Java source code for adherence to a set of rules. 003// Copyright (C) 2001-2022 the original author or authors. 004// 005// This library is free software; you can redistribute it and/or 006// modify it under the terms of the GNU Lesser General Public 007// License as published by the Free Software Foundation; either 008// version 2.1 of the License, or (at your option) any later version. 009// 010// This library is distributed in the hope that it will be useful, 011// but WITHOUT ANY WARRANTY; without even the implied warranty of 012// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 013// Lesser General Public License for more details. 014// 015// You should have received a copy of the GNU Lesser General Public 016// License along with this library; if not, write to the Free Software 017// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 018//////////////////////////////////////////////////////////////////////////////// 019 020package com.puppycrawl.tools.checkstyle.utils; 021 022import java.io.File; 023import java.io.IOException; 024import java.util.ArrayList; 025import java.util.List; 026import java.util.Locale; 027import java.util.Set; 028import java.util.regex.Pattern; 029import java.util.stream.Collectors; 030import java.util.stream.Stream; 031 032import com.puppycrawl.tools.checkstyle.AstTreeStringPrinter; 033import com.puppycrawl.tools.checkstyle.JavaParser; 034import com.puppycrawl.tools.checkstyle.api.CheckstyleException; 035import com.puppycrawl.tools.checkstyle.api.DetailAST; 036import com.puppycrawl.tools.checkstyle.api.TokenTypes; 037import com.puppycrawl.tools.checkstyle.xpath.AbstractNode; 038import com.puppycrawl.tools.checkstyle.xpath.ElementNode; 039import com.puppycrawl.tools.checkstyle.xpath.RootNode; 040import net.sf.saxon.Configuration; 041import net.sf.saxon.om.Item; 042import net.sf.saxon.sxpath.XPathDynamicContext; 043import net.sf.saxon.sxpath.XPathEvaluator; 044import net.sf.saxon.sxpath.XPathExpression; 045import net.sf.saxon.trans.XPathException; 046 047/** 048 * Contains utility methods for xpath. 049 * 050 */ 051public final class XpathUtil { 052 053 /** 054 * List of token types which support text attribute. 055 * These token types were selected based on analysis that all others do not match required 056 * criteria - text attribute of the token must be useful and help to retrieve more precise 057 * results. 058 * There are three types of AST tokens: 059 * 1. Tokens for which the texts are equal to the name of the token. Or in other words, 060 * nodes for which the following expression is always true: 061 * <pre> 062 * detailAst.getText().equals(TokenUtil.getTokenName(detailAst.getType())) 063 * </pre> 064 * For example: 065 * <pre> 066 * //MODIFIERS[@text='MODIFIERS'] 067 * //OBJBLOCK[@text='OBJBLOCK'] 068 * </pre> 069 * These tokens do not match required criteria because their texts do not carry any additional 070 * information, they do not affect the xpath requests and do not help to get more accurate 071 * results. The texts of these nodes are useless. No matter what code you analyze, these 072 * texts are always the same. 073 * In addition, they make xpath queries more complex, less readable and verbose. 074 * 2. Tokens for which the texts differ from token names, but texts are always constant. 075 * For example: 076 * <pre> 077 * //LITERAL_VOID[@text='void'] 078 * //RCURLY[@text='}'] 079 * </pre> 080 * These tokens are not used for the same reasons as were described in the previous part. 081 * 3. Tokens for which texts are not constant. The texts of these nodes are closely related 082 * to a concrete class, method, variable and so on. 083 * For example: 084 * <pre> 085 * String greeting = "HelloWorld"; 086 * //STRING_LITERAL[@text='HelloWorld'] 087 * </pre> 088 * <pre> 089 * int year = 2017; 090 * //NUM_INT[@text=2017] 091 * </pre> 092 * <pre> 093 * int age = 23; 094 * //NUM_INT[@text=23] 095 * </pre> 096 * As you can see same {@code NUM_INT} token type can have different texts, depending on 097 * context. 098 * <pre> 099 * public class MyClass {} 100 * //IDENT[@text='MyClass'] 101 * </pre> 102 * Only these tokens support text attribute because they make our xpath queries more accurate. 103 * These token types are listed below. 104 * */ 105 private static final Set<Integer> TOKEN_TYPES_WITH_TEXT_ATTRIBUTE = 106 Stream.of( 107 TokenTypes.IDENT, TokenTypes.STRING_LITERAL, TokenTypes.CHAR_LITERAL, 108 TokenTypes.NUM_LONG, TokenTypes.NUM_INT, TokenTypes.NUM_DOUBLE, TokenTypes.NUM_FLOAT, 109 TokenTypes.TEXT_BLOCK_CONTENT, TokenTypes.COMMENT_CONTENT) 110 .collect(Collectors.toSet()); 111 112 /** 113 * This regexp is used to convert new line to newline tag. 114 */ 115 private static final Pattern NEWLINE_TO_TAG = Pattern.compile("[\n]"); 116 117 /** 118 * This regexp is used to convert carriage return to carriage-return tag. 119 */ 120 private static final Pattern CARRIAGE_RETURN_TO_TAG = Pattern.compile("[\r]"); 121 122 /** Delimiter to separate xpath results. */ 123 private static final String DELIMITER = "---------" + System.lineSeparator(); 124 125 /** Stop instances being created. **/ 126 private XpathUtil() { 127 } 128 129 /** 130 * Iterates siblings of the given node and creates new Xpath-nodes. 131 * 132 * @param root the root node 133 * @param parent the parent node 134 * @param firstChild the first DetailAST 135 * @return children list 136 */ 137 public static List<AbstractNode> createChildren(AbstractNode root, AbstractNode parent, 138 DetailAST firstChild) { 139 DetailAST currentChild = firstChild; 140 final int depth = parent.getDepth() + 1; 141 final List<AbstractNode> result = new ArrayList<>(); 142 while (currentChild != null) { 143 final int index = result.size(); 144 final ElementNode child = new ElementNode(root, parent, currentChild, depth, index); 145 result.add(child); 146 currentChild = currentChild.getNextSibling(); 147 } 148 return result; 149 } 150 151 /** 152 * Checks, if specified node can have {@code @text} attribute. 153 * 154 * @param ast {@code DetailAst} element 155 * @return true if element supports {@code @text} attribute, false otherwise 156 */ 157 public static boolean supportsTextAttribute(DetailAST ast) { 158 return TOKEN_TYPES_WITH_TEXT_ATTRIBUTE.contains(ast.getType()); 159 } 160 161 /** 162 * Returns content of the text attribute of the ast element. 163 * 164 * @param ast {@code DetailAst} element 165 * @return text attribute of the ast element 166 */ 167 public static String getTextAttributeValue(DetailAST ast) { 168 String text = ast.getText(); 169 if (ast.getType() == TokenTypes.STRING_LITERAL) { 170 text = text.substring(1, text.length() - 1); 171 } 172 text = CARRIAGE_RETURN_TO_TAG.matcher(text).replaceAll("\\\\r"); 173 return NEWLINE_TO_TAG.matcher(text).replaceAll("\\\\n"); 174 } 175 176 /** 177 * Returns xpath query results on file as string. 178 * 179 * @param xpath query to evaluate 180 * @param file file to run on 181 * @return all results as string separated by delimiter 182 * @throws CheckstyleException if some parsing error happens 183 * @throws IOException if an error occurs 184 */ 185 public static String printXpathBranch(String xpath, File file) throws CheckstyleException, 186 IOException { 187 final XPathEvaluator xpathEvaluator = new XPathEvaluator(Configuration.newConfiguration()); 188 try { 189 final RootNode rootNode = new RootNode(JavaParser.parseFile(file, 190 JavaParser.Options.WITH_COMMENTS)); 191 final XPathExpression xpathExpression = xpathEvaluator.createExpression(xpath); 192 final XPathDynamicContext xpathDynamicContext = 193 xpathExpression.createDynamicContext(rootNode); 194 final List<Item> matchingItems = xpathExpression.evaluate(xpathDynamicContext); 195 return matchingItems.stream() 196 .map(item -> ((AbstractNode) item).getUnderlyingNode()) 197 .map(AstTreeStringPrinter::printBranch) 198 .collect(Collectors.joining(DELIMITER)); 199 } 200 catch (XPathException ex) { 201 final String errMsg = String.format(Locale.ROOT, 202 "Error during evaluation for xpath: %s, file: %s", xpath, file.getCanonicalPath()); 203 throw new CheckstyleException(errMsg, ex); 204 } 205 } 206 207}