public class ScrapingTransformer
extends org.codelibs.robot.transformer.impl.HtmlTransformer
| 修飾子とタイプ | フィールドと説明 |
|---|---|
| protected ThreadLocal<Set<String>> | childUrlSetLocal |
| String[] | copiedResonseDataFields |
| protected RiverConfig | riverConfig |
| コンストラクタと説明 |
|---|
| ScrapingTransformer() |
| 修飾子とタイプ | メソッドと説明 |
|---|---|
| protected void | addPropertyData(Map<String,Object> dataMap, String key, Object value) |
| protected void | convertFlatMap(String prefix, Map<String,Object> oldMap, Map<String,Object> newMap) |
| protected void | deepCopy(Map<String,Object> oldMap, Map<String,Object> newMap) |
| Object | getData(org.codelibs.robot.entity.AccessResultData accessResultData)<br>Returns data as XML content of String. |
| protected org.jsoup.nodes.Element[] | getElements(org.jsoup.nodes.Element[] elements, List<String> queries, boolean isArrayProperty) |
| protected org.jsoup.nodes.Element[] | getElements(org.jsoup.nodes.Element[] elements, String query) |
| protected Method | getQueryMethod(org.jsoup.nodes.Element element, String queryType, List<Object> argList) |
| protected org.codelibs.elasticsearch.web.robot.transformer.ScrapingTransformer.ScriptInfo | getScriptValue(Map<String,Object> params) |
| void | init() |
| protected String | loadCharset(InputStream inputStream, int preloadSize) |
| protected void | processCssQuery(org.jsoup.nodes.Document document, String propName, Map<String,Object> params, boolean isTrimSpaces, List<String> strList) |
| protected void | processData(ScrapingRule scrapingRule, File file, org.codelibs.robot.entity.ResponseData responseData, org.codelibs.robot.entity.ResultData resultData) |
| protected void | storeChildUrls(org.codelibs.robot.entity.ResponseData responseData, org.codelibs.robot.entity.ResultData resultData) |
| protected void | storeData(org.codelibs.robot.entity.ResponseData responseData, org.codelibs.robot.entity.ResultData resultData) |
| protected void | storeIndex(org.elasticsearch.client.Client client, String indexName, String typeName, Map<String,Object> dataMap) |
| protected void | storeIndex(org.codelibs.robot.entity.ResponseData responseData, Map<String,Object> dataMap) |
| org.codelibs.robot.entity.ResultData | transform(org.codelibs.robot.entity.ResponseData responseData) |
| protected String | trimSpaces(String value, boolean trimSpaces) |
| protected void | updateCharset(org.codelibs.robot.entity.ResponseData responseData) |
addChildUrlFromTagAttribute, addChildUrlRule, addFeature, addProperty, convertChildUrlList, encodeUrl, getBaseHref, getChildUrlRuleMap, getDefaultEncoding, getDomParser, getDuplicateUrl, getFeatureMap, getInvalidUrlPattern, getPreloadSizeForCharset, getPropertyMap, getUrlFromTagAttribute, getXPathAPI, isHtml, isSupportedCharset, isValidPath, loadCharset, normalizeUrl, parseCharset, setChildUrlRuleMap, setDefaultEncoding, setFeatureMap, setInvalidUrlPattern, setPreloadSizeForCharset, setPropertyMap
public String[] copiedResonseDataFields
protected RiverConfig riverConfig
protected ThreadLocal<Set<String>> childUrlSetLocal
public void init()
public org.codelibs.robot.entity.ResultData transform(org.codelibs.robot.entity.ResponseData responseData)
transform インタフェース内 org.codelibs.robot.transformer.Transformer
transform クラス内 org.codelibs.robot.transformer.impl.HtmlTransformer
protected void updateCharset(org.codelibs.robot.entity.ResponseData responseData)
updateCharset クラス内 org.codelibs.robot.transformer.impl.HtmlTransformer
protected String loadCharset(InputStream inputStream, int preloadSize)
protected void storeData(org.codelibs.robot.entity.ResponseData responseData,
org.codelibs.robot.entity.ResultData resultData)
storeData クラス内 org.codelibs.robot.transformer.impl.HtmlTransformer
protected void processData(ScrapingRule scrapingRule, File file, org.codelibs.robot.entity.ResponseData responseData, org.codelibs.robot.entity.ResultData resultData)
protected org.codelibs.elasticsearch.web.robot.transformer.ScrapingTransformer.ScriptInfo getScriptValue(Map<String,Object> params)
protected void processCssQuery(org.jsoup.nodes.Document document,
String propName,
Map<String,Object> params,
boolean isTrimSpaces,
List<String> strList)
protected Method getQueryMethod(org.jsoup.nodes.Element element, String queryType, List<Object> argList)
protected org.jsoup.nodes.Element[] getElements(org.jsoup.nodes.Element[] elements,
List<String> queries,
boolean isArrayProperty)
protected org.jsoup.nodes.Element[] getElements(org.jsoup.nodes.Element[] elements,
String query)
protected void addPropertyData(Map<String,Object> dataMap, String key, Object value)
protected void storeIndex(org.codelibs.robot.entity.ResponseData responseData,
Map<String,Object> dataMap)
protected void storeIndex(org.elasticsearch.client.Client client,
String indexName,
String typeName,
Map<String,Object> dataMap)
protected void convertFlatMap(String prefix, Map<String,Object> oldMap, Map<String,Object> newMap)
protected void storeChildUrls(org.codelibs.robot.entity.ResponseData responseData,
org.codelibs.robot.entity.ResultData resultData)
storeChildUrls クラス内 org.codelibs.robot.transformer.impl.HtmlTransformer
public Object getData(org.codelibs.robot.entity.AccessResultData accessResultData)
getData インタフェース内 org.codelibs.robot.transformer.Transformer
getData クラス内 org.codelibs.robot.transformer.impl.HtmlTransformer
Copyright © 2011-2014. All Rights Reserved.