public class WordExtractor extends Object
| Modifier and Type | Class and Description |
|---|---|
| static class | WordExtractor.TagAndStyle |
| Constructor and Description |
|---|
| WordExtractor(org.apache.tika.parser.ParseContext context) |
| Modifier and Type | Method and Description |
|---|---|
| static WordExtractor.TagAndStyle | buildParagraphTagAndStyle(String styleName, boolean isTable)<br>Given a style name, return what tag should be used, and what style should be applied to it. |
| protected org.apache.tika.detect.Detector | getDetector() |
| protected org.apache.tika.mime.MimeTypes | getMimeTypes() |
| protected String | getPassword()<br>Returns the password to be used for this file, or null if no / default password should be used |
| protected org.apache.tika.config.TikaConfig | getTikaConfig() |
| protected void | handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir, org.apache.tika.sax.XHTMLContentHandler xhtml)<br>Handle an office document that's embedded at the POIFS level |
| protected void | handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource, String filename, String relationshipID, String mediaType, org.apache.tika.sax.XHTMLContentHandler xhtml, boolean outputHtml) |
| protected void | parse(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml) |
| protected void | parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml) |
| protected void | parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root, org.apache.tika.sax.XHTMLContentHandler xhtml) |
| protected void | parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem, org.apache.tika.sax.XHTMLContentHandler xhtml) |
protected void parse(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
org.apache.tika.sax.XHTMLContentHandler xhtml)
throws IOException,
SAXException,
org.apache.tika.exception.TikaException
Throws: IOException, SAXException, org.apache.tika.exception.TikaException

protected void parse(org.apache.poi.poifs.filesystem.DirectoryNode root,
org.apache.tika.sax.XHTMLContentHandler xhtml)
throws IOException,
SAXException,
org.apache.tika.exception.TikaException
Throws: IOException, SAXException, org.apache.tika.exception.TikaException

protected void parseWord6(org.apache.poi.poifs.filesystem.NPOIFSFileSystem filesystem,
org.apache.tika.sax.XHTMLContentHandler xhtml)
throws IOException,
SAXException,
org.apache.tika.exception.TikaException
Throws: IOException, SAXException, org.apache.tika.exception.TikaException

protected void parseWord6(org.apache.poi.poifs.filesystem.DirectoryNode root,
org.apache.tika.sax.XHTMLContentHandler xhtml)
throws IOException,
SAXException,
org.apache.tika.exception.TikaException
Throws: IOException, SAXException, org.apache.tika.exception.TikaException

public static WordExtractor.TagAndStyle buildParagraphTagAndStyle(String styleName, boolean isTable)
protected org.apache.tika.config.TikaConfig getTikaConfig()
protected org.apache.tika.detect.Detector getDetector()
protected org.apache.tika.mime.MimeTypes getMimeTypes()
protected String getPassword()
protected void handleEmbeddedResource(org.apache.tika.io.TikaInputStream resource,
String filename,
String relationshipID,
String mediaType,
org.apache.tika.sax.XHTMLContentHandler xhtml,
boolean outputHtml)
throws IOException,
SAXException,
org.apache.tika.exception.TikaException
Throws: IOException, SAXException, org.apache.tika.exception.TikaException

protected void handleEmbeddedOfficeDoc(org.apache.poi.poifs.filesystem.DirectoryEntry dir,
org.apache.tika.sax.XHTMLContentHandler xhtml)
throws IOException,
SAXException,
org.apache.tika.exception.TikaException
Throws: IOException, SAXException, org.apache.tika.exception.TikaException

Copyright © 2007-2015 The Apache Software Foundation. All Rights Reserved.