public class XSSFExcelExtractorDecorator extends AbstractOOXMLExtractor
| Modifier and Type | Class and Description |
|---|---|
| protected static class | XSSFExcelExtractorDecorator.HeaderFooterFromString |
| protected static class | XSSFExcelExtractorDecorator.SheetTextAsHTML<br>Turns formatted sheet events into HTML |
| protected static class | XSSFExcelExtractorDecorator.XSSFSheetInterestingPartsCapturer<br>Captures information on interesting tags, whilst delegating the main work to the formatting handler |
| Modifier and Type | Field and Description |
|---|---|
| protected Map<String,String> | drawingHyperlinks |
| protected org.apache.poi.ss.usermodel.DataFormatter | formatter |
| protected static org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper | hfHelper<br>Allows access to headers/footers from raw xml strings |
| protected org.apache.tika.metadata.Metadata | metadata |
| protected org.apache.tika.parser.ParseContext | parseContext |
| protected List<org.apache.poi.openxml4j.opc.PackagePart> | sheetParts |
Inherited fields: config, EMBEDDED_RELATIONSHIPS, extractor

| Constructor and Description |
|---|
| XSSFExcelExtractorDecorator(org.apache.tika.parser.ParseContext context, org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor, Locale locale) |
| Modifier and Type | Method and Description |
|---|---|
| protected void | addDrawingHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart) |
| protected void | buildXHTML(org.apache.tika.sax.XHTMLContentHandler xhtml)<br>Populates the XHTMLContentHandler object received as parameter. |
| protected void | configureExtractor(org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor, Locale locale) |
| protected void | extractHeaderFooter(String hf, org.apache.tika.sax.XHTMLContentHandler xhtml) |
| protected void | extractHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart, org.apache.tika.sax.XHTMLContentHandler xhtml) |
| protected List<org.apache.poi.openxml4j.opc.PackagePart> | getMainDocumentParts()<br>In Excel files, sheets have things embedded in them, and sheet drawings which have the images |
| void | getXHTML(ContentHandler handler, org.apache.tika.metadata.Metadata metadata, org.apache.tika.parser.ParseContext context)<br>Parses the document into a sequence of XHTML SAX events sent to the given content handler. |
| protected void | processShapes(List<org.apache.poi.xssf.usermodel.XSSFShape> shapes, org.apache.tika.sax.XHTMLContentHandler xhtml) |
| void | processSheet(org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler, org.apache.poi.xssf.model.Comments comments, org.apache.poi.xssf.model.StylesTable styles, org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable strings, InputStream sheetInputStream) |
Inherited methods: getDocument, getJustFileName, getMetadataExtractor, handleEmbeddedFile, loadLinkedRelationships

protected static org.apache.poi.xssf.usermodel.helpers.HeaderFooterHelper hfHelper
protected final org.apache.poi.ss.usermodel.DataFormatter formatter
protected final List<org.apache.poi.openxml4j.opc.PackagePart> sheetParts
protected org.apache.tika.metadata.Metadata metadata
protected org.apache.tika.parser.ParseContext parseContext
public XSSFExcelExtractorDecorator(org.apache.tika.parser.ParseContext context,
org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor,
Locale locale)
protected void configureExtractor(org.apache.poi.ooxml.extractor.POIXMLTextExtractor extractor,
Locale locale)
public void getXHTML(ContentHandler handler, org.apache.tika.metadata.Metadata metadata, org.apache.tika.parser.ParseContext context) throws SAXException, org.apache.xmlbeans.XmlException, IOException, org.apache.tika.exception.TikaException
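Description copied from interface: OOXMLExtractor
Parses the document into a sequence of XHTML SAX events sent to the given content handler.
Specified by: getXHTML in interface OOXMLExtractor
Overrides: getXHTML in class AbstractOOXMLExtractor
Throws: SAXException, org.apache.xmlbeans.XmlException, IOException, org.apache.tika.exception.TikaException
See Also: OOXMLExtractor.getXHTML(ContentHandler, Metadata, ParseContext)

protected void buildXHTML(org.apache.tika.sax.XHTMLContentHandler xhtml)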
throws SAXException,
org.apache.xmlbeans.XmlException,
IOException
Description copied from class: AbstractOOXMLExtractor
Populates the XHTMLContentHandler object received as parameter.
Overrides: buildXHTML in class AbstractOOXMLExtractor
Throws: SAXException, org.apache.xmlbeans.XmlException, IOException
See Also: XSSFExcelExtractor.getText()

protected void addDrawingHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart)
protected void extractHyperLinks(org.apache.poi.openxml4j.opc.PackagePart sheetPart,
org.apache.tika.sax.XHTMLContentHandler xhtml)
throws SAXException
Throws: SAXException

protected void extractHeaderFooter(String hf, org.apache.tika.sax.XHTMLContentHandler xhtml) throws SAXException
Throws: SAXException

protected void processShapes(List<org.apache.poi.xssf.usermodel.XSSFShape> shapes, org.apache.tika.sax.XHTMLContentHandler xhtml) throws SAXException
Throws: SAXException

public void processSheet(org.apache.poi.xssf.eventusermodel.XSSFSheetXMLHandler.SheetContentsHandler sheetContentsHandler,
org.apache.poi.xssf.model.Comments comments,
org.apache.poi.xssf.model.StylesTable styles,
org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable strings,
InputStream sheetInputStream)
throws IOException,
SAXException
Throws: IOException, SAXException

protected List<org.apache.poi.openxml4j.opc.PackagePart> getMainDocumentParts() throws org.apache.tika.exception.TikaException
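In Excel files, sheets have things embedded in them, and sheet drawings which have the images
Overrides: getMainDocumentParts in class AbstractOOXMLExtractor
Throws: org.apache.tika.exception.TikaException

Copyright © 2007–2023 The Apache Software Foundation. All rights reserved.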