public final class WordExtractor extends java.lang.Object implements POIOLE2TextExtractor
| Constructor and Description |
|---|
WordExtractor(DirectoryNode dir) |
WordExtractor(HWPFDocument doc)
Create a new Word Extractor
|
WordExtractor(java.io.InputStream is)
Create a new Word Extractor
|
WordExtractor(POIFSFileSystem fs)
Create a new Word Extractor
|
| Modifier and Type | Method and Description |
|---|---|
java.lang.String[] |
getCommentsText() |
HWPFDocument |
getDocument() |
java.lang.String[] |
getEndnoteText() |
HWPFDocument |
getFilesystem() |
java.lang.String |
getFooterText()
Deprecated.
3.8 beta 4
|
java.lang.String[] |
getFootnoteText() |
java.lang.String |
getHeaderText()
Deprecated.
3.8 beta 4
|
java.lang.String[] |
getMainTextboxText() |
java.lang.String[] |
getParagraphText()
Get the text from the word file, as an array with one String per
paragraph
|
java.lang.String |
getText()
Grab the text, based on the WordToTextConverter.
|
java.lang.String |
getTextFromPieces()
Grab the text out of the text pieces.
|
boolean |
isCloseFilesystem() |
void |
setCloseFilesystem(boolean doCloseFilesystem) |
static java.lang.String |
stripFields(java.lang.String text)
Removes any fields (eg macros, page markers etc) from the string.
|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
getDocSummaryInformation, getMetadataTextExtractor, getRoot, getSummaryInformation
close
public WordExtractor(java.io.InputStream is)
throws java.io.IOException
Parameters: is - InputStream containing the word file
Throws: java.io.IOException
public WordExtractor(POIFSFileSystem fs) throws java.io.IOException
Parameters: fs - POIFSFileSystem containing the word file
Throws: java.io.IOException
public WordExtractor(DirectoryNode dir) throws java.io.IOException
Throws: java.io.IOException
public WordExtractor(HWPFDocument doc)
Parameters: doc - The HWPFDocument to extract from
public java.lang.String[] getParagraphText()
public java.lang.String[] getFootnoteText()
public java.lang.String[] getMainTextboxText()
public java.lang.String[] getEndnoteText()
public java.lang.String[] getCommentsText()
@Deprecated public java.lang.String getHeaderText()
@Deprecated public java.lang.String getFooterText()
public java.lang.String getTextFromPieces()
public java.lang.String getText()
Specified by: getText in interface POITextExtractor
public static java.lang.String stripFields(java.lang.String text)
public HWPFDocument getDocument()
Specified by: getDocument in interface POIOLE2TextExtractor
Specified by: getDocument in interface POITextExtractor
public void setCloseFilesystem(boolean doCloseFilesystem)
Specified by: setCloseFilesystem in interface POITextExtractor
public boolean isCloseFilesystem()
Specified by: isCloseFilesystem in interface POITextExtractor
public HWPFDocument getFilesystem()
getFilesystem in interface POITextExtractor