Package technology.tabula
Class TextStripper
java.lang.Object
org.apache.pdfbox.contentstream.PDFStreamEngine
org.apache.pdfbox.text.PDFTextStripper
technology.tabula.TextStripper
public class TextStripper
extends org.apache.pdfbox.text.PDFTextStripper
-
Field Summary
Fields inherited from class org.apache.pdfbox.text.PDFTextStripper
charactersByArticle, LINE_SEPARATOR, output -
Constructor Summary
Constructors -
Method Summary
Modifier and Type | Method | Description
protected float | computeFontHeight(org.apache.pdfbox.pdmodel.font.PDFont font)
float | getMinCharHeight()
float | getMinCharWidth()
void | process()
protected void | showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4)
protected void | writeString(String string, List<org.apache.pdfbox.text.TextPosition> textPositions)
Methods inherited from class org.apache.pdfbox.text.PDFTextStripper
endArticle, endDocument, endPage, getAddMoreFormatting, getArticleEnd, getArticleStart, getAverageCharTolerance, getCharactersByArticle, getCurrentPageNo, getDropThreshold, getEndBookmark, getEndPage, getIndentThreshold, getLineSeparator, getListItemPatterns, getOutput, getPageEnd, getPageStart, getParagraphEnd, getParagraphStart, getSeparateByBeads, getSortByPosition, getSpacingTolerance, getStartBookmark, getStartPage, getSuppressDuplicateOverlappingText, getText, getWordSeparator, matchPattern, processPage, processPages, processTextPosition, setAddMoreFormatting, setArticleEnd, setArticleStart, setAverageCharTolerance, setDropThreshold, setEndBookmark, setEndPage, setIndentThreshold, setLineSeparator, setListItemPatterns, setPageEnd, setPageStart, setParagraphEnd, setParagraphStart, setShouldSeparateByBeads, setSortByPosition, setSpacingTolerance, setStartBookmark, setStartPage, setSuppressDuplicateOverlappingText, setWordSeparator, startArticle, startArticle, startDocument, startPage, writeCharacters, writeLineSeparator, writePage, writePageEnd, writePageStart, writeParagraphEnd, writeParagraphSeparator, writeParagraphStart, writeString, writeText, writeWordSeparator
Methods inherited from class org.apache.pdfbox.contentstream.PDFStreamEngine
addOperator, applyTextAdjustment, beginMarkedContentSequence, beginText, decreaseLevel, endMarkedContentSequence, endText, getAppearance, getCurrentPage, getGraphicsStackSize, getGraphicsState, getInitialMatrix, getLevel, getResources, getTextLineMatrix, getTextMatrix, increaseLevel, operatorException, processAnnotation, processChildStream, processOperator, processOperator, processSoftMask, processTilingPattern, processTilingPattern, processTransparencyGroup, processType3Stream, registerOperatorProcessor, restoreGraphicsStack, restoreGraphicsState, saveGraphicsStack, saveGraphicsState, setLineDashPattern, setTextLineMatrix, setTextMatrix, showAnnotation, showFontGlyph, showFontGlyph, showForm, showGlyph, showText, showTextString, showTextStrings, showTransparencyGroup, showType3Glyph, showType3Glyph, transformedPoint, transformWidth, unsupportedOperator
-
Constructor Details
-
TextStripper
public TextStripper(org.apache.pdfbox.pdmodel.PDDocument document, int pageNumber) throws IOException
- Throws:
IOException
-
-
Method Details
-
process
- Throws:
IOException
-
writeString
protected void writeString(String string, List<org.apache.pdfbox.text.TextPosition> textPositions) throws IOException
- Overrides:
writeString in class org.apache.pdfbox.text.PDFTextStripper
- Throws:
IOException
-
computeFontHeight
- Throws:
IOException
-
getTextElements
-
getSpatialIndex
-
getMinCharWidth
public float getMinCharWidth() -
getMinCharHeight
public float getMinCharHeight() -
showGlyph
protected void showGlyph(org.apache.pdfbox.util.Matrix arg0, org.apache.pdfbox.pdmodel.font.PDFont arg1, int arg2, String arg3, org.apache.pdfbox.util.Vector arg4) throws IOException
- Overrides:
showGlyph in class org.apache.pdfbox.contentstream.PDFStreamEngine
- Throws:
IOException
-