boolean needToCheck
boolean allowExtractionForAccessibility
PDFParserConfig defaultConfig
Set<E> userConfigured
boolean enableAutoSpace
boolean suppressDuplicateOverlappingText
boolean extractAnnotationText
boolean sortByPosition
boolean extractAcroFormContent
boolean extractBookmarksText
boolean extractInlineImages
boolean extractInlineImageMetadataOnly
ImageGraphicsEngineFactory imageGraphicsEngineFactory
boolean extractUniqueInlineImagesOnly
boolean extractMarkedContent
Float averageCharTolerance
Float spacingTolerance
float dropThreshold
boolean ifXFAExtractOnlyXFA
PDFParserConfig.OCR_STRATEGY ocrStrategy
PDFParserConfig.OCRStrategyAuto ocrStrategyAuto
PDFParserConfig.OCR_RENDERING_STRATEGY ocrRenderingStrategy
int ocrDPI
org.apache.pdfbox.rendering.ImageType ocrImageType
String ocrImageFormatName
float ocrImageQuality
PDFParserConfig.IMAGE_STRATEGY imageStrategy
AccessChecker accessChecker
boolean catchIntermediateIOExceptions
boolean extractActions
boolean extractFontNames
long maxMainMemoryBytes
boolean setKCMS
boolean detectAngles
org.apache.tika.renderer.Renderer renderer
float unmappedUnicodeCharsPerPage
int totalCharsPerPage
Copyright © 2007–2022 The Apache Software Foundation. All rights reserved.