class GpuOrcPartitionReader extends FilePartitionReaderBase with OrcPartitionReaderBase
A PartitionReader that reads an ORC file split on the GPU.
Efficiently reading an ORC split on the GPU requires rebuilding the ORC file in memory such that only relevant data is present in the memory file. This avoids sending unnecessary data to the GPU and saves GPU memory.
- Alphabetic
- By Inheritance
- GpuOrcPartitionReader
- OrcPartitionReaderBase
- OrcCommonFunctions
- OrcCodecWritingHelper
- FilePartitionReaderBase
- ScanWithMetrics
- Logging
- PartitionReader
- Closeable
- AutoCloseable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
GpuOrcPartitionReader(conf: Configuration, partFile: PartitionedFile, ctx: OrcPartitionReaderContext, readDataSchema: StructType, debugDumpPrefix: Option[String], debugDumpAlways: Boolean, maxReadBatchSizeRows: Integer, maxReadBatchSizeBytes: Long, targetBatchSizeBytes: Long, useChunkedReader: Boolean, maxChunkedReaderMemoryUsageSizeBytes: Long, execMetrics: Map[String, GpuMetric], isCaseSensitive: Boolean)
- conf
Hadoop configuration
- partFile
file split to read
- ctx
the context to provide some necessary information
- readDataSchema
Spark schema of what will be read from the file
- debugDumpPrefix
path prefix for dumping the memory file, or None if debug dumping is disabled
- debugDumpAlways
whether to always debug dump or only on errors
- maxReadBatchSizeRows
maximum number of rows to read in a batch
- maxReadBatchSizeBytes
maximum number of bytes to read in a batch
- targetBatchSizeBytes
the target size of a batch
- useChunkedReader
whether to read ORC by chunks or read all at once
- maxChunkedReaderMemoryUsageSizeBytes
soft limit on the number of bytes of internal memory usage that the reader will use
- execMetrics
metrics to update during read
- isCaseSensitive
whether the name check should be case sensitive or not
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
var
batchIter: Iterator[ColumnarBatch]
- Attributes
- protected
- Definition Classes
- FilePartitionReaderBase
-
def
buildReaderSchema(updatedSchema: TypeDescription, requestedMapping: Option[Array[Int]]): TypeDescription
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
def
buildReaderSchema(ctx: OrcPartitionReaderContext): TypeDescription
Get the ORC schema corresponding to the file being constructed for the GPU
Get the ORC schema corresponding to the file being constructed for the GPU
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
final
def
calculateFileTailSize(ctx: OrcPartitionReaderContext, footerStartOffset: Long, stripes: Seq[OrcOutputStripe]): Long
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
close(): Unit
- Definition Classes
- FilePartitionReaderBase → Closeable → AutoCloseable
-
val
conf: Configuration
- Definition Classes
- GpuOrcPartitionReader → OrcCommonFunctions
-
def
copyStripeData(dataReader: GpuOrcDataReader, out: HostMemoryOutputStream, inputDataRanges: DiskRangeList): Unit
Copy the stripe to the channel
Copy the stripe to the channel
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
def
currentMetricsValues(): Array[CustomTaskMetric]
- Definition Classes
- PartitionReader
-
val
debugDumpAlways: Boolean
Whether to always debug dump or only on errors
Whether to always debug dump or only on errors
- Definition Classes
- GpuOrcPartitionReader → OrcCommonFunctions
-
val
debugDumpPrefix: Option[String]
Whether debug dumping is enabled and the path prefix where to dump
Whether debug dumping is enabled and the path prefix where to dump
- Definition Classes
- GpuOrcPartitionReader → OrcCommonFunctions
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
estimateOutputSizeFromBlocks(blocks: Seq[OrcStripeWithMeta]): Long
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
get(): ColumnarBatch
- Definition Classes
- FilePartitionReaderBase → PartitionReader
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getORCOptionsAndSchema(memFileSchema: TypeDescription, requestedMapping: Option[Array[Int]], readDataSchema: StructType): (ORCOptions, TypeDescription)
- Definition Classes
- OrcCommonFunctions
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
var
isDone: Boolean
- Attributes
- protected
- Definition Classes
- FilePartitionReaderBase
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
isNeedToSplitDataBlock(curMeta: OrcBlockMetaForSplitCheck, nextMeta: OrcBlockMetaForSplitCheck): Boolean
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
val
metrics: Map[String, GpuMetric]
- Definition Classes
- ScanWithMetrics
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
next(): Boolean
- Definition Classes
- GpuOrcPartitionReader → PartitionReader
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
populateCurrentBlockChunk(blockIterator: BufferedIterator[OrcOutputStripe], maxReadBatchSizeRows: Int, maxReadBatchSizeBytes: Long): Seq[OrcOutputStripe]
- Definition Classes
- OrcPartitionReaderBase
-
val
readDataSchema: StructType
- Definition Classes
- GpuOrcPartitionReader → OrcCommonFunctions
-
def
readPartFile(ctx: OrcPartitionReaderContext, stripes: Seq[OrcOutputStripe]): (HostMemoryBuffer, Long)
Read the stripes into HostMemoryBuffer.
Read the stripes into HostMemoryBuffer.
- ctx
the context to provide some necessary information
- stripes
a sequence of stripes to be read into the HostMemoryBuffer
- returns
the HostMemoryBuffer and its data size
- Attributes
- protected
- Definition Classes
- OrcPartitionReaderBase
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
implicit
def
toDataStripes(stripes: Seq[DataBlockBase]): Seq[OrcStripeWithMeta]
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
implicit
def
toStripe(block: DataBlockBase): OrcStripeWithMeta
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
withCodecOutputStream[T](ctx: OrcPartitionReaderContext, out: OutputStream)(block: (OrcProtoWriterShim) ⇒ T): T
Executes the provided code block in the codec environment
Executes the provided code block in the codec environment
- Definition Classes
- OrcCodecWritingHelper
-
final
def
writeOrcFileHeader(outStream: HostMemoryOutputStream): Long
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions
-
final
def
writeOrcFileTail(outStream: HostMemoryOutputStream, ctx: OrcPartitionReaderContext, footerStartOffset: Long, stripes: Seq[OrcOutputStripe]): Unit
Write the ORC file footer and PostScript
Write the ORC file footer and PostScript
- Attributes
- protected
- Definition Classes
- OrcCommonFunctions