case class GpuParquetMultiFilePartitionReaderFactory(sqlConf: SQLConf, broadcastedConf: Broadcast[SerializableConfiguration], dataSchema: StructType, readDataSchema: StructType, partitionSchema: StructType, filters: Array[Filter], rapidsConf: RapidsConf, metrics: Map[String, GpuMetric], queryUsesInputFile: Boolean, alluxioPathReplacementMap: Option[Map[String, String]]) extends MultiFilePartitionReaderFactoryBase with Product with Serializable
Similar to GpuParquetPartitionReaderFactory but extended for reading multiple files in an iteration. This will allow us to read multiple small files and combine them on the CPU side before sending them down to the GPU.
- Alphabetic
- By Inheritance
- GpuParquetMultiFilePartitionReaderFactory
- Serializable
- Product
- Equals
- MultiFilePartitionReaderFactoryBase
- Logging
- PartitionReaderFactory
- Serializable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new GpuParquetMultiFilePartitionReaderFactory(sqlConf: SQLConf, broadcastedConf: Broadcast[SerializableConfiguration], dataSchema: StructType, readDataSchema: StructType, partitionSchema: StructType, filters: Array[Filter], rapidsConf: RapidsConf, metrics: Map[String, GpuMetric], queryUsesInputFile: Boolean, alluxioPathReplacementMap: Option[Map[String, String]])
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val alluxioPathReplacementMap: Option[Map[String, String]]
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- val broadcastedConf: Broadcast[SerializableConfiguration]
-
def
buildBaseColumnarReaderForCloud(files: Array[PartitionedFile], conf: Configuration): PartitionReader[ColumnarBatch]
Build the PartitionReader for cloud reading
Build the PartitionReader for cloud reading
- files
files to be read
- conf
configuration
- returns
cloud reading PartitionReader
- Definition Classes
- GpuParquetMultiFilePartitionReaderFactory → MultiFilePartitionReaderFactoryBase
-
def
buildBaseColumnarReaderForCoalescing(origFiles: Array[PartitionedFile], conf: Configuration): PartitionReader[ColumnarBatch]
Build the PartitionReader for coalescing reading
Build the PartitionReader for coalescing reading
- conf
the configuration
- returns
coalescing reading PartitionReader
- Definition Classes
- GpuParquetMultiFilePartitionReaderFactory → MultiFilePartitionReaderFactoryBase
-
val
canUseCoalesceFilesReader: Boolean
An abstract method to indicate whether coalescing reading can be used
An abstract method to indicate whether coalescing reading can be used
- Definition Classes
- GpuParquetMultiFilePartitionReaderFactory → MultiFilePartitionReaderFactoryBase
-
val
canUseMultiThreadReader: Boolean
An abstract method to indicate whether cloud reading can be used
An abstract method to indicate whether cloud reading can be used
- Definition Classes
- GpuParquetMultiFilePartitionReaderFactory → MultiFilePartitionReaderFactoryBase
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
createColumnarReader(partition: InputPartition): PartitionReader[ColumnarBatch]
- Definition Classes
- MultiFilePartitionReaderFactoryBase → PartitionReaderFactory
-
def
createReader(partition: InputPartition): PartitionReader[InternalRow]
- Definition Classes
- MultiFilePartitionReaderFactoryBase → PartitionReaderFactory
- val dataSchema: StructType
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- val filters: Array[Filter]
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
getFileFormatShortName: String
File format short name used for logging and other things to uniquely identify which file format is being used.
File format short name used for logging and other things to uniquely identify which file format is being used.
- returns
the file format short name
- Definition Classes
- GpuParquetMultiFilePartitionReaderFactory → MultiFilePartitionReaderFactoryBase
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
val
maxChunkedReaderMemoryUsageSizeBytes: Long
- Attributes
- protected
- Definition Classes
- MultiFilePartitionReaderFactoryBase
-
val
maxGpuColumnSizeBytes: Long
- Attributes
- protected
- Definition Classes
- MultiFilePartitionReaderFactoryBase
-
val
maxReadBatchSizeBytes: Long
- Attributes
- protected
- Definition Classes
- MultiFilePartitionReaderFactoryBase
-
val
maxReadBatchSizeRows: Int
- Attributes
- protected
- Definition Classes
- MultiFilePartitionReaderFactoryBase
- val metrics: Map[String, GpuMetric]
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- val partitionSchema: StructType
- val queryUsesInputFile: Boolean
- val rapidsConf: RapidsConf
- val readDataSchema: StructType
- val sqlConf: SQLConf
-
def
supportColumnarReads(partition: InputPartition): Boolean
- Definition Classes
- MultiFilePartitionReaderFactoryBase → PartitionReaderFactory
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
val
targetBatchSizeBytes: Long
- Attributes
- protected
- Definition Classes
- MultiFilePartitionReaderFactoryBase
-
val
useChunkedReader: Boolean
- Attributes
- protected
- Definition Classes
- MultiFilePartitionReaderFactoryBase
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()