abstract class GpuTextBasedPartitionReader[BUFF <: LineBufferer, FACT <: LineBuffererFactory[BUFF]] extends PartitionReader[ColumnarBatch] with ScanWithMetrics
The text-based PartitionReader
- Alphabetic
- By Inheritance
- GpuTextBasedPartitionReader
- ScanWithMetrics
- PartitionReader
- Closeable
- AutoCloseable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
-
new
GpuTextBasedPartitionReader(conf: Configuration, partFile: PartitionedFile, dataSchema: StructType, readDataSchema: StructType, lineSeparatorInRead: Option[Array[Byte]], maxRowsPerChunk: Integer, maxBytesPerChunk: Long, execMetrics: Map[String, GpuMetric], bufferFactory: FACT)
- conf
the Hadoop configuration
- partFile
file split to read
- dataSchema
schema of the data
- readDataSchema
the Spark schema describing what will be read
- lineSeparatorInRead
An optional line separator, specified as bytes.
- maxRowsPerChunk
maximum number of rows to read in a batch
- maxBytesPerChunk
maximum number of bytes to read in a batch
- execMetrics
metrics to update during read
Abstract Value Members
- abstract def castStringToBool(input: ColumnVector): ColumnVector
- abstract def dateFormat: Option[String]
-
abstract
def
getFileFormatShortName: String
File format short name used for logging and other things to uniquely identify which file format is being used.
File format short name used for logging and other things to uniquely identify which file format is being used.
- returns
the file format short name
-
abstract
def
readToTable(dataBuffer: BUFF, cudfDataSchema: Schema, readDataSchema: StructType, cudfReadDataSchema: Schema, isFirstChunk: Boolean, decodeTime: GpuMetric): Table
Read the host buffer into a GPU table
Read the host buffer into a GPU table
- dataBuffer
where the data is buffered
- cudfDataSchema
the cudf schema of the data
- readDataSchema
the Spark schema describing what will be read
- cudfReadDataSchema
the cudf schema of just the data we want to read.
- isFirstChunk
if it is the first chunk
- returns
table
- abstract def timestampFormat: String
Concrete Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- def castStringToDate(input: ColumnVector, dt: DType): ColumnVector
- def castStringToDecimal(input: ColumnVector, dt: DecimalType): ColumnVector
- def castStringToFloat(input: ColumnVector, dt: DType): ColumnVector
- def castStringToInt(input: ColumnVector, intType: DType): ColumnVector
- def castStringToTimestamp(lhs: ColumnVector, sparkFormat: String, dtype: DType): ColumnVector
- def castTableToDesiredTypes(table: Table, readSchema: StructType): Table
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
close(): Unit
- Definition Classes
- GpuTextBasedPartitionReader → Closeable → AutoCloseable
-
def
currentMetricsValues(): Array[CustomTaskMetric]
- Definition Classes
- PartitionReader
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
get(): ColumnarBatch
- Definition Classes
- GpuTextBasedPartitionReader → PartitionReader
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getCudfSchema(dataSchema: StructType): Schema
-
def
handleResult(readDataSchema: StructType, table: Table): Option[Table]
Handle the table decoded by the GPU
Handle the table decoded by the GPU
Please note that this function owns the table, which is supposed to be closed in this function. However, as an optimization, we just return the original table.
- readDataSchema
the Spark schema describing what will be read
- table
the table decoded by GPU
- returns
the new optional Table
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
val
metrics: Map[String, GpuMetric]
- Definition Classes
- ScanWithMetrics
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
next(): Boolean
- Definition Classes
- GpuTextBasedPartitionReader → PartitionReader
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()