class CSVPartitionReader extends CSVPartitionReaderBase[HostLineBufferer, HostLineBuffererFactory.type]
- Alphabetic
- By Inheritance
- CSVPartitionReader
- CSVPartitionReaderBase
- GpuTextBasedPartitionReader
- ScanWithMetrics
- PartitionReader
- Closeable
- AutoCloseable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new CSVPartitionReader(conf: Configuration, partFile: PartitionedFile, dataSchema: StructType, readDataSchema: StructType, parsedOptions: CSVOptions, maxRowsPerChunk: Integer, maxBytesPerChunk: Long, execMetrics: Map[String, GpuMetric])
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
- def buildCsvOptions(parsedOptions: CSVOptions, schema: StructType, hasHeader: Boolean): Builder
-
def
castStringToBool(input: ColumnVector): ColumnVector
CSV supports "true" and "false" (case-insensitive) as valid boolean values.
CSV supports "true" and "false" (case-insensitive) as valid boolean values.
- Definition Classes
- CSVPartitionReaderBase → GpuTextBasedPartitionReader
-
def
castStringToDate(input: ColumnVector, dt: DType): ColumnVector
- Definition Classes
- GpuTextBasedPartitionReader
-
def
castStringToDecimal(input: ColumnVector, dt: DecimalType): ColumnVector
- Definition Classes
- GpuTextBasedPartitionReader
-
def
castStringToFloat(input: ColumnVector, dt: DType): ColumnVector
- Definition Classes
- GpuTextBasedPartitionReader
-
def
castStringToInt(input: ColumnVector, intType: DType): ColumnVector
- Definition Classes
- GpuTextBasedPartitionReader
-
def
castStringToTimestamp(lhs: ColumnVector, sparkFormat: String, dtype: DType): ColumnVector
- Definition Classes
- GpuTextBasedPartitionReader
-
def
castTableToDesiredTypes(table: Table, readSchema: StructType): Table
- Definition Classes
- GpuTextBasedPartitionReader
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
close(): Unit
- Definition Classes
- GpuTextBasedPartitionReader → Closeable → AutoCloseable
-
def
currentMetricsValues(): Array[CustomTaskMetric]
- Definition Classes
- PartitionReader
-
def
dateFormat: Option[String]
- Definition Classes
- CSVPartitionReaderBase → GpuTextBasedPartitionReader
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
get(): ColumnarBatch
- Definition Classes
- GpuTextBasedPartitionReader → PartitionReader
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getCudfSchema(dataSchema: StructType): Schema
- Definition Classes
- GpuTextBasedPartitionReader
-
def
getFileFormatShortName: String
File format short name used for logging and other things to uniquely identify which file format is being used.
File format short name used for logging and other things to uniquely identify which file format is being used.
- returns
the file format short name
- Definition Classes
- CSVPartitionReaderBase → GpuTextBasedPartitionReader
-
def
handleResult(readDataSchema: StructType, table: Table): Option[Table]
Handle the table decoded by GPU
Handle the table decoded by GPU
Please note that this function owns the table, which is supposed to be closed in this function. But as an optimization, we just return the original table.
- readDataSchema
the Spark schema describing what will be read
- table
the table decoded by GPU
- returns
the new optional Table
- Definition Classes
- GpuTextBasedPartitionReader
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
val
metrics: Map[String, GpuMetric]
- Definition Classes
- ScanWithMetrics
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
next(): Boolean
- Definition Classes
- GpuTextBasedPartitionReader → PartitionReader
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
readToTable(dataBufferer: HostLineBufferer, cudfDataSchema: Schema, readDataSchema: StructType, cudfReadDataSchema: Schema, isFirstChunk: Boolean, decodeTime: GpuMetric): Table
Read the host buffer into a GPU table
Read the host buffer into a GPU table
- dataBufferer
buffered data to be parsed
- cudfDataSchema
the cudf schema of the data
- readDataSchema
the Spark schema describing what will be read
- cudfReadDataSchema
the cudf schema of just the data we want to read.
- isFirstChunk
if it is the first chunk
- returns
table
- Definition Classes
- CSVPartitionReader → GpuTextBasedPartitionReader
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
timestampFormat: String
- Definition Classes
- CSVPartitionReaderBase → GpuTextBasedPartitionReader
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()