case class DeltaParquetFileFormat(protocol: Protocol, metadata: Metadata, nullableRowTrackingFields: Boolean = false, optimizationsEnabled: Boolean = true, tablePath: Option[String] = None, isCDCRead: Boolean = false) extends ParquetFileFormat with Product with Serializable
A thin wrapper over the Parquet file format to support
- column names without restrictions.
- populating a column from the deletion vector of this file (if it exists) to indicate whether the row is deleted or not according to the deletion vector. Consumers of this scan can use the column values to filter out the deleted rows.
Linear Supertypes
Ordering
- Alphabetic
- By Inheritance
Inherited
- DeltaParquetFileFormat
- Product
- Equals
- ParquetFileFormat
- Serializable
- Logging
- DataSourceRegister
- FileFormat
- AnyRef
- Any
- Hide All
- Show All
Visibility
- Public
- Protected
Instance Constructors
Value Members
- final def !=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def ##: Int
- Definition Classes
- AnyRef → Any
- final def ==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- final def asInstanceOf[T0]: T0
- Definition Classes
- Any
- def buildReader(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType, requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow]
- Attributes
- protected
- Definition Classes
- FileFormat
- def buildReaderWithPartitionValues(sparkSession: SparkSession, dataSchema: StructType, partitionSchema: StructType, requiredSchema: StructType, filters: Seq[Filter], options: Map[String, String], hadoopConf: Configuration): (PartitionedFile) => Iterator[InternalRow]
- Definition Classes
- DeltaParquetFileFormat → ParquetFileFormat → FileFormat
- def clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.CloneNotSupportedException]) @native()
- val columnMappingMode: DeltaColumnMappingMode
- def copyWithDVInfo(tablePath: String, optimizationsEnabled: Boolean): DeltaParquetFileFormat
- def createFileMetadataCol(): AttributeReference
- Definition Classes
- FileFormat
- final def eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- def equals(other: Any): Boolean
We sometimes need to replace FileFormat within LogicalPlans, so we have to override
`equals` to ensure file format changes are captured.
- Definition Classes
- DeltaParquetFileFormat → Equals → ParquetFileFormat → AnyRef → Any
- def fileConstantMetadataExtractors: Map[String, (PartitionedFile) => Any]
- Definition Classes
- DeltaParquetFileFormat → FileFormat
- def finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.Throwable])
- final def getClass(): Class[_ <: AnyRef]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def hasTablePath: Boolean
- def hashCode(): Int
- Definition Classes
- DeltaParquetFileFormat → ParquetFileFormat → AnyRef → Any
- def inferSchema(sparkSession: SparkSession, parameters: Map[String, String], files: Seq[FileStatus]): Option[StructType]
- Definition Classes
- ParquetFileFormat → FileFormat
- def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
- val isCDCRead: Boolean
- final def isInstanceOf[T0]: Boolean
- Definition Classes
- Any
- def isSplitable(sparkSession: SparkSession, options: Map[String, String], path: Path): Boolean
- Definition Classes
- DeltaParquetFileFormat → ParquetFileFormat → FileFormat
- def isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
- def log: Logger
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logDebug(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logError(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logInfo(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logName: String
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logTrace(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
- def logWarning(msg: => String): Unit
- Attributes
- protected
- Definition Classes
- Logging
- val metadata: Metadata
- def metadataSchemaFields: Seq[StructField]
- Definition Classes
- DeltaParquetFileFormat → ParquetFileFormat → FileFormat
- final def ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
- final def notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- final def notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
- val nullableRowTrackingFields: Boolean
- val optimizationsEnabled: Boolean
- def prepareSchemaForRead(inputSchema: StructType): StructType
prepareSchemaForRead must only be used for parquet reads. It removes "PARQUET_FIELD_ID_METADATA_KEY" for name mapping mode, which addresses columns by physical name instead of id.
- def prepareWrite(sparkSession: SparkSession, job: Job, options: Map[String, String], dataSchema: StructType): OutputWriterFactory
- Definition Classes
- DeltaParquetFileFormat → ParquetFileFormat → FileFormat
- def productElementNames: Iterator[String]
- Definition Classes
- Product
- val protocol: Protocol
- val referenceSchema: StructType
- def shortName(): String
- Definition Classes
- ParquetFileFormat → DataSourceRegister
- def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean
- Definition Classes
- ParquetFileFormat → FileFormat
- def supportDataType(dataType: DataType): Boolean
- Definition Classes
- ParquetFileFormat → FileFormat
- def supportFieldName(name: String): Boolean
- Definition Classes
- DeltaParquetFileFormat → FileFormat
- final def synchronized[T0](arg0: => T0): T0
- Definition Classes
- AnyRef
- val tablePath: Option[String]
- def toString(): String
- Definition Classes
- ParquetFileFormat → AnyRef → Any
- def vectorTypes(requiredSchema: StructType, partitionSchema: StructType, sqlConf: SQLConf): Option[Seq[String]]
- Definition Classes
- ParquetFileFormat → FileFormat
- final def wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException])
- final def wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws(classOf[java.lang.InterruptedException]) @native()