case class ConvertToDeltaCommand(tableIdentifier: TableIdentifier, partitionSchema: Option[StructType], deltaPath: Option[String]) extends ConvertToDeltaCommandBase with Product with Serializable
- Alphabetic
- By Inheritance
- ConvertToDeltaCommand
- Serializable
- Serializable
- ConvertToDeltaCommandBase
- DeltaCommand
- DeltaLogging
- DatabricksLogging
- DeltaProgressReporter
- RunnableCommand
- Command
- LogicalPlan
- Logging
- QueryPlanConstraints
- ConstraintHelper
- LogicalPlanStats
- AnalysisHelper
- QueryPlan
- TreeNode
- Product
- Equals
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
- new ConvertToDeltaCommand(tableIdentifier: TableIdentifier, partitionSchema: Option[StructType], deltaPath: Option[String])
Type Members
-
case class
ConvertProperties(catalogTable: Option[CatalogTable], provider: Option[String], targetDir: String, properties: Map[String, String]) extends Product with Serializable
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
lazy val
allAttributes: AttributeSeq
- Definition Classes
- QueryPlan
-
def
analyzed: Boolean
- Definition Classes
- AnalysisHelper
-
def
apply(number: Int): TreeNode[_]
- Definition Classes
- TreeNode
-
def
argString: String
- Definition Classes
- TreeNode
-
def
asCode: String
- Definition Classes
- TreeNode
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
assertNotAnalysisRule(): Unit
- Attributes
- protected
- Definition Classes
- AnalysisHelper
-
def
buildBaseRelation(spark: SparkSession, txn: OptimisticTransaction, actionType: String, rootPath: Path, inputLeafFiles: Seq[String], nameToAddFileMap: Map[String, AddFile]): HadoopFsRelation
Build a base relation of files that need to be rewritten as part of an update/delete/merge operation.
Build a base relation of files that need to be rewritten as part of an update/delete/merge operation.
- Attributes
- protected
- Definition Classes
- DeltaCommand
-
final
lazy val
canonicalized: LogicalPlan
- Definition Classes
- QueryPlan
- Annotations
- @transient()
-
def
children: Seq[LogicalPlan]
- Definition Classes
- Command → TreeNode
-
def
childrenResolved: Boolean
- Definition Classes
- LogicalPlan
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
collect[B](pf: PartialFunction[LogicalPlan, B]): Seq[B]
- Definition Classes
- TreeNode
-
def
collectFirst[B](pf: PartialFunction[LogicalPlan, B]): Option[B]
- Definition Classes
- TreeNode
-
def
collectLeaves(): Seq[LogicalPlan]
- Definition Classes
- TreeNode
-
def
conf: SQLConf
- Definition Classes
- QueryPlan
-
lazy val
constraints: ExpressionSet
- Definition Classes
- QueryPlanConstraints
-
def
constructIsNotNullConstraints(constraints: Set[Expression], output: Seq[Attribute]): Set[Expression]
- Definition Classes
- ConstraintHelper
-
def
constructTableSchema(spark: SparkSession, dataSchema: StructType, partitionFields: Seq[StructField]): StructType
Construct a table schema by merging data schema and partition schema.
Construct a table schema by merging data schema and partition schema. We follow the merge logic in org.apache.spark.sql.execution.datasources.HadoopFsRelation:
When data and partition schemas have overlapping columns, the output schema respects the order of the data schema for the overlapping columns, and it respects the data types of the partition schema.
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
-
lazy val
containsChild: Set[TreeNode[_]]
- Definition Classes
- TreeNode
-
def
createAddFile(file: SerializableFileStatus, basePath: Path, fs: FileSystem, conf: SQLConf): AddFile
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
- val deltaPath: Option[String]
-
def
doCanonicalize(): LogicalPlan
- Attributes
- protected
- Definition Classes
- QueryPlan
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
expressions: Seq[Expression]
- Definition Classes
- QueryPlan
-
def
fastEquals(other: TreeNode[_]): Boolean
- Definition Classes
- TreeNode
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
find(f: (LogicalPlan) ⇒ Boolean): Option[LogicalPlan]
- Definition Classes
- TreeNode
-
def
flatMap[A](f: (LogicalPlan) ⇒ TraversableOnce[A]): Seq[A]
- Definition Classes
- TreeNode
-
def
foreach(f: (LogicalPlan) ⇒ Unit): Unit
- Definition Classes
- TreeNode
-
def
foreachUp(f: (LogicalPlan) ⇒ Unit): Unit
- Definition Classes
- TreeNode
-
def
generateCandidateFileMap(basePath: Path, candidateFiles: Seq[AddFile]): Map[String, AddFile]
Generates a map of file names to add file entries for operations where we will need to rewrite files such as delete, merge, update.
Generates a map of file names to add file entries for operations where we will need to rewrite files such as delete, merge, update. We expect file names to be unique, because each file contains a UUID.
- Attributes
- protected
- Definition Classes
- DeltaCommand
-
def
generateTreeString(depth: Int, lastChildren: Seq[Boolean], builder: StringBuilder, verbose: Boolean, prefix: String, addSuffix: Boolean): StringBuilder
- Definition Classes
- TreeNode
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getContext: Map[String, String]
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
-
def
getConvertProperties(spark: SparkSession, tableIdentifier: TableIdentifier): ConvertProperties
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
-
def
getTouchedFile(basePath: Path, filePath: String, nameToAddFileMap: Map[String, AddFile]): AddFile
Find the AddFile record corresponding to the file that was read as part of a delete/update/merge operation.
Find the AddFile record corresponding to the file that was read as part of a delete/update/merge operation.
- filePath
The path to a file. Can be either absolute or relative
- nameToAddFileMap
Map generated through
generateCandidateFileMap()
- Attributes
- protected
- Definition Classes
- DeltaCommand
-
def
handleExistingTransactionLog(spark: SparkSession, txn: OptimisticTransaction, convertProperties: ConvertProperties): Unit
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
-
def
hashCode(): Int
- Definition Classes
- TreeNode → AnyRef → Any
-
def
inferAdditionalConstraints(constraints: Set[Expression]): Set[Expression]
- Definition Classes
- ConstraintHelper
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
innerChildren: Seq[QueryPlan[_]]
- Attributes
- protected
- Definition Classes
- QueryPlan → TreeNode
-
def
inputSet: AttributeSet
- Definition Classes
- QueryPlan
-
final
def
invalidateStatsCache(): Unit
- Definition Classes
- LogicalPlanStats
-
def
isCanonicalizedPlan: Boolean
- Attributes
- protected
- Definition Classes
- QueryPlan
-
def
isCatalogTable(analyzer: Analyzer, tableIdent: TableIdentifier): Boolean
Calls DeltaCommand.isCatalogTable.
Calls DeltaCommand.isCatalogTable. With Convert, we may get a format check error in cases where the metastore and the underlying table don't align, e.g. external table where the underlying files are converted to delta but the metadata has not been converted yet. In these cases, catch the error and return based on whether the provided Table Identifier could reasonably be a path
- analyzer
The session state analyzer to call
- tableIdent
Table Identifier to determine whether it is path based or not
- returns
Boolean where true means that the table is a table in a metastore and false means the table is a path based table
- Definition Classes
- ConvertToDeltaCommandBase → DeltaCommand
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isPathIdentifier(tableIdent: TableIdentifier): Boolean
Override this method since parquet paths are valid for Convert
Override this method since parquet paths are valid for Convert
- tableIdent
the provided table or path
- returns
Whether or not the ident provided can refer to a table by path
- Definition Classes
- ConvertToDeltaCommandBase → DeltaCommand
-
def
isStreaming: Boolean
- Definition Classes
- LogicalPlan
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
jsonFields: List[JField]
- Attributes
- protected
- Definition Classes
- TreeNode
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logConsole(line: String): Unit
- Definition Classes
- DatabricksLogging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
makeCopy(newArgs: Array[AnyRef]): LogicalPlan
- Definition Classes
- TreeNode
-
def
map[A](f: (LogicalPlan) ⇒ A): Seq[A]
- Definition Classes
- TreeNode
-
def
mapChildren(f: (LogicalPlan) ⇒ LogicalPlan): LogicalPlan
- Definition Classes
- TreeNode
-
def
mapExpressions(f: (Expression) ⇒ Expression): ConvertToDeltaCommand.this.type
- Definition Classes
- QueryPlan
-
def
mapProductIterator[B](f: (Any) ⇒ B)(implicit arg0: ClassTag[B]): Array[B]
- Attributes
- protected
- Definition Classes
- TreeNode
-
def
maxRows: Option[Long]
- Definition Classes
- LogicalPlan
-
def
maxRowsPerPartition: Option[Long]
- Definition Classes
- LogicalPlan
-
def
mergeSchemasInParallel(sparkSession: SparkSession, filesToTouch: Seq[FileStatus]): Option[StructType]
This method is forked from ParquetFileFormat.
This method is forked from ParquetFileFormat. The only change here is that we use our SchemaUtils.mergeSchemas() instead of StructType.merge(), where we allow upcast between ByteType, ShortType and IntegerType.
Figures out a merged Parquet schema with a distributed Spark job.
Note that locality is not taken into consideration here because:
1. For a single Parquet part-file, in most cases the footer only resides in the last block of
that file. Thus we only need to retrieve the location of the last block. However, Hadoop
FileSystem only provides API to retrieve locations of all blocks, which can be potentially expensive.
2. This optimization is mainly useful for S3, where file metadata operations can be pretty slow. And basically locality is not available when using S3 (you can't run computation on S3 nodes).
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
- For a single Parquet part-file, in most cases the footer only resides in the last block of
that file. Thus we only need to retrieve the location of the last block. However, Hadoop
-
lazy val
metrics: Map[String, SQLMetric]
- Definition Classes
- RunnableCommand
-
def
missingInput: AttributeSet
- Definition Classes
- QueryPlan
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
nodeName: String
- Definition Classes
- TreeNode
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
numberedTreeString: String
- Definition Classes
- TreeNode
-
val
origin: Origin
- Definition Classes
- TreeNode
-
def
otherCopyArgs: Seq[AnyRef]
- Attributes
- protected
- Definition Classes
- TreeNode
-
def
output: Seq[Attribute]
- Definition Classes
- Command → QueryPlan
-
def
outputOrdering: Seq[SortOrder]
- Definition Classes
- LogicalPlan
-
def
outputSet: AttributeSet
- Definition Classes
- QueryPlan
-
def
p(number: Int): LogicalPlan
- Definition Classes
- TreeNode
-
def
parsePartitionPredicates(spark: SparkSession, predicate: String): Seq[Expression]
Converts string predicates into Expressions relative to a transaction.
Converts string predicates into Expressions relative to a transaction.
- Attributes
- protected
- Definition Classes
- DeltaCommand
- Exceptions thrown
AnalysisException if a non-partition column is referenced.
-
lazy val
partitionColNames: Seq[String]
- Definition Classes
- ConvertToDeltaCommandBase
-
lazy val
partitionFields: Seq[StructField]
- Definition Classes
- ConvertToDeltaCommandBase
- val partitionSchema: Option[StructType]
-
def
performConvert(spark: SparkSession, txn: OptimisticTransaction, convertProperties: ConvertProperties): Seq[Row]
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
-
def
prettyJson: String
- Definition Classes
- TreeNode
-
def
printSchema(): Unit
- Definition Classes
- QueryPlan
-
def
producedAttributes: AttributeSet
- Definition Classes
- QueryPlan
-
def
recordDeltaEvent(deltaLog: DeltaLog, opType: String, tags: Map[TagDefinition, String] = Map.empty, data: AnyRef = null): Unit
Used to record the occurrence of a single event or report detailed, operation specific statistics.
Used to record the occurrence of a single event or report detailed, operation specific statistics.
- Attributes
- protected
- Definition Classes
- DeltaLogging
-
def
recordDeltaOperation[A](deltaLog: DeltaLog, opType: String, tags: Map[TagDefinition, String] = Map.empty)(thunk: ⇒ A): A
Used to report the duration as well as the success or failure of an operation.
Used to report the duration as well as the success or failure of an operation.
- Attributes
- protected
- Definition Classes
- DeltaLogging
-
def
recordEvent(metric: MetricDefinition, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, trimBlob: Boolean = true): Unit
- Definition Classes
- DatabricksLogging
-
def
recordOperation[S](opType: OpType, opTarget: String = null, extraTags: Map[TagDefinition, String], isSynchronous: Boolean = true, alwaysRecordStats: Boolean = false, allowAuthTags: Boolean = false, killJvmIfStuck: Boolean = false, outputMetric: MetricDefinition = null, silent: Boolean = true)(thunk: ⇒ S): S
- Definition Classes
- DatabricksLogging
-
def
recordUsage(metric: MetricDefinition, quantity: Double, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, forceSample: Boolean = false, trimBlob: Boolean = true, silent: Boolean = false): Unit
- Definition Classes
- DatabricksLogging
-
def
references: AttributeSet
- Definition Classes
- QueryPlan
-
def
refresh(): Unit
- Definition Classes
- LogicalPlan
-
def
removeFilesFromPaths(deltaLog: DeltaLog, nameToAddFileMap: Map[String, AddFile], filesToRewrite: Seq[String], operationTimestamp: Long): Seq[RemoveFile]
This method provides the RemoveFile actions that are necessary for files that are touched and need to be rewritten in methods like Delete, Update, and Merge.
This method provides the RemoveFile actions that are necessary for files that are touched and need to be rewritten in methods like Delete, Update, and Merge.
- deltaLog
The DeltaLog of the table that is being operated on
- nameToAddFileMap
A map generated using
generateCandidateFileMap.
- filesToRewrite
Absolute paths of the files that were touched. We will search for these in
candidateFiles. Obtained as the output of the input_file_name function.
- operationTimestamp
The timestamp of the operation
- Attributes
- protected
- Definition Classes
- DeltaCommand
-
def
resolve(nameParts: Seq[String], resolver: Resolver): Option[NamedExpression]
- Definition Classes
- LogicalPlan
-
def
resolve(schema: StructType, resolver: Resolver): Seq[Attribute]
- Definition Classes
- LogicalPlan
-
def
resolveChildren(nameParts: Seq[String], resolver: Resolver): Option[NamedExpression]
- Definition Classes
- LogicalPlan
-
def
resolveExpressions(r: PartialFunction[Expression, Expression]): LogicalPlan
- Definition Classes
- AnalysisHelper
-
def
resolveIdentifier(analyzer: Analyzer, identifier: TableIdentifier): LogicalPlan
Use the analyzer to resolve the identifier provided
Use the analyzer to resolve the identifier provided
- analyzer
The session state analyzer to call
- identifier
Table Identifier to determine whether it is path based or not
- Attributes
- protected
- Definition Classes
- DeltaCommand
-
def
resolveOperators(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan
- Definition Classes
- AnalysisHelper
-
def
resolveOperatorsDown(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan
- Definition Classes
- AnalysisHelper
-
def
resolveOperatorsUp(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan
- Definition Classes
- AnalysisHelper
-
def
resolveQuoted(name: String, resolver: Resolver): Option[NamedExpression]
- Definition Classes
- LogicalPlan
-
lazy val
resolved: Boolean
- Definition Classes
- LogicalPlan
-
def
run(spark: SparkSession): Seq[Row]
- Definition Classes
- ConvertToDeltaCommandBase → RunnableCommand
-
final
def
sameResult(other: LogicalPlan): Boolean
- Definition Classes
- QueryPlan
-
lazy val
schema: StructType
- Definition Classes
- QueryPlan
-
def
schemaString: String
- Definition Classes
- QueryPlan
-
final
def
semanticHash(): Int
- Definition Classes
- QueryPlan
-
def
simpleString: String
- Definition Classes
- QueryPlan → TreeNode
-
def
statePrefix: String
- Attributes
- protected
- Definition Classes
- LogicalPlan → QueryPlan
-
def
stats: Statistics
- Definition Classes
- LogicalPlanStats
-
val
statsCache: Option[Statistics]
- Attributes
- protected
- Definition Classes
- LogicalPlanStats
-
def
streamWrite(spark: SparkSession, txn: OptimisticTransaction, addFiles: Iterator[AddFile], op: Convert, numFiles: Long): Long
Create the first commit on the Delta log by directly writing an iterator of AddFiles to the LogStore.
Create the first commit on the Delta log by directly writing an iterator of AddFiles to the LogStore. This bypasses the Delta transactional protocol, but we assume this is ok as this is the very first commit and only happens at table conversion which is a one-off process.
- Attributes
- protected
- Definition Classes
- ConvertToDeltaCommandBase
-
def
stringArgs: Iterator[Any]
- Attributes
- protected
- Definition Classes
- TreeNode
-
def
subqueries: Seq[LogicalPlan]
- Definition Classes
- QueryPlan
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
- val tableIdentifier: TableIdentifier
-
val
timestampPartitionPattern: String
- Definition Classes
- ConvertToDeltaCommandBase
-
def
toJSON: String
- Definition Classes
- TreeNode
-
def
toString(): String
- Definition Classes
- TreeNode → AnyRef → Any
-
def
transform(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan
- Definition Classes
- TreeNode
-
def
transformAllExpressions(rule: PartialFunction[Expression, Expression]): ConvertToDeltaCommand.this.type
- Definition Classes
- AnalysisHelper → QueryPlan
-
def
transformDown(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan
- Definition Classes
- AnalysisHelper → TreeNode
-
def
transformExpressions(rule: PartialFunction[Expression, Expression]): ConvertToDeltaCommand.this.type
- Definition Classes
- QueryPlan
-
def
transformExpressionsDown(rule: PartialFunction[Expression, Expression]): ConvertToDeltaCommand.this.type
- Definition Classes
- QueryPlan
-
def
transformExpressionsUp(rule: PartialFunction[Expression, Expression]): ConvertToDeltaCommand.this.type
- Definition Classes
- QueryPlan
-
def
transformUp(rule: PartialFunction[LogicalPlan, LogicalPlan]): LogicalPlan
- Definition Classes
- AnalysisHelper → TreeNode
-
def
treeString(verbose: Boolean, addSuffix: Boolean): String
- Definition Classes
- TreeNode
-
def
treeString: String
- Definition Classes
- TreeNode
-
def
validConstraints: Set[Expression]
- Attributes
- protected
- Definition Classes
- QueryPlanConstraints
-
def
verboseString: String
- Definition Classes
- QueryPlan → TreeNode
-
def
verboseStringWithSuffix: String
- Definition Classes
- LogicalPlan → TreeNode
-
def
verifyPartitionPredicates(spark: SparkSession, partitionColumns: Seq[String], predicates: Seq[Expression]): Unit
- Attributes
- protected
- Definition Classes
- DeltaCommand
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
withNewChildren(newChildren: Seq[LogicalPlan]): LogicalPlan
- Definition Classes
- TreeNode
-
def
withStatusCode[T](statusCode: String, defaultMessage: String, data: Map[String, Any] = Map.empty)(body: ⇒ T): T
Report a log to indicate some command is running.
Report a log to indicate some command is running.
- Definition Classes
- DeltaProgressReporter