object DeltaTableUtils extends PredicateHelper with DeltaLogging
- Alphabetic
- By Inheritance
- DeltaTableUtils
- DeltaLogging
- DatabricksLogging
- DeltaProgressReporter
- LoggingShims
- PredicateHelper
- Logging
- AliasHelper
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Type Members
-
implicit
class
LogStringContext extends AnyRef
- Definition Classes
- LoggingShims
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
val
SPARK_INTERNAL_METADATA_KEYS: Seq[String]
A list of Spark internal metadata keys that we may save in a Delta table schema unintentionally due to SPARK-43123.
A list of Spark internal metadata keys that we may save in a Delta table schema unintentionally due to SPARK-43123. We need to remove them before handing over the schema to Spark to avoid Spark interpreting table columns incorrectly.
Hard-coded strings are used intentionally as we want to capture possible keys used before SPARK-43123 regardless of Spark versions. For example, if Spark changes any key string in future after SPARK-43123, the new string won't be leaked, but we still want to clean up the old key.
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
buildBalancedPredicate(expressions: Seq[Expression], op: (Expression, Expression) ⇒ Expression): Expression
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
canEvaluate(expr: Expression, plan: LogicalPlan): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
canEvaluateWithinJoin(expr: Expression): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
containsSubquery(condition: Expression): Boolean
Check if condition involves a subquery expression.
-
def
deltaAssert(check: ⇒ Boolean, name: String, msg: String, deltaLog: DeltaLog = null, data: AnyRef = null, path: Option[Path] = None): Unit
Helper method to check invariants in Delta code.
Helper method to check invariants in Delta code. Fails when running in tests, records a delta assertion event and logs a warning otherwise.
- Attributes
- protected
- Definition Classes
- DeltaLogging
-
def
dropColumns(spark: SparkSession, target: LogicalPlan, columnsToDrop: Seq[String]): LogicalPlan
Many Delta meta-queries involve nondeterministic functions, which interfere with automatic column pruning, so columns can be manually pruned from the scan.
Many Delta meta-queries involve nondeterministic functions, which interfere with automatic column pruning, so columns can be manually pruned from the scan. Note that partition columns can never be dropped even if they're not referenced in the rest of the query.
- spark
the spark session to use
- target
the logical plan in which drop columns
- columnsToDrop
columns to drop from the scan
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
extractIfPathContainsTimeTravel(session: SparkSession, path: String, options: Map[String, String]): (String, Option[DeltaTimeTravelSpec])
Check if the given path contains time travel syntax with the @ symbol.
Check if the given path contains time travel syntax with the @ symbol. If the path genuinely exists, return None. If the path doesn't exist, but is specifying time travel, return the DeltaTimeTravelSpec as well as the real path. -
def
extractPredicatesWithinOutputSet(condition: Expression, outputSet: AttributeSet): Option[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
findDeltaTableRoot(fs: FileSystem, path: Path): Option[Path]
Finds the root of a Delta table given a path if it exists.
-
def
findDeltaTableRoot(spark: SparkSession, path: Path, options: Map[String, String] = Map.empty): Option[Path]
Find the root of a Delta table from the provided path.
-
def
findExpressionAndTrackLineageDown(exp: Expression, plan: LogicalPlan): Option[(Expression, LogicalPlan)]
- Definition Classes
- PredicateHelper
-
def
getAliasMap(exprs: Seq[NamedExpression]): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
getAliasMap(plan: Aggregate): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
getAliasMap(plan: Project): AttributeMap[Alias]
- Attributes
- protected
- Definition Classes
- AliasHelper
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getCommonTags(deltaLog: DeltaLog, tahoeId: String): Map[TagDefinition, String]
- Definition Classes
- DeltaLogging
-
def
getErrorData(e: Throwable): Map[String, Any]
- Definition Classes
- DeltaLogging
-
def
getFileMetadataColumn(df: DataFrame): Column
Finds and returns the file source metadata column from a dataframe
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
isCatalogTable(catalog: SessionCatalog, tableIdent: TableIdentifier): Boolean
Checks whether TableIdentifier is a path or a table name. We assume it is a path unless the table and database both exist in the catalog.
Checks whether TableIdentifier is a path or a table name. We assume it is a path unless the table and database both exist in the catalog.
- catalog
session catalog used to check whether db/table exist
- tableIdent
the provided table or path
- returns
true if using table name, false if using path, error otherwise
-
def
isDeltaTable(spark: SparkSession, path: Path, options: Map[String, String] = Map.empty): Boolean
Check if the provided path is the root or the children of a Delta table.
-
def
isDeltaTable(spark: SparkSession, tableName: TableIdentifier): Boolean
Check whether the provided table name is a Delta table based on information from the Catalog.
-
def
isDeltaTable(table: CatalogTable): Boolean
Check whether this table is a Delta table based on information from the Catalog.
-
def
isHiddenDirectory(partitionColumnNames: Seq[String], pathName: String, shouldIcebergMetadataDirBeHidden: Boolean = true): Boolean
Whether a path should be hidden for delta-related file operations, such as Vacuum and Fsck.
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
def
isLikelySelective(e: Expression): Boolean
- Definition Classes
- PredicateHelper
-
def
isNullIntolerant(expr: Expression): Boolean
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
isPredicateMetadataOnly(condition: Expression, partitionColumns: Seq[String], spark: SparkSession): Boolean
Check if condition can be evaluated using only metadata.
Check if condition can be evaluated using only metadata. In Delta, this means the condition only references partition columns and involves no subquery.
-
def
isPredicatePartitionColumnsOnly(condition: Expression, partitionColumns: Seq[String], spark: SparkSession): Boolean
Does the predicate only contain partition columns?
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
isValidPath(tableIdent: TableIdentifier): Boolean
- tableIdent
the provided table or path
- returns
whether or not the provided TableIdentifier can specify a path for parquet or delta
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logConsole(line: String): Unit
- Definition Classes
- DatabricksLogging
-
def
logDebug(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logDebug(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logError(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logInfo(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logTrace(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(entry: LogEntry, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logWarning(entry: LogEntry): Unit
- Attributes
- protected
- Definition Classes
- LoggingShims
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
outputWithNullability(output: Seq[Attribute], nonNullAttrExprIds: Seq[ExprId]): Seq[Attribute]
- Attributes
- protected
- Definition Classes
- PredicateHelper
- def parseColToTransform(col: String): IdentityTransform
- def parseColsToClusterByTransform(cols: Seq[String]): ClusterByTransform
-
def
recordDeltaEvent(deltaLog: DeltaLog, opType: String, tags: Map[TagDefinition, String] = Map.empty, data: AnyRef = null, path: Option[Path] = None): Unit
Used to record the occurrence of a single event or report detailed, operation specific statistics.
Used to record the occurrence of a single event or report detailed, operation specific statistics.
- path
Used to log the path of the delta table when
deltaLog is null.
- Attributes
- protected
- Definition Classes
- DeltaLogging
-
def
recordDeltaOperation[A](deltaLog: DeltaLog, opType: String, tags: Map[TagDefinition, String] = Map.empty)(thunk: ⇒ A): A
Used to report the duration as well as the success or failure of an operation on a
deltaLog. Used to report the duration as well as the success or failure of an operation on a
deltaLog.
- Attributes
- protected
- Definition Classes
- DeltaLogging
-
def
recordDeltaOperationForTablePath[A](tablePath: String, opType: String, tags: Map[TagDefinition, String] = Map.empty)(thunk: ⇒ A): A
Used to report the duration as well as the success or failure of an operation on a
tahoePath. Used to report the duration as well as the success or failure of an operation on a
tahoePath.
- Attributes
- protected
- Definition Classes
- DeltaLogging
-
def
recordEvent(metric: MetricDefinition, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, trimBlob: Boolean = true): Unit
- Definition Classes
- DatabricksLogging
-
def
recordFrameProfile[T](group: String, name: String)(thunk: ⇒ T): T
- Attributes
- protected
- Definition Classes
- DeltaLogging
-
def
recordOperation[S](opType: OpType, opTarget: String = null, extraTags: Map[TagDefinition, String], isSynchronous: Boolean = true, alwaysRecordStats: Boolean = false, allowAuthTags: Boolean = false, killJvmIfStuck: Boolean = false, outputMetric: MetricDefinition = METRIC_OPERATION_DURATION, silent: Boolean = true)(thunk: ⇒ S): S
- Definition Classes
- DatabricksLogging
-
def
recordProductEvent(metric: MetricDefinition with CentralizableMetric, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, trimBlob: Boolean = true): Unit
- Definition Classes
- DatabricksLogging
-
def
recordProductUsage(metric: MetricDefinition with CentralizableMetric, quantity: Double, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, forceSample: Boolean = false, trimBlob: Boolean = true, silent: Boolean = false): Unit
- Definition Classes
- DatabricksLogging
-
def
recordUsage(metric: MetricDefinition, quantity: Double, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, forceSample: Boolean = false, trimBlob: Boolean = true, silent: Boolean = false): Unit
- Definition Classes
- DatabricksLogging
-
def
removeInternalWriterMetadata(spark: SparkSession, schema: StructType): StructType
Removes from the given schema all the metadata keys that are not used when reading a Delta table.
Removes from the given schema all the metadata keys that are not used when reading a Delta table. This includes typically all metadata used by writer-only table features. Note that this also removes all leaked Spark internal metadata.
-
def
removeSparkInternalMetadata(spark: SparkSession, schema: StructType): StructType
Remove leaked metadata keys from the persisted table schema.
Remove leaked metadata keys from the persisted table schema. Old versions might leak metadata intentionally. This method removes all possible metadata keys to avoid Spark interpreting table columns incorrectly.
-
def
replaceAlias(expr: Expression, aliasMap: AttributeMap[Alias]): Expression
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
replaceAliasButKeepName(expr: NamedExpression, aliasMap: AttributeMap[Alias]): NamedExpression
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
replaceFileFormat(target: LogicalPlan, updatedFileFormat: FileFormat): LogicalPlan
Update FileFormat for a plan and return the updated plan
Update FileFormat for a plan and return the updated plan
- target
Target plan to update
- updatedFileFormat
Updated file format
- returns
Updated logical plan
-
def
replaceFileIndex(target: LogicalPlan, fileIndex: FileIndex): LogicalPlan
Replace the file index in a logical plan and return the updated plan.
Replace the file index in a logical plan and return the updated plan. It's a common pattern that, in Delta commands, we use data skipping to determine a subset of files that can be affected by the command, so we replace the whole-table file index in the original logical plan with a new index of potentially affected files, while everything else in the original plan, e.g., resolved references, remain unchanged.
- target
the logical plan in which we replace the file index
- fileIndex
the new file index
-
def
resolveTimeTravelVersion(conf: SQLConf, deltaLog: DeltaLog, tt: DeltaTimeTravelSpec, canReturnLastCommit: Boolean = false): (Long, String)
Given a time travel node, resolve which version it is corresponding to for the given table and return the resolved version as well as the access type, i.e.
Given a time travel node, resolve which version it is corresponding to for the given table and return the resolved version as well as the access type, i.e. by
version or timestamp. -
def
safeConcatPaths(basePath: Path, relativeChildPath: String): Path
Uses org.apache.hadoop.fs.Path.mergePaths to concatenate a base path and a relative child path.
Uses org.apache.hadoop.fs.Path.mergePaths to concatenate a base path and a relative child path.
This method is designed to address two specific issues in Hadoop Path:
Issue 1: When the base path represents a Uri with an empty path component, such as concatenating "s3://my-bucket" and "childPath". In this case, the child path is converted to an absolute path at the root, i.e. /childPath. This prevents a "URISyntaxException: Relative path in absolute URI", which would be thrown by org.apache.hadoop.fs.Path(Path, String) because it tries to convert the base path to a Uri and then resolve the child on top of it. This is invalid for an empty base path and a relative child path according to the Uri specification, which states that if an authority is defined, the path component needs to be either empty or start with a '/'.
Issue 2 (only when DeltaSQLConf.DELTA_WORK_AROUND_COLONS_IN_HADOOP_PATHS is
true): When the child path contains a special character ':', such as "aaaa:bbbb.csv". This is valid in many file systems such as S3, but is actually ambiguous because it can be parsed either as an absolute path with a scheme ("aaaa") and authority ("bbbb.csv"), or as a relative path with a colon in the name ("aaaa:bbbb.csv"). Hadoop Path will always interpret it as the former, which is not what we want in this case. Therefore, we prepend a '/' to the child path to ensure that it is always interpreted as a relative path. See https://issues.apache.org/jira/browse/HDFS-14762 for more details. -
def
splitConjunctivePredicates(condition: Expression): Seq[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
splitDisjunctivePredicates(condition: Expression): Seq[Expression]
- Attributes
- protected
- Definition Classes
- PredicateHelper
-
def
splitMetadataAndDataPredicates(condition: Expression, partitionColumns: Seq[String], spark: SparkSession): (Seq[Expression], Seq[Expression])
Partition the given condition into two sequence of conjunctive predicates: - predicates that can be evaluated using metadata only.
Partition the given condition into two sequence of conjunctive predicates: - predicates that can be evaluated using metadata only. - other predicates.
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
def
trimAliases(e: Expression): Expression
- Attributes
- protected
- Definition Classes
- AliasHelper
-
def
trimNonTopLevelAliases[T <: Expression](e: T): T
- Attributes
- protected
- Definition Classes
- AliasHelper
- val validDeltaTableHadoopPrefixes: List[String]
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
- def withActiveSession[T](spark: SparkSession)(body: ⇒ T): T
-
def
withStatusCode[T](statusCode: String, defaultMessage: String, data: Map[String, Any] = Map.empty)(body: ⇒ T): T
Report a log to indicate some command is running.
Report a log to indicate some command is running.
- Definition Classes
- DeltaProgressReporter