o

org.apache.spark.sql.delta.util

DeltaFileOperations

object DeltaFileOperations extends DeltaLogging

Some utility methods on files, directories, and paths.

Linear Supertypes
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. DeltaFileOperations
  2. DeltaLogging
  3. DatabricksLogging
  4. DeltaProgressReporter
  5. Logging
  6. AnyRef
  7. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. def absolutePath(basePath: String, child: String): Path

    Create an absolute path from child using the basePath if the child is a relative path.

    Create an absolute path from child using the basePath if the child is a relative path. Return child if it is an absolute path.

    basePath

    Base path to prepend to child if child is a relative path. Note: It is assumed that the basePath does not have any escaped characters and is directly readable by Hadoop APIs.

    child

    Child path to append to basePath if child is a relative path. Note: It is assumed that the child is escaped, that is, all special chars that need escaping by URI standards are already escaped.

    returns

    Absolute path without escaped chars that is directly readable by Hadoop APIs.

  5. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  6. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  7. def defaultHiddenFileFilter(fileName: String): Boolean

    The default filter for hidden files.

    The default filter for hidden files. File names beginning with _ or . are considered hidden.

    returns

    true if the file is hidden

  8. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  9. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  10. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  11. def getAllSubDirectories(base: String, path: String): (Iterator[String], String)

    Returns all the levels of sub directories that path has with respect to base.

    Returns all the levels of sub directories that path has with respect to base. For example: getAllSubDirectories("/base", "/base/a/b/c") => (Iterator("/base/a", "/base/a/b"), "/base/a/b/c")

  12. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  13. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  14. def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  15. def initializeLogIfNecessary(isInterpreter: Boolean): Unit
    Attributes
    protected
    Definition Classes
    Logging
  16. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  17. def isTraceEnabled(): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  18. def log: Logger
    Attributes
    protected
    Definition Classes
    Logging
  19. def logConsole(line: String): Unit
    Definition Classes
    DatabricksLogging
  20. def logDebug(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  21. def logDebug(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  22. def logError(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  23. def logError(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  24. def logInfo(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  25. def logInfo(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  26. def logName: String
    Attributes
    protected
    Definition Classes
    Logging
  27. def logTrace(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  28. def logTrace(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  29. def logWarning(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  30. def logWarning(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  31. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  32. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  33. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  34. def readParquetFootersInParallel(conf: Configuration, partFiles: Seq[FileStatus], ignoreCorruptFiles: Boolean): Seq[Footer]

    Reads Parquet footers in a multi-threaded manner.

    Reads Parquet footers in a multi-threaded manner. If the config "spark.sql.files.ignoreCorruptFiles" is set to true, we will ignore the corrupted files when reading footers.

  35. def recordDeltaEvent(deltaLog: DeltaLog, opType: String, tags: Map[TagDefinition, String] = Map.empty, data: AnyRef = null): Unit

    Used to record the occurrence of a single event or report detailed, operation specific statistics.

    Used to record the occurrence of a single event or report detailed, operation specific statistics.

    Attributes
    protected
    Definition Classes
    DeltaLogging
  36. def recordDeltaOperation[A](deltaLog: DeltaLog, opType: String, tags: Map[TagDefinition, String] = Map.empty)(thunk: ⇒ A): A

    Used to report the duration as well as the success or failure of an operation.

    Used to report the duration as well as the success or failure of an operation.

    Attributes
    protected
    Definition Classes
    DeltaLogging
  37. def recordEvent(metric: MetricDefinition, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, trimBlob: Boolean = true): Unit
    Definition Classes
    DatabricksLogging
  38. def recordOperation[S](opType: OpType, opTarget: String = null, extraTags: Map[TagDefinition, String], isSynchronous: Boolean = true, alwaysRecordStats: Boolean = false, allowAuthTags: Boolean = false, killJvmIfStuck: Boolean = false, outputMetric: MetricDefinition = null, silent: Boolean = true)(thunk: ⇒ S): S
    Definition Classes
    DatabricksLogging
  39. def recordUsage(metric: MetricDefinition, quantity: Double, additionalTags: Map[TagDefinition, String] = Map.empty, blob: String = null, forceSample: Boolean = false, trimBlob: Boolean = true, silent: Boolean = false): Unit
    Definition Classes
    DatabricksLogging
  40. def recursiveListDirs(spark: SparkSession, subDirs: Seq[String], hadoopConf: Broadcast[SerializableConfiguration], hiddenFileNameFilter: (String) ⇒ Boolean = defaultHiddenFileFilter, fileListingParallelism: Option[Int] = None): Dataset[SerializableFileStatus]

    Recursively lists all the files and directories for the given subDirs in a scalable manner.

    Recursively lists all the files and directories for the given subDirs in a scalable manner.

    spark

    The SparkSession

    subDirs

    Absolute path of the subdirectories to list

    hadoopConf

    The Hadoop Configuration to get a FileSystem instance

    hiddenFileNameFilter

    A function that returns true when the file should be considered hidden and excluded from results. Defaults to checking for prefixes of "." or "_".

  41. def registerTempFileDeletionTaskFailureListener(conf: Configuration, tempPath: Path): Unit

    Register a task failure listener to delete a temp file in our best effort.

  42. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  43. def toString(): String
    Definition Classes
    AnyRef → Any
  44. def tryDeleteNonRecursive(fs: FileSystem, path: Path, tries: Int = 3): Boolean

    Tries deleting a file or directory non-recursively.

    Tries deleting a file or directory non-recursively. If the file/folder doesn't exist, that's fine, a separate operation may be deleting files/folders. If a directory is non-empty, we shouldn't delete it. FileSystem implementations throw an IOException in those cases, which we return as a "we failed to delete".

    Listing on S3 is not consistent after deletes, therefore in case the delete returns false, because the file didn't exist, then we still return true. Retries on S3 rate limits up to 3 times.

  45. def tryRelativizePath(fs: FileSystem, basePath: Path, child: Path): Path

    Given a path child:

    Given a path child:

    1. Returns child if the path is already relative.
    2. Tries relativizing child with respect to basePath:
       a) If the child doesn't live within the same base path, returns child as is.
       b) If child lives in a different FileSystem, throws an exception.
    Note that child may physically be pointing to a path within basePath, but may logically belong to a different FileSystem, e.g. DBFS mount points and direct S3 paths.
  46. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  47. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  48. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  49. def withStatusCode[T](statusCode: String, defaultMessage: String, data: Map[String, Any] = Map.empty)(body: ⇒ T): T

    Report a log to indicate some command is running.

    Report a log to indicate some command is running.

    Definition Classes
    DeltaProgressReporter

Inherited from DeltaLogging

Inherited from DatabricksLogging

Inherited from DeltaProgressReporter

Inherited from Logging

Inherited from AnyRef

Inherited from Any

Ungrouped