Packages

object FolderCompactionUtils

Linear Supertypes
AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. FolderCompactionUtils
  2. AnyRef
  3. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Type Members

  1. type ColumnName = String
  2. type ColumnValue = String

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. val INPUT_MODEL_CONF_KEY: String
  5. val NUM_PARTITIONS_CONF_KEY: String
  6. val OUTPUT_MODEL_CONF_KEY: String
  7. val PARTITIONS_CONF_KEY: String
  8. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  9. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native() @HotSpotIntrinsicCandidate()
  10. def discoverPartitionFiles(fileSystem: FileSystem, basePath: Path)(partitions: Map[ColumnName, List[ColumnValue]]): List[Path]

    Given a basePath, finds all files associated with the specified partitions.

    Given a basePath, finds all files associated with the specified partitions.

    fileSystem

    the file system from which to read the files

    basePath

    the base path from which to begin the search

    partitions

    the partitions to search

    returns

    the list of Path of files found

  11. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  12. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  13. def filterPartitionsCombination(folders: Array[String], combination: PartitionsCombination): Boolean

    folders

    the list of directories that a file path contains, e.g. [ "journey", "raw", "a=1", "b=2" ]

    combination

    the combination of partitions, e.g. [ "a=1", "b=2" ]

    returns

    true if all the partitions in combination are present in folders, false otherwise

  14. def filterPath(path: Path)(combinations: List[String]): Boolean

    path

    the path of the file

    combinations

    the combination of partitions

    returns

    true if the partitions of combinations are all present in path, false otherwise

  15. def filterSingleQuery(whereCondition: WhereCondition)(file: Path): Boolean

    whereCondition

    the WhereCondition to filter

    file

    the file to check

    returns

    true if whereCondition covers the path of file, false otherwise

  16. def filterWhereCondition(files: List[Path])(whereCondition: WhereCondition): Boolean

    files

    the list of files to check against whereCondition

    whereCondition

    the WhereCondition to filter

    returns

    true if at least one file is present for this WhereCondition, false otherwise

  17. def generateCombinations(partitions: List[(ColumnName, List[ColumnValue])]): List[PartitionsCombination]

    Generates all the possible combinations of columnName and columnValue. Example: partitions = List( "a" -> List("1"), "b" -> List("2", "3"), "c" -> List("4", "5", "6") ) output = List( ("a", "1") :: ("b", "2") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "6") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "6") :: Nil )

    Generates all the possible combinations of columnName and columnValue. Example: partitions = List( "a" -> List("1"), "b" -> List("2", "3"), "c" -> List("4", "5", "6") ) output = List( ("a", "1") :: ("b", "2") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "6") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "6") :: Nil )

    partitions

    the list of partitions to generate the combinations

    returns

    all the possible combinations obtained from the input partitions

  18. def generateWhereConditions(partitions: Map[ColumnName, List[ColumnValue]], inputModel: RawModel, outputModel: RawModel): List[WhereCondition]

    Builds the list of WhereCondition used to filter the original DataFrame read from the input model.

    Builds the list of WhereCondition used to filter the original DataFrame read from the input model. This list has one element for each output partition combination, in order to write the correct number of files to the partitions specified by the output model. Each of these combinations is ANDed with the OR of all the combinations of the remaining input partitions, so that only the files of the requested partitions are written. Example: inputModel.partitions = [a, b, c, d]; outputModel.partitions = [a, b]; partitions = a -> [1, 2], b -> [3, 4], c -> [5, 6], d -> [7, 8]

    output = [ (a=1 AND b=3) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ), (a=1 AND b=4) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ), (a=2 AND b=3) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ), (a=2 AND b=4) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) ]

    partitions

    the list of partitions to generate the conditions

    inputModel

    the inputModel defining the input partitions

    outputModel

    the outputModel defining the output partitions

    returns

    the list of WhereCondition generated

  19. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  20. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  21. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  22. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  23. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  24. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native() @HotSpotIntrinsicCandidate()
  25. def parseConfigModel(conf: Config): RawModel
  26. def parseModel(conf: Config, key: String): RawModel
  27. def parsePartitions(conf: Config): Map[ColumnName, List[ColumnValue]]
  28. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  29. def toString(): String
    Definition Classes
    AnyRef → Any
  30. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  31. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  32. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )

Deprecated Value Members

  1. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] ) @Deprecated @deprecated
    Deprecated

    (Since version ) see corresponding Javadoc for more information.

Inherited from AnyRef

Inherited from Any

Ungrouped