object FolderCompactionUtils
- Alphabetic
- By Inheritance
- FolderCompactionUtils
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
- val INPUT_MODEL_CONF_KEY: String
- val NUM_PARTITIONS_CONF_KEY: String
- val OUTPUT_MODEL_CONF_KEY: String
- val PARTITIONS_CONF_KEY: String
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native() @HotSpotIntrinsicCandidate()
-
def
discoverPartitionFiles(fileSystem: FileSystem, basePath: Path)(partitions: Map[ColumnName, List[ColumnValue]]): List[Path]
Given a basePath, finds all files associated to the specified partitions
Given a basePath, finds all files associated to the specified partitions
- fileSystem
the file system from which to read the files
- basePath
the base path from which to begin the search
- partitions
the partitions to search
- returns
the list of Path of files found
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
filterPartitionsCombination(folders: Array[String], combination: PartitionsCombination): Boolean
- folders
the list of directories that a file path contains E.g. [ "journey", "raw", "a=1", "b=2" ]
- combination
the combination of partitions E.g. [ "a=1", "b=2" ]
- returns
true if the partitions of
combination are all present in folders, false otherwise
-
def
filterPath(path: Path)(combinations: List[String]): Boolean
- path
the path of the file
- combinations
the combination of partitions
- returns
true if the partitions of
combinations are all present in path, false otherwise
-
def
filterSingleQuery(whereCondition: WhereCondition)(file: Path): Boolean
- whereCondition
the WhereCondition to filter
- file
the file to check
- returns
true if
whereCondition covers the path of file, false otherwise
-
def
filterWhereCondition(files: List[Path])(whereCondition: WhereCondition): Boolean
- files
the list of files to check against the WhereCondition
- whereCondition
the WhereCondition to filter
- returns
true if at least one file is present for this WhereCondition, false otherwise
-
def
generateCombinations(partitions: List[(ColumnName, List[ColumnValue])]): List[PartitionsCombination]
Generates all the possible combinations of columnName and columnValue. Example: partitions = List( "a" -> List("1"), "b" -> List("2", "3"), "c" -> List("4", "5", "6") ) output = List( ("a", "1") :: ("b", "2") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "6") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "6") :: Nil, )
Generates all the possible combinations of columnName and columnValue. Example: partitions = List( "a" -> List("1"), "b" -> List("2", "3"), "c" -> List("4", "5", "6") ) output = List( ("a", "1") :: ("b", "2") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "2") :: ("c", "6") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "4") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "5") :: Nil, ("a", "1") :: ("b", "3") :: ("c", "6") :: Nil, )
- partitions
the list of partitions to generate the combinations
- returns
all the possible combinations obtained from the input partitions
-
def
generateWhereConditions(partitions: Map[ColumnName, List[ColumnValue]], inputModel: RawModel, outputModel: RawModel): List[WhereCondition]
Builds the list of WhereCondition used to filter the original DataFrame read from the input model.
Builds the list of WhereCondition used to filter the original DataFrame read from the input model. This list has one element for each output partition combination, in order to write the correct number of files to the partitions specified by the output model. Each of these combinations is put in AND with all the input partitions combinations, in order to write only the files of the partitions requested. Example: inputModel.partitions = [a, b, c, d] outputModel.partitions = [a, b] partitions = a -> [1, 2], b -> [3, 4], c -> [5, 6], d -> [7, 8]
output = [ (a=1 AND b=3) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) (a=1 AND b=4) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) (a=2 AND b=3) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) (a=2 AND b=4) AND ( (c=5 AND d=7) OR (c=5 AND d=8) OR (c=6 AND d=7) OR (c=6 AND d=8) ) ]
- partitions
the list of partitions to generate the conditions
- inputModel
the inputModel defining the input partitions
- outputModel
the outputModel defining the output partitions
- returns
the list of WhereCondition generated
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native() @HotSpotIntrinsicCandidate()
- def parseConfigModel(conf: Config): RawModel
- def parseModel(conf: Config, key: String): RawModel
- def parsePartitions(conf: Config): Map[ColumnName, List[ColumnValue]]
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
Deprecated Value Members
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] ) @Deprecated @deprecated
- Deprecated
(Since version ) see corresponding Javadoc for more information.