public class RewriteManifestsSparkAction
extends java.lang.Object
implements org.apache.iceberg.actions.RewriteManifests
By default, this action rewrites all manifests for the current partition spec and writes the
result to the metadata folder. The behavior can be modified by passing a custom predicate to
rewriteIf(Predicate) and a custom spec id to specId(int). In addition, there is
a way to configure a custom location for new manifests via stagingLocation.
| Modifier and Type | Field and Description |
|---|---|
protected static org.apache.iceberg.relocated.com.google.common.base.Joiner |
COMMA_JOINER |
protected static org.apache.iceberg.relocated.com.google.common.base.Splitter |
COMMA_SPLITTER |
protected static java.lang.String |
FILE_PATH |
protected static java.lang.String |
LAST_MODIFIED |
protected static java.lang.String |
MANIFEST |
protected static java.lang.String |
MANIFEST_LIST |
protected static java.lang.String |
OTHERS |
protected static java.lang.String |
STATISTICS_FILES |
static java.lang.String |
USE_CACHING |
static boolean |
USE_CACHING_DEFAULT |
| Modifier and Type | Method and Description |
|---|---|
protected org.apache.spark.sql.Dataset<FileInfo> |
allReachableOtherMetadataFileDS(org.apache.iceberg.Table table) |
protected void |
commit(org.apache.iceberg.SnapshotUpdate<?> update) |
protected org.apache.spark.sql.Dataset<FileInfo> |
contentFileDS(org.apache.iceberg.Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> |
contentFileDS(org.apache.iceberg.Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary |
deleteFiles(java.util.concurrent.ExecutorService executorService,
java.util.function.Consumer<java.lang.String> deleteFunc,
java.util.Iterator<FileInfo> files)
Deletes files and keeps track of how many files were removed for each file type.
|
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary |
deleteFiles(org.apache.iceberg.io.SupportsBulkOperations io,
java.util.Iterator<FileInfo> files) |
org.apache.iceberg.actions.RewriteManifests.Result |
execute() |
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> |
loadMetadataTable(org.apache.iceberg.Table table,
org.apache.iceberg.MetadataTableType type) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestDS(org.apache.iceberg.Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestDS(org.apache.iceberg.Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestListDS(org.apache.iceberg.Table table) |
protected org.apache.spark.sql.Dataset<FileInfo> |
manifestListDS(org.apache.iceberg.Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected JobGroupInfo |
newJobGroupInfo(java.lang.String groupId,
java.lang.String desc) |
protected org.apache.iceberg.Table |
newStaticTable(org.apache.iceberg.TableMetadata metadata,
org.apache.iceberg.io.FileIO io) |
ThisT |
option(java.lang.String name,
java.lang.String value) |
protected java.util.Map<java.lang.String,java.lang.String> |
options() |
ThisT |
options(java.util.Map<java.lang.String,java.lang.String> newOptions) |
protected org.apache.spark.sql.Dataset<FileInfo> |
otherMetadataFileDS(org.apache.iceberg.Table table) |
RewriteManifestsSparkAction |
rewriteIf(java.util.function.Predicate<org.apache.iceberg.ManifestFile> newPredicate) |
protected RewriteManifestsSparkAction |
self() |
ThisT |
snapshotProperty(java.lang.String property,
java.lang.String value) |
protected org.apache.spark.sql.SparkSession |
spark() |
protected org.apache.spark.api.java.JavaSparkContext |
sparkContext() |
RewriteManifestsSparkAction |
specId(int specId) |
RewriteManifestsSparkAction |
stagingLocation(java.lang.String newStagingLocation) |
protected org.apache.spark.sql.Dataset<FileInfo> |
statisticsFileDS(org.apache.iceberg.Table table,
java.util.Set<java.lang.Long> snapshotIds) |
protected <T> T |
withJobGroupInfo(JobGroupInfo info,
java.util.function.Supplier<T> supplier) |
public static final java.lang.String USE_CACHING
public static final boolean USE_CACHING_DEFAULT
protected static final java.lang.String MANIFEST
protected static final java.lang.String MANIFEST_LIST
protected static final java.lang.String STATISTICS_FILES
protected static final java.lang.String OTHERS
protected static final java.lang.String FILE_PATH
protected static final java.lang.String LAST_MODIFIED
protected static final org.apache.iceberg.relocated.com.google.common.base.Splitter COMMA_SPLITTER
protected static final org.apache.iceberg.relocated.com.google.common.base.Joiner COMMA_JOINER
protected RewriteManifestsSparkAction self()
public RewriteManifestsSparkAction specId(int specId)
specId in interface org.apache.iceberg.actions.RewriteManifests

public RewriteManifestsSparkAction rewriteIf(java.util.function.Predicate<org.apache.iceberg.ManifestFile> newPredicate)
rewriteIf in interface org.apache.iceberg.actions.RewriteManifests

public RewriteManifestsSparkAction stagingLocation(java.lang.String newStagingLocation)
stagingLocation in interface org.apache.iceberg.actions.RewriteManifests

public org.apache.iceberg.actions.RewriteManifests.Result execute()
execute in interface org.apache.iceberg.actions.Action<org.apache.iceberg.actions.RewriteManifests,org.apache.iceberg.actions.RewriteManifests.Result>

public ThisT snapshotProperty(java.lang.String property,
                              java.lang.String value)
protected void commit(org.apache.iceberg.SnapshotUpdate<?> update)
protected org.apache.spark.sql.SparkSession spark()
protected org.apache.spark.api.java.JavaSparkContext sparkContext()
public ThisT option(java.lang.String name,
java.lang.String value)
public ThisT options(java.util.Map<java.lang.String,java.lang.String> newOptions)
protected java.util.Map<java.lang.String,java.lang.String> options()
protected <T> T withJobGroupInfo(JobGroupInfo info, java.util.function.Supplier<T> supplier)
protected JobGroupInfo newJobGroupInfo(java.lang.String groupId, java.lang.String desc)
protected org.apache.iceberg.Table newStaticTable(org.apache.iceberg.TableMetadata metadata,
org.apache.iceberg.io.FileIO io)
protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(org.apache.iceberg.Table table)
protected org.apache.spark.sql.Dataset<FileInfo> contentFileDS(org.apache.iceberg.Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(org.apache.iceberg.Table table)
protected org.apache.spark.sql.Dataset<FileInfo> manifestDS(org.apache.iceberg.Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(org.apache.iceberg.Table table)
protected org.apache.spark.sql.Dataset<FileInfo> manifestListDS(org.apache.iceberg.Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> statisticsFileDS(org.apache.iceberg.Table table, java.util.Set<java.lang.Long> snapshotIds)
protected org.apache.spark.sql.Dataset<FileInfo> otherMetadataFileDS(org.apache.iceberg.Table table)
protected org.apache.spark.sql.Dataset<FileInfo> allReachableOtherMetadataFileDS(org.apache.iceberg.Table table)
protected org.apache.spark.sql.Dataset<org.apache.spark.sql.Row> loadMetadataTable(org.apache.iceberg.Table table,
org.apache.iceberg.MetadataTableType type)
protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(java.util.concurrent.ExecutorService executorService,
java.util.function.Consumer<java.lang.String> deleteFunc,
java.util.Iterator<FileInfo> files)
executorService - an executor service to use for parallel deletes
deleteFunc - a delete func
files - an iterator of Spark rows of the structure (path: String, type: String)

protected org.apache.iceberg.spark.actions.BaseSparkAction.DeleteSummary deleteFiles(org.apache.iceberg.io.SupportsBulkOperations io,
                                                                                     java.util.Iterator<FileInfo> files)