@InterfaceAudience.Public @InterfaceStability.Stable public interface HiveStorageHandler extends org.apache.hadoop.conf.Configurable
| Modifier and Type | Interface and Description |
|---|---|
static class |
HiveStorageHandler.AcidSupportType |
| Modifier and Type | Field and Description |
|---|---|
static List<AlterTableType> |
DEFAULT_ALLOWED_ALTER_OPS |
| Modifier and Type | Method and Description |
|---|---|
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> |
acidSelectColumns(Table table,
Context.Operation operation)
UpdateSemanticAnalyzer rewrites UPDATE and
DeleteSemanticAnalyzer rewrites DELETE queries into INSERT queries. |
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> |
acidSortColumns(Table table,
Context.Operation operation)
UpdateSemanticAnalyzer rewrites UPDATE and
DeleteSemanticAnalyzer rewrites DELETE queries into INSERT
queries. |
default List<VirtualColumn> |
acidVirtualColumns()
Specifies which additional virtual columns should be added to the virtual column registry during compilation
for tables that support ACID operations.
|
default boolean |
addDynamicSplitPruningEdge(Table table,
ExprNodeDesc syntheticFilterPredicate)
Test if the storage handler allows the push-down of join filter predicate to prune further the splits.
|
default void |
addResourcesForCreateTable(Map<String,String> tblProps,
HiveConf hiveConf)
Adds specific configurations to session for create table command.
|
default void |
alterTableSnapshotRefOperation(Table table,
AlterTableSnapshotRefSpec alterTableSnapshotRefSpec) |
default boolean |
alwaysUnpartitioned()
Check if partition columns should be removed and added to the list of regular columns in HMS.
|
default void |
appendFiles(org.apache.hadoop.hive.metastore.api.Table tbl,
URI fromURI,
boolean isOverwrite,
Map<String,String> partitionSpec)
Appends files to the table
|
default boolean |
areSnapshotsSupported()
Gets whether this storage handler supports snapshots.
|
default boolean |
canComputeQueryUsingStats(Table tbl)
Check if the storage handler answer a few queries like count(1) purely using stats.
|
default boolean |
canPerformMetadataDelete(Table hmsTable,
String branchName,
SearchArgument searchArgument) |
default boolean |
canProvideBasicStatistics()
Check if the storage handler can provide basic statistics.
|
default boolean |
canProvideColStatistics(Table tbl)
Check if the storage handler can provide col statistics.
|
default boolean |
canSetColStatistics(Table tbl)
Check if the storage handler can set col statistics.
|
default boolean |
canUseTruncate(Table hmsTable,
Map<String,String> partitionSpec) |
default Table |
checkAndSetTableMetaRef(Table hmsTable,
String tableMetaRef) |
default boolean |
commitInMoveTask()
Checks if we should keep the
MoveTask and use the
storageHandlerCommit(Properties, boolean) method for committing inserts instead of
DefaultHiveMetaHook.commitInsertTable(Table, boolean). |
void |
configureInputJobCredentials(TableDesc tableDesc,
Map<String,String> secrets)
This method is called to allow the StorageHandlers the chance to
populate secret keys into the job's credentials.
|
void |
configureInputJobProperties(TableDesc tableDesc,
Map<String,String> jobProperties)
This method is called to allow the StorageHandlers the chance
to populate the JobContext.getConfiguration() with properties that
may be needed by the handler's bundled artifacts (i.e. InputFormat, SerDe, etc.).
|
void |
configureJobConf(TableDesc tableDesc,
org.apache.hadoop.mapred.JobConf jobConf)
Called just before submitting MapReduce job.
|
void |
configureOutputJobProperties(TableDesc tableDesc,
Map<String,String> jobProperties)
This method is called to allow the StorageHandlers the chance
to populate the JobContext.getConfiguration() with properties that
may be needed by the handler's bundled artifacts (i.e. InputFormat, SerDe, etc.).
|
void |
configureTableJobProperties(TableDesc tableDesc,
Map<String,String> jobProperties)
Deprecated.
|
default DynamicPartitionCtx |
createDPContext(HiveConf conf,
Table table,
Context.Operation writeOperation)
Creates a DynamicPartitionCtx instance that will be set up by the storage handler itself.
|
default boolean |
directInsert()
Check if CTAS and CMV operations should behave in a direct-insert manner (i.e.
|
default void |
executeOperation(Table table,
AlterTableExecuteSpec executeSpec)
Execute an operation on storage handler level
|
HiveAuthorizationProvider |
getAuthorizationProvider()
Returns the implementation specific authorization provider
|
default Map<String,String> |
getBasicStatistics(Partish partish)
Return some basic statistics (numRows, numFiles, totalSize) calculated by the underlying storage handler
implementation.
|
default List<org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj> |
getColStatistics(Table table)
Return some col statistics (Lower bounds, Upper bounds, Null value counts, NaN, total counts) calculated by
the underlying storage handler implementation.
|
default ColumnInfo |
getColumnInfo(Table hmsTable,
String colName) |
default SnapshotContext |
getCurrentSnapshotContext(Table table)
Query the most recent unique snapshot's context of the passed table.
|
default String |
getFileFormatPropertyKey()
Get file format property key, if the file format is configured through a table property.
|
Class<? extends org.apache.hadoop.mapred.InputFormat> |
getInputFormatClass() |
default org.apache.hadoop.hive.metastore.api.LockType |
getLockType(WriteEntity writeEntity) |
HiveMetaHook |
getMetaHook() |
default Map<String,String> |
getNativeProperties(Table table)
Extract the native properties of the table which aren't stored in the HMS
|
default Map<String,String> |
getOperatorDescProperties(OperatorDesc operatorDesc,
Map<String,String> initialProps)
Used to add additional operator specific information from storage handler during DESCRIBE EXTENDED statement.
|
Class<? extends org.apache.hadoop.mapred.OutputFormat> |
getOutputFormatClass() |
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> |
getPartitionKeys(Table hmsTable) |
default List<String> |
getPartitionNames(Table hmsTable,
Map<String,String> partitionSpec) |
default List<Partition> |
getPartitionsByExpr(Table hmsTable,
ExprNodeDesc desc) |
default List<TransformSpec> |
getPartitionTransformSpec(Table table)
Return a list of partition transform specifications.
|
default org.apache.hadoop.hive.metastore.api.FieldSchema |
getRowId() |
Class<? extends AbstractSerDe> |
getSerDeClass() |
default StorageFormatDescriptor |
getStorageFormatDescriptor(org.apache.hadoop.hive.metastore.api.Table table)
Gets the storage format descriptor to be used for temp table for LOAD data.
|
default StorageHandlerInfo |
getStorageHandlerInfo(org.apache.hadoop.hive.metastore.api.Table table)
Used to fetch runtime information about storage handler during DESCRIBE EXTENDED statement
|
default StorageFormat.StorageHandlerTypes |
getType() |
default URI |
getURIForAuth(org.apache.hadoop.hive.metastore.api.Table table)
Constructs a URI for authorization purposes using the HMS table object
|
default Boolean |
hasAppendsOnly(Table hmsTable,
SnapshotContext since) |
default boolean |
isAllowedAlterOperation(AlterTableType opType)
Checks whether a certain ALTER TABLE operation is supported by the storage handler implementation.
|
default boolean |
isMetadataTableSupported()
Deprecated.
|
default boolean |
isTableMetaRefSupported()
Check whether the table supports metadata references which mainly include branch, tag and metadata tables.
|
default boolean |
isTimeTravelAllowed()
Should return true if the StorageHandler is able to handle time travel.
|
default boolean |
isValidMetadataTable(String metaTableName) |
default void |
prepareAlterTableEnvironmentContext(AbstractAlterTableDesc alterTableDesc,
org.apache.hadoop.hive.metastore.api.EnvironmentContext environmentContext)
Alter table operations can rely on this to customize the EnvironmentContext to be used during the alter table
invocation (both on client and server side of HMS)
|
default boolean |
setColStatistics(Table table,
List<org.apache.hadoop.hive.metastore.api.ColumnStatistics> colStats)
Set column stats for non-native tables
|
default void |
setTableLocationForCTAS(CreateTableDesc desc,
String location)
Sets tables physical location at create table as select.
|
default void |
setTableParametersForCTLT(Table tbl,
CreateTableLikeDesc desc,
Map<String,String> origParams)
Retains storage handler specific properties during CTLT.
|
default boolean |
shouldOverwrite(Table mTable,
Context.Operation operation)
Returns whether the data should be overwritten for the specific operation.
|
default List<String> |
showPartitions(DDLOperationContext context,
Table tbl)
Checks if storage handler supports Show Partitions and returns a list of partitions
|
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> |
sortColumns(Table table)
Collect the columns that are used to sort the content of the data files
|
default void |
storageHandlerCommit(Properties commitProperties,
boolean overwrite)
Deprecated.
|
default void |
storageHandlerCommit(Properties commitProperties,
Context.Operation operation)
Commits the inserts for the non-native tables.
|
default HiveStorageHandler.AcidSupportType |
supportsAcidOperations()
Specifies whether the table supports ACID operations or not (DELETE, UPDATE and MERGE statements).
|
default boolean |
supportsAppendData(org.apache.hadoop.hive.metastore.api.Table table,
boolean withPartClause)
Checks whether the table supports appending data files to the table.
|
default boolean |
supportsPartitionTransform()
Check if the underlying storage handler implementation support partition transformations.
|
default boolean |
supportsSortColumns()
Check if the underlying storage handler implementation supports sort columns.
|
default boolean |
supportsTruncateOnNonNativeTables()
Check if the underlying storage handler implementation supports truncate operation
for non native tables.
|
default void |
validatePartSpec(Table hmsTable,
Map<String,String> partitionSpec) |
default void |
validateSinkDesc(FileSinkDesc sinkDesc)
Validates whether the sink operation is permitted for the specific storage handler, based
on information contained in the sinkDesc.
|
static final List<AlterTableType> DEFAULT_ALLOWED_ALTER_OPS
Class<? extends org.apache.hadoop.mapred.InputFormat> getInputFormatClass()
Class<? extends org.apache.hadoop.mapred.OutputFormat> getOutputFormatClass()
Class<? extends AbstractSerDe> getSerDeClass()
HiveMetaHook getMetaHook()
HiveAuthorizationProvider getAuthorizationProvider() throws HiveException
HiveException
void configureInputJobProperties(TableDesc tableDesc, Map<String,String> jobProperties)
tableDesc - descriptor for the table being accessed
jobProperties - receives properties copied or transformed from the table properties
void configureInputJobCredentials(TableDesc tableDesc, Map<String,String> secrets)
void configureOutputJobProperties(TableDesc tableDesc, Map<String,String> jobProperties)
tableDesc - descriptor for the table being accessed
jobProperties - receives properties copied or transformed from the table properties
@Deprecated void configureTableJobProperties(TableDesc tableDesc, Map<String,String> jobProperties)
tableDesc - descriptor for the table being accessed
jobProperties - receives properties copied or transformed from the table properties
void configureJobConf(TableDesc tableDesc, org.apache.hadoop.mapred.JobConf jobConf)
tableDesc - descriptor for the table being accessed
jobConf - jobConf for MapReduce job
default StorageHandlerInfo getStorageHandlerInfo(org.apache.hadoop.hive.metastore.api.Table table) throws org.apache.hadoop.hive.metastore.api.MetaException
table - table definition
org.apache.hadoop.hive.metastore.api.MetaException
default StorageFormat.StorageHandlerTypes getType()
default org.apache.hadoop.hive.metastore.api.LockType getLockType(WriteEntity writeEntity)
default boolean addDynamicSplitPruningEdge(Table table, ExprNodeDesc syntheticFilterPredicate)
table - The table to filter.
syntheticFilterPredicate - Join filter predicate.
default Map<String,String> getOperatorDescProperties(OperatorDesc operatorDesc, Map<String,String> initialProps)
operatorDesc - operatorDesc
initialProps - Map containing initial operator properties
default Map<String,String> getBasicStatistics(Partish partish)
partish - a partish wrapper class
default boolean canProvideBasicStatistics()
default List<org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj> getColStatistics(Table table)
table -
default boolean setColStatistics(Table table, List<org.apache.hadoop.hive.metastore.api.ColumnStatistics> colStats)
table -
colStats -
default boolean canProvideColStatistics(Table tbl)
tbl -
default boolean canSetColStatistics(Table tbl)
default boolean canComputeQueryUsingStats(Table tbl)
default StorageFormatDescriptor getStorageFormatDescriptor(org.apache.hadoop.hive.metastore.api.Table table) throws SemanticException
table - table object
SemanticException
default boolean supportsAppendData(org.apache.hadoop.hive.metastore.api.Table table, boolean withPartClause) throws SemanticException
table - the table
withPartClause - whether a partition is specified
SemanticException - in case of any error.
default void appendFiles(org.apache.hadoop.hive.metastore.api.Table tbl, URI fromURI, boolean isOverwrite, Map<String,String> partitionSpec) throws SemanticException
tbl - the table object.
fromURI - the source of files.
isOverwrite - whether to overwrite the existing table data.
partitionSpec - the partition spec.
SemanticException - in case of any error
default boolean directInsert()
Please note that the atomicity of the operation will suffer in this case, i.e. the created table might become exposed, depending on the implementation, before the CTAS or CMV operations finishes. Rollback (e.g. dropping the table) is also the responsibility of the storage handler in case of failures.
default boolean alwaysUnpartitioned()
default void setTableParametersForCTLT(Table tbl, CreateTableLikeDesc desc, Map<String,String> origParams)
tbl - the table
desc - the table descriptor
origParams - the original table properties
default void setTableLocationForCTAS(CreateTableDesc desc, String location)
default Map<String,String> getNativeProperties(Table table)
table - the table
default boolean shouldOverwrite(Table mTable, Context.Operation operation)
mTable - the table.
operation - operation type.
default void addResourcesForCreateTable(Map<String,String> tblProps, HiveConf hiveConf)
tblProps - table properties
hiveConf - configuration
default HiveStorageHandler.AcidSupportType supportsAcidOperations()
default List<VirtualColumn> acidVirtualColumns()
Should only return a non-empty list if supportsAcidOperations() returns something
other than NONE.
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> acidSelectColumns(Table table, Context.Operation operation)
UpdateSemanticAnalyzer rewrites UPDATE and
DeleteSemanticAnalyzer rewrites DELETE queries into INSERT queries.
- DELETE FROM T WHERE A = 32 is rewritten into
INSERT INTO T SELECT <selectCols> FROM T WHERE A = 32 SORT BY <sortCols>.
- UPDATE T SET B=12 WHERE A = 32 is rewritten into
INSERT INTO T SELECT <selectCols>, <newValues> FROM T WHERE A = 32 SORT BY <sortCols>.
This method specifies which columns should be injected into the <selectCols> part of the rewritten query.
Should only return a non-empty list if
supportsAcidOperations() returns something
other than NONE.
table - the table which is being deleted/updated/merged into
operation - the operation type we are executing
default org.apache.hadoop.hive.metastore.api.FieldSchema getRowId()
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> acidSortColumns(Table table, Context.Operation operation)
UpdateSemanticAnalyzer rewrites UPDATE and
DeleteSemanticAnalyzer rewrites DELETE queries into INSERT
queries. E.g. DELETE FROM T WHERE A = 32 is rewritten into
INSERT INTO T SELECT <selectCols> FROM T WHERE A = 32 SORT BY <sortCols>.
This method specifies which columns should be injected into the <sortCols> part of the rewritten query.
Should only return a non-empty list if
supportsAcidOperations() returns something
other than NONE.
table - the table which is being deleted/updated/merged into
operation - the operation type we are executing
default boolean supportsSortColumns()
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> sortColumns(Table table)
table - the table which is being sorted
default boolean supportsPartitionTransform()
default List<TransformSpec> getPartitionTransformSpec(Table table)
Should only be called if supportsPartitionTransform() returns true.
table - the HMS table, must be non-null
default DynamicPartitionCtx createDPContext(HiveConf conf, Table table, Context.Operation writeOperation) throws SemanticException
conf - job conf
table - the HMS table
SemanticException
default String getFileFormatPropertyKey()
default boolean commitInMoveTask()
MoveTask and use the
storageHandlerCommit(Properties, boolean) method for committing inserts instead of
DefaultHiveMetaHook.commitInsertTable(Table, boolean).
storageHandlerCommit(Properties, boolean) method
default void storageHandlerCommit(Properties commitProperties, Context.Operation operation) throws HiveException
Commits the inserts for the non-native tables, replacing the MoveTask.
commitProperties - Commit properties which are needed for the handler based commit
operation - the operation type
HiveException - If there is an error during commit
@Deprecated default void storageHandlerCommit(Properties commitProperties, boolean overwrite) throws HiveException
HiveException
default boolean isAllowedAlterOperation(AlterTableType opType)
opType - The alter operation type (e.g. RENAME_COLUMNS)
default boolean supportsTruncateOnNonNativeTables()
default boolean isTimeTravelAllowed()
@Deprecated default boolean isMetadataTableSupported()
isTableMetaRefSupported()
default boolean isTableMetaRefSupported()
default boolean isValidMetadataTable(String metaTableName)
default Table checkAndSetTableMetaRef(Table hmsTable, String tableMetaRef) throws SemanticException
SemanticException
default URI getURIForAuth(org.apache.hadoop.hive.metastore.api.Table table) throws URISyntaxException
table - The HMS table object
URISyntaxException
default void validateSinkDesc(FileSinkDesc sinkDesc) throws SemanticException
sinkDesc - The sink descriptor
SemanticException - if the sink operation is not allowed
default void executeOperation(Table table, AlterTableExecuteSpec executeSpec)
executeSpec - operation specification
default void alterTableSnapshotRefOperation(Table table, AlterTableSnapshotRefSpec alterTableSnapshotRefSpec)
default boolean areSnapshotsSupported()
default SnapshotContext getCurrentSnapshotContext(Table table)
table - Table which snapshot context should be returned.
SnapshotContext wraps the snapshotId, or null if no snapshot present.
default void prepareAlterTableEnvironmentContext(AbstractAlterTableDesc alterTableDesc, org.apache.hadoop.hive.metastore.api.EnvironmentContext environmentContext)
alterTableDesc - the alter table desc (e.g.: AlterTableSetPropertiesDesc) containing the work to do
environmentContext - an existing EnvironmentContext created prior, now to be filled/amended
default Boolean hasAppendsOnly(Table hmsTable, SnapshotContext since)
default List<String> showPartitions(DDLOperationContext context, Table tbl) throws UnsupportedOperationException, HiveException
UnsupportedOperationException
HiveException
default void validatePartSpec(Table hmsTable, Map<String,String> partitionSpec) throws SemanticException
SemanticException
default boolean canUseTruncate(Table hmsTable, Map<String,String> partitionSpec) throws SemanticException
SemanticException
default List<String> getPartitionNames(Table hmsTable, Map<String,String> partitionSpec) throws SemanticException
SemanticException
default ColumnInfo getColumnInfo(Table hmsTable, String colName) throws SemanticException
SemanticException
default boolean canPerformMetadataDelete(Table hmsTable, String branchName, SearchArgument searchArgument)
default List<org.apache.hadoop.hive.metastore.api.FieldSchema> getPartitionKeys(Table hmsTable)
default List<Partition> getPartitionsByExpr(Table hmsTable, ExprNodeDesc desc) throws SemanticException
SemanticException
Copyright © 2024 The Apache Software Foundation. All rights reserved.