T - public class CarbonTableInputFormat<T> extends CarbonInputFormat<T>
| Modifier and Type | Field and Description |
|---|---|
static String |
DATABASE_NAME |
static String |
INPUT_FILES |
static String |
INPUT_SEGMENT_NUMBERS |
static String |
TABLE_NAME |
Fields inherited from class org.apache.carbondata.hadoop.api.CarbonInputFormat: hitedStreamFiles, numBlocks, numSegments, numStreamFiles, numStreamSegments

| Constructor and Description |
|---|
CarbonTableInputFormat() |
| Modifier and Type | Method and Description |
|---|---|
org.apache.carbondata.core.mutate.data.BlockMappingVO |
getBlockRowCount(org.apache.hadoop.mapreduce.Job job,
org.apache.carbondata.core.metadata.schema.table.CarbonTable table,
List<org.apache.carbondata.core.indexstore.PartitionSpec> partitions)
Get the row count of the Block and mapping of segment and Block count.
|
org.apache.carbondata.core.metadata.schema.table.CarbonTable |
getOrCreateCarbonTable(org.apache.hadoop.conf.Configuration configuration)
Get the cached CarbonTable or create it by TableInfo in `configuration`
|
org.apache.carbondata.core.readcommitter.ReadCommittedScope |
getReadCommitted(org.apache.hadoop.mapreduce.JobContext job,
org.apache.carbondata.core.metadata.AbsoluteTableIdentifier identifier) |
org.apache.carbondata.core.datamap.Segment[] |
getSegmentsToAccess(org.apache.hadoop.mapreduce.JobContext job,
org.apache.carbondata.core.readcommitter.ReadCommittedScope readCommittedScope)
Returns the valid segments to access.
|
List<org.apache.hadoop.mapreduce.InputSplit> |
getSplits(org.apache.hadoop.mapreduce.JobContext job)
Configurations FileInputFormat.INPUT_DIR
are used to get table path to read.
|
List<org.apache.hadoop.mapreduce.InputSplit> |
getSplitsOfOneSegment(org.apache.hadoop.mapreduce.JobContext job,
String targetSegment,
List<Integer> oldPartitionIdList,
org.apache.carbondata.core.metadata.schema.PartitionInfo partitionInfo)
Read data in one segment.
|
List<org.apache.hadoop.mapreduce.InputSplit> |
getSplitsOfStreaming(org.apache.hadoop.mapreduce.JobContext job,
List<org.apache.carbondata.core.datamap.Segment> streamSegments,
org.apache.carbondata.core.metadata.schema.table.CarbonTable carbonTable) |
List<org.apache.hadoop.mapreduce.InputSplit> |
getSplitsOfStreaming(org.apache.hadoop.mapreduce.JobContext job,
List<org.apache.carbondata.core.datamap.Segment> streamSegments,
org.apache.carbondata.core.metadata.schema.table.CarbonTable carbonTable,
org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf filterResolverIntf)
Uses the file list in the .carbonindex file to get the splits of streaming data.
|
protected org.apache.hadoop.mapreduce.lib.input.FileSplit |
makeSplit(String segmentId,
org.apache.hadoop.fs.Path file,
long start,
long length,
String[] hosts,
org.apache.carbondata.core.statusmanager.FileFormat fileFormat) |
protected org.apache.hadoop.mapreduce.lib.input.FileSplit |
makeSplit(String segmentId,
org.apache.hadoop.fs.Path file,
long start,
long length,
String[] hosts,
String[] inMemoryHosts,
org.apache.carbondata.core.statusmanager.FileFormat fileFormat) |
void |
refreshSegmentCacheIfRequired(org.apache.hadoop.mapreduce.JobContext job,
org.apache.carbondata.core.metadata.schema.table.CarbonTable carbonTable,
org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager updateStatusManager,
List<org.apache.carbondata.core.datamap.Segment> filteredSegmentToAccess)
Method to check and refresh segment cache
|
Methods inherited from class org.apache.carbondata.hadoop.api.CarbonInputFormat: createQueryModel, createRecordReader, getAbsoluteTableIdentifier, getAccessStreamingSegments, getColumnProjection, getDatabaseName, getDataBlocksOfSegment, getDataTypeConverter, getFilterPredicates, getHitedStreamFiles, getNumBlocks, getNumSegments, getNumStreamFiles, getNumStreamSegments, getPartitionsToPrune, getReadCommittedScope, getReadSupportClass, getTableInfo, getTableName, getValidateSegmentsToAccess, isFgDataMapPruningEnable, isSplitable, projectAllColumns, setAccessStreamingSegments, setCarbonReadSupport, setColumnProjection, setColumnProjection, setDatabaseName, setDataTypeConverter, setFgDataMapPruning, setFilterPredicates, setPartitionIdList, setPartitionsToPrune, setQuerySegment, setQuerySegment, setReadCommittedScope, setSegmentsToAccess, setTableInfo, setTableName, setTablePath, setTransactionalTable, setValidateSegmentsToAccess

Methods inherited from class org.apache.hadoop.mapreduce.lib.input.FileInputFormat: addInputPath, addInputPathRecursively, addInputPaths, computeSplitSize, getBlockIndex, getFormatMinSplitSize, getInputDirRecursive, getInputPathFilter, getInputPaths, getMaxSplitSize, getMinSplitSize, listStatus, makeSplit, makeSplit, setInputDirRecursive, setInputPathFilter, setInputPaths, setInputPaths, setMaxInputSplitSize, setMinInputSplitSize

public static final String INPUT_SEGMENT_NUMBERS
public static final String INPUT_FILES
public static final String DATABASE_NAME
public static final String TABLE_NAME
public org.apache.carbondata.core.metadata.schema.table.CarbonTable getOrCreateCarbonTable(org.apache.hadoop.conf.Configuration configuration)
throws IOException
Overrides: getOrCreateCarbonTable in class CarbonInputFormat<T>
Throws: IOException

public List<org.apache.hadoop.mapreduce.InputSplit> getSplits(org.apache.hadoop.mapreduce.JobContext job) throws IOException
Overrides: getSplits in class CarbonInputFormat<T>
Parameters: job -
Throws: IOException

public void refreshSegmentCacheIfRequired(org.apache.hadoop.mapreduce.JobContext job,
org.apache.carbondata.core.metadata.schema.table.CarbonTable carbonTable,
org.apache.carbondata.core.statusmanager.SegmentUpdateStatusManager updateStatusManager,
List<org.apache.carbondata.core.datamap.Segment> filteredSegmentToAccess)
throws IOException
Parameters: job - carbonTable - updateStatusManager - filteredSegmentToAccess -
Throws: IOException

public List<org.apache.hadoop.mapreduce.InputSplit> getSplitsOfStreaming(org.apache.hadoop.mapreduce.JobContext job, List<org.apache.carbondata.core.datamap.Segment> streamSegments, org.apache.carbondata.core.metadata.schema.table.CarbonTable carbonTable) throws IOException
Throws: IOException

public List<org.apache.hadoop.mapreduce.InputSplit> getSplitsOfStreaming(org.apache.hadoop.mapreduce.JobContext job, List<org.apache.carbondata.core.datamap.Segment> streamSegments, org.apache.carbondata.core.metadata.schema.table.CarbonTable carbonTable, org.apache.carbondata.core.scan.filter.resolver.FilterResolverIntf filterResolverIntf) throws IOException
Throws: IOException

protected org.apache.hadoop.mapreduce.lib.input.FileSplit makeSplit(String segmentId, org.apache.hadoop.fs.Path file, long start, long length, String[] hosts, org.apache.carbondata.core.statusmanager.FileFormat fileFormat)
protected org.apache.hadoop.mapreduce.lib.input.FileSplit makeSplit(String segmentId, org.apache.hadoop.fs.Path file, long start, long length, String[] hosts, String[] inMemoryHosts, org.apache.carbondata.core.statusmanager.FileFormat fileFormat)
public List<org.apache.hadoop.mapreduce.InputSplit> getSplitsOfOneSegment(org.apache.hadoop.mapreduce.JobContext job, String targetSegment, List<Integer> oldPartitionIdList, org.apache.carbondata.core.metadata.schema.PartitionInfo partitionInfo)
Parameters: job - targetSegment - oldPartitionIdList - get old partitionId before partitionInfo was changed

public org.apache.carbondata.core.datamap.Segment[] getSegmentsToAccess(org.apache.hadoop.mapreduce.JobContext job,
org.apache.carbondata.core.readcommitter.ReadCommittedScope readCommittedScope)
public org.apache.carbondata.core.mutate.data.BlockMappingVO getBlockRowCount(org.apache.hadoop.mapreduce.Job job,
org.apache.carbondata.core.metadata.schema.table.CarbonTable table,
List<org.apache.carbondata.core.indexstore.PartitionSpec> partitions)
throws IOException
Throws: IOException

public org.apache.carbondata.core.readcommitter.ReadCommittedScope getReadCommitted(org.apache.hadoop.mapreduce.JobContext job,
org.apache.carbondata.core.metadata.AbsoluteTableIdentifier identifier)
throws IOException
Throws: IOException

Copyright © 2016–2018 The Apache Software Foundation. All rights reserved.