public class ParquetFileWriter extends Object
| Modifier and Type | Field and Description |
|---|---|
static int |
CURRENT_VERSION |
static byte[] |
MAGIC |
static String |
PARQUET_METADATA_FILE |
| Constructor and Description |
|---|
ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file) |
| Modifier and Type | Method and Description |
|---|---|
void |
end(Map<String,String> extraMetaData)
ends a file once all blocks have been written.
|
void |
endBlock()
ends a block once all column chunks have been written
|
void |
endColumn()
ends a column (once all repetition levels, definition levels and data have been written)
|
long |
getPos() |
void |
start()
start the file
|
void |
startBlock(long recordCount)
start a block
|
void |
startColumn(ColumnDescriptor descriptor,
long valueCount,
CompressionCodecName compressionCodecName)
start a column inside a block
|
void |
writeDataPage(int valueCount,
int uncompressedPageSize,
BytesInput bytes,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
writes a single page
|
void |
writeDictionaryPage(DictionaryPage dictionaryPage)
writes a dictionary page
|
static void |
writeMetadataFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path outputPath,
List<Footer> footers)
writes a _metadata file
|
public static final String PARQUET_METADATA_FILE
public static final byte[] MAGIC
public static final int CURRENT_VERSION
public ParquetFileWriter(org.apache.hadoop.conf.Configuration configuration,
MessageType schema,
org.apache.hadoop.fs.Path file)
throws IOException
Parameters:
schema - the schema of the data
out - the file to write to
codec - the codec to use to compress blocks
Throws:
IOException - if the file cannot be created
public void start()
throws IOException
Throws:
IOException
public void startBlock(long recordCount)
throws IOException
Parameters:
recordCount - the record count in this block
Throws:
IOException
public void startColumn(ColumnDescriptor descriptor, long valueCount, CompressionCodecName compressionCodecName) throws IOException
Parameters:
descriptor - the column descriptor
valueCount - the value count in this column
compressionCodecName -
Throws:
IOException
public void writeDictionaryPage(DictionaryPage dictionaryPage) throws IOException
Parameters:
dictionaryPage - the dictionary page
Throws:
IOException
public void writeDataPage(int valueCount,
int uncompressedPageSize,
BytesInput bytes,
Encoding rlEncoding,
Encoding dlEncoding,
Encoding valuesEncoding)
throws IOException
Parameters:
valueCount - count of values
uncompressedPageSize - the size of the data once uncompressed
bytes - the compressed data for the page without header
rlEncoding - encoding of the repetition level
dlEncoding - encoding of the definition level
valuesEncoding - encoding of values
Throws:
IOException
public void endColumn()
throws IOException
Throws:
IOException
public void endBlock()
throws IOException
Throws:
IOException
public void end(Map<String,String> extraMetaData) throws IOException
Parameters:
extraMetaData - the extra meta data to write in the footer
Throws:
IOException
public static void writeMetadataFile(org.apache.hadoop.conf.Configuration configuration,
org.apache.hadoop.fs.Path outputPath,
List<Footer> footers)
throws IOException
Parameters:
configuration - the configuration to use to get the FileSystem
outputPath - the directory to write the _metadata file to
footers - the list of footers to merge
Throws:
IOException
public long getPos()
throws IOException
Throws:
IOException
Copyright © 2014. All Rights Reserved.