public class ArrowConverter extends Object
| Constructor and Description |
|---|
ArrowConverter() |
| Modifier and Type | Method and Description |
|---|---|
static org.apache.arrow.vector.types.pojo.Field |
booleanField(String name)
Shortcut method for creating a boolean field
|
static org.apache.arrow.vector.BitVector |
booleanVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length) |
static org.nd4j.linalg.api.ndarray.INDArray |
convertArrowVector(org.apache.arrow.vector.FieldVector fieldVector,
ColumnType type)
Convert a field vector to a column vector
|
static List<org.apache.arrow.vector.FieldVector> |
convertToArrowVector(org.nd4j.linalg.api.ndarray.INDArray from,
List<String> name,
ColumnType type,
org.apache.arrow.memory.BufferAllocator bufferAllocator)
Convert an
INDArray
to a list of column vectors or a singleton
list when either a row vector or a column vector |
static org.apache.arrow.vector.types.pojo.Field |
doubleField(String name)
Shortcut method for creating a double field
with 64 bit floating point
|
static org.apache.arrow.vector.Float8Vector |
doubleVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length) |
static org.apache.arrow.vector.types.pojo.Field |
field(String name,
org.apache.arrow.vector.types.pojo.ArrowType arrowType)
Shortcut method for returning a field
given an arrow type and name
with no sub fields
|
static org.apache.arrow.vector.types.pojo.Field |
floatField(String name)
Shortcut method for creating a float field
with 32 bit floating point
|
static org.apache.arrow.vector.Float4Vector |
floatVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length) |
static Writable |
fromEntry(int item,
org.apache.arrow.vector.FieldVector from,
ColumnType columnType)
Based on an input
ColumnType
get an entry from a FieldVector |
static org.apache.arrow.vector.types.pojo.Field |
getFieldForColumn(String name,
ColumnType columnType)
Create a field given the input
ColumnType
and name |
static org.apache.arrow.vector.types.pojo.Field |
intField(String name)
Shortcut method for creating an int field
with 32 bit integers
|
static org.apache.arrow.vector.IntVector |
intVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length) |
static org.apache.arrow.vector.types.pojo.Field |
longField(String name)
Shortcut method for creating a long field
with 64 bit long field
|
static org.apache.arrow.vector.BigIntVector |
longVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length) |
static org.apache.arrow.vector.VarBinaryVector |
ndarrayVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
Create an ndarray vector that stores structs
of
INDArray
based on the org.apache.arrow.flatbuf.Tensor
format |
static org.apache.arrow.vector.dictionary.DictionaryProvider |
providerForVectors(List<org.apache.arrow.vector.FieldVector> vectors,
List<org.apache.arrow.vector.types.pojo.Field> fields)
Provide a value look up dictionary based on the
given set of input
FieldVector s for
reading and writing to arrow streams |
static org.nd4j.linalg.primitives.Pair<Schema,ArrowWritableRecordBatch> |
readFromBytes(byte[] input)
Read a datavec schema and record set
from the given bytes (usually expected to be an arrow format file)
|
static org.nd4j.linalg.primitives.Pair<Schema,ArrowWritableRecordBatch> |
readFromFile(File input)
Read a datavec schema and record set
from the given arrow file.
|
static org.nd4j.linalg.primitives.Pair<Schema,ArrowWritableRecordBatch> |
readFromFile(FileInputStream input)
Read a datavec schema and record set
from the given arrow file.
|
static void |
setValue(ColumnType columnType,
org.apache.arrow.vector.FieldVector fieldVector,
Object value,
int row)
Set the value of the specified column vector
at the specified row based on the given value.
|
static org.apache.arrow.vector.types.pojo.Field |
stringField(String name) |
static org.apache.arrow.vector.VarCharVector |
stringVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length) |
static org.apache.arrow.vector.TimeStampMilliVector |
timeVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length) |
static org.nd4j.linalg.api.ndarray.INDArray |
toArray(ArrowWritableRecordBatch arrowWritableRecordBatch)
Create an ndarray from a matrix.
|
static org.nd4j.linalg.api.ndarray.INDArray |
toArray(ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch)
Create an ndarray from a matrix.
|
static List<org.apache.arrow.vector.FieldVector> |
toArrowColumns(org.apache.arrow.memory.BufferAllocator bufferAllocator,
Schema schema,
List<List<Writable>> dataVecRecord)
Given a buffer allocator and datavec schema,
convert the passed in batch of records
to a set of arrow columns
|
static List<org.apache.arrow.vector.FieldVector> |
toArrowColumnsString(org.apache.arrow.memory.BufferAllocator bufferAllocator,
Schema schema,
List<List<String>> dataVecRecord)
Convert a set of input strings to arrow columns
|
static List<org.apache.arrow.vector.FieldVector> |
toArrowColumnsStringSingle(org.apache.arrow.memory.BufferAllocator bufferAllocator,
Schema schema,
List<String> dataVecRecord)
Convert a set of input strings to arrow columns
|
static List<org.apache.arrow.vector.FieldVector> |
toArrowColumnsStringTimeSeries(org.apache.arrow.memory.BufferAllocator bufferAllocator,
Schema schema,
List<List<List<String>>> dataVecRecord)
Convert a set of input strings to arrow columns
for a time series.
|
static List<org.apache.arrow.vector.FieldVector> |
toArrowColumnsTimeSeries(org.apache.arrow.memory.BufferAllocator bufferAllocator,
Schema schema,
List<List<List<Writable>>> dataVecRecord)
Convert a set of input writables to arrow columns
for a time series.
|
static <T> List<org.apache.arrow.vector.FieldVector> |
toArrowColumnsTimeSeriesHelper(org.apache.arrow.memory.BufferAllocator bufferAllocator,
Schema schema,
List<List<List<T>>> dataVecRecord)
Convert a set of input strings to arrow columns
for a time series.
|
static org.apache.arrow.vector.types.pojo.Schema |
toArrowSchema(Schema schema)
Convert a data vec
Schema
to an arrow Schema |
static ArrowWritableRecordBatch |
toArrowWritables(List<org.apache.arrow.vector.FieldVector> fieldVectors,
Schema schema)
Convert the input field vectors (the input data) and
the given schema to a proper list of writables.
|
static List<Writable> |
toArrowWritablesSingle(List<org.apache.arrow.vector.FieldVector> fieldVectors,
Schema schema)
Return a singular record based on the converted
writables result.
|
static List<List<List<Writable>>> |
toArrowWritablesTimeSeries(List<org.apache.arrow.vector.FieldVector> fieldVectors,
Schema schema,
int timeSeriesLength)
Convert the input field vectors (the input data) and
the given schema to a proper list of writables.
|
static Schema |
toDatavecSchema(org.apache.arrow.vector.types.pojo.Schema schema)
Convert an
Schema
to a datavec Schema |
static org.apache.arrow.vector.BitVector |
vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
boolean[] data) |
static org.apache.arrow.vector.TimeStampMilliVector |
vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
Date[] data) |
static org.apache.arrow.vector.Float8Vector |
vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
double[] data) |
static org.apache.arrow.vector.Float4Vector |
vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
float[] data) |
static org.apache.arrow.vector.VarBinaryVector |
vectorFor(org.apache.arrow.memory.BufferAllocator bufferAllocator,
String name,
org.nd4j.linalg.api.ndarray.INDArray[] data)
Returns a vector representing a tensor view
of each ndarray.
|
static org.apache.arrow.vector.IntVector |
vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int[] data) |
static org.apache.arrow.vector.BigIntVector |
vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
long[] data) |
static org.apache.arrow.vector.VarCharVector |
vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
String[] data) |
static void |
writeRecordBatchTo(org.apache.arrow.memory.BufferAllocator bufferAllocator,
List<List<Writable>> recordBatch,
Schema inputSchema,
OutputStream outputStream)
Write the records to the given output stream
|
static void |
writeRecordBatchTo(List<List<Writable>> recordBatch,
Schema inputSchema,
OutputStream outputStream)
Write the records to the given output stream
|
public static org.nd4j.linalg.api.ndarray.INDArray toArray(ArrowWritableRecordTimeSeriesBatch arrowWritableRecordBatch)
INDArray must be all the same dimensions.
Note that the input columns must also be numerical. If they aren't numerical already,
consider using a TransformProcess to transform the data
output from ArrowRecordReader into the proper format
for usage with this method for direct conversion.
Parameters: arrowWritableRecordBatch - the incoming batch. This is typically output from
an ArrowRecordReader
Returns: an INDArray representative of the input data
public static org.nd4j.linalg.api.ndarray.INDArray toArray(ArrowWritableRecordBatch arrowWritableRecordBatch)
INDArray must be all the same dimensions.
Note that the input columns must also be numerical. If they aren't numerical already,
consider using a TransformProcess to transform the data
output from ArrowRecordReader into the proper format
for usage with this method for direct conversion.
Parameters: arrowWritableRecordBatch - the incoming batch. This is typically output from
an ArrowRecordReader
Returns: an INDArray representative of the input data
public static org.nd4j.linalg.api.ndarray.INDArray convertArrowVector(org.apache.arrow.vector.FieldVector fieldVector,
ColumnType type)
Parameters: fieldVector - the field vector to convert
type - the type of the column vector
public static List<org.apache.arrow.vector.FieldVector> convertToArrowVector(org.nd4j.linalg.api.ndarray.INDArray from, List<String> name, ColumnType type, org.apache.arrow.memory.BufferAllocator bufferAllocator)
Convert an INDArray
to a list of column vectors or a singleton
list when either a row vector or a column vector
Parameters: from - the input array
name - the name of the vector
type - the type of the vector
bufferAllocator - the allocator to use
public static void writeRecordBatchTo(List<List<Writable>> recordBatch, Schema inputSchema, OutputStream outputStream)
recordBatch - the record batch to writeinputSchema - the input schemaoutputStream - the output stream to write topublic static void writeRecordBatchTo(org.apache.arrow.memory.BufferAllocator bufferAllocator,
List<List<Writable>> recordBatch,
Schema inputSchema,
OutputStream outputStream)
recordBatch - the record batch to writeinputSchema - the input schemaoutputStream - the output stream to write topublic static List<List<List<Writable>>> toArrowWritablesTimeSeries(List<org.apache.arrow.vector.FieldVector> fieldVectors, Schema schema, int timeSeriesLength)
fieldVectors - the field vectors to useschema - the schema to usetimeSeriesLength - the length of the time seriespublic static ArrowWritableRecordBatch toArrowWritables(List<org.apache.arrow.vector.FieldVector> fieldVectors, Schema schema)
fieldVectors - the field vectors to useschema - the schema to usepublic static List<Writable> toArrowWritablesSingle(List<org.apache.arrow.vector.FieldVector> fieldVectors, Schema schema)
Parameters: fieldVectors - the field vectors to use
schema - the schema to use for input
public static org.nd4j.linalg.primitives.Pair<Schema,ArrowWritableRecordBatch> readFromFile(FileInputStream input) throws IOException
Parameters: input - the input to read
Throws: IOException
public static org.nd4j.linalg.primitives.Pair<Schema,ArrowWritableRecordBatch> readFromFile(File input) throws IOException
Parameters: input - the input to read
Throws: IOException
public static org.nd4j.linalg.primitives.Pair<Schema,ArrowWritableRecordBatch> readFromBytes(byte[] input) throws IOException
Parameters: input - the input to read
Throws: IOException
public static org.apache.arrow.vector.types.pojo.Schema toArrowSchema(Schema schema)
Schema
to an arrow Schemaschema - the input schemapublic static Schema toDatavecSchema(org.apache.arrow.vector.types.pojo.Schema schema)
Schema
to a datavec Schemaschema - the input arrow schemapublic static org.apache.arrow.vector.types.pojo.Field field(String name, org.apache.arrow.vector.types.pojo.ArrowType arrowType)
name - the name of the fieldarrowType - the arrow type of the fieldpublic static org.apache.arrow.vector.types.pojo.Field getFieldForColumn(String name, ColumnType columnType)
ColumnType
and namename - the name of the fieldcolumnType - the column type to addpublic static org.apache.arrow.vector.types.pojo.Field doubleField(String name)
name - the name of the fieldpublic static org.apache.arrow.vector.types.pojo.Field floatField(String name)
name - the name of the fieldpublic static org.apache.arrow.vector.types.pojo.Field intField(String name)
name - the name of the fieldpublic static org.apache.arrow.vector.types.pojo.Field longField(String name)
name - the name of the fieldpublic static org.apache.arrow.vector.types.pojo.Field stringField(String name)
name - public static org.apache.arrow.vector.types.pojo.Field booleanField(String name)
name - public static org.apache.arrow.vector.dictionary.DictionaryProvider providerForVectors(List<org.apache.arrow.vector.FieldVector> vectors, List<org.apache.arrow.vector.types.pojo.Field> fields)
FieldVector s for
reading and writing to arrow streamsvectors - the vectors to use as a lookupDictionaryProvider for the given
input FieldVector listpublic static List<org.apache.arrow.vector.FieldVector> toArrowColumns(org.apache.arrow.memory.BufferAllocator bufferAllocator, Schema schema, List<List<Writable>> dataVecRecord)
bufferAllocator - the buffer allocator to useschema - the schema to convertdataVecRecord - the data vec record batch to convertFieldVectorpublic static List<org.apache.arrow.vector.FieldVector> toArrowColumnsTimeSeries(org.apache.arrow.memory.BufferAllocator bufferAllocator, Schema schema, List<List<List<Writable>>> dataVecRecord)
bufferAllocator - the buffer allocator to useschema - the schema to usedataVecRecord - the collection of input strings to processpublic static <T> List<org.apache.arrow.vector.FieldVector> toArrowColumnsTimeSeriesHelper(org.apache.arrow.memory.BufferAllocator bufferAllocator, Schema schema, List<List<List<T>>> dataVecRecord)
bufferAllocator - the buffer allocator to useschema - the schema to usedataVecRecord - the collection of input strings to processpublic static List<org.apache.arrow.vector.FieldVector> toArrowColumnsStringSingle(org.apache.arrow.memory.BufferAllocator bufferAllocator, Schema schema, List<String> dataVecRecord)
bufferAllocator - the buffer allocator to useschema - the schema to usedataVecRecord - the collection of input strings to processpublic static List<org.apache.arrow.vector.FieldVector> toArrowColumnsStringTimeSeries(org.apache.arrow.memory.BufferAllocator bufferAllocator, Schema schema, List<List<List<String>>> dataVecRecord)
bufferAllocator - the buffer allocator to useschema - the schema to usedataVecRecord - the collection of input strings to processpublic static List<org.apache.arrow.vector.FieldVector> toArrowColumnsString(org.apache.arrow.memory.BufferAllocator bufferAllocator, Schema schema, List<List<String>> dataVecRecord)
bufferAllocator - the buffer allocator to useschema - the schema to usedataVecRecord - the collection of input strings to processpublic static void setValue(ColumnType columnType, org.apache.arrow.vector.FieldVector fieldVector, Object value, int row)
public static org.apache.arrow.vector.TimeStampMilliVector vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
Date[] data)
allocator - name - data - public static org.apache.arrow.vector.TimeStampMilliVector timeVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
allocator - name - length - the length of the vectorpublic static org.apache.arrow.vector.VarBinaryVector vectorFor(org.apache.arrow.memory.BufferAllocator bufferAllocator,
String name,
org.nd4j.linalg.api.ndarray.INDArray[] data)
VarBinaryVectorbufferAllocator - the buffer allocator to usename - the name of the columndata - the input arrayspublic static org.apache.arrow.vector.VarCharVector vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
String[] data)
allocator - name - data - public static org.apache.arrow.vector.VarBinaryVector ndarrayVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
INDArray
based on the org.apache.arrow.flatbuf.Tensor
formatallocator - the allocator to usename - the name of the vectorlength - the number of vectors to storepublic static org.apache.arrow.vector.VarCharVector stringVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
allocator - name - length - the length of the vectorpublic static org.apache.arrow.vector.Float4Vector vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
float[] data)
allocator - name - data - public static org.apache.arrow.vector.Float4Vector floatVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
allocator - name - length - the length of the vectorpublic static org.apache.arrow.vector.Float8Vector vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
double[] data)
allocator - name - data - public static org.apache.arrow.vector.Float8Vector doubleVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
allocator - name - length - the length of the vectorpublic static org.apache.arrow.vector.BitVector vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
boolean[] data)
allocator - name - data - public static org.apache.arrow.vector.BitVector booleanVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
allocator - name - public static org.apache.arrow.vector.IntVector vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int[] data)
allocator - name - data - public static org.apache.arrow.vector.IntVector intVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
allocator - name - public static org.apache.arrow.vector.BigIntVector vectorFor(org.apache.arrow.memory.BufferAllocator allocator,
String name,
long[] data)
allocator - name - data - public static org.apache.arrow.vector.BigIntVector longVectorOf(org.apache.arrow.memory.BufferAllocator allocator,
String name,
int length)
Parameters: allocator - the allocator to use
name - the name of the vector
length - the number of rows in the column vector
public static Writable fromEntry(int item, org.apache.arrow.vector.FieldVector from, ColumnType columnType)
Based on an input ColumnType
get an entry from a FieldVector
Parameters: item - the row of the item to get from the column vector
from - the column vector to read from
columnType - the column type
Copyright © 2019. All rights reserved.