Reader.Options
| Modifier and Type | Field and Description |
|---|---|
protected int |
bufferSize |
protected CompressionCodec |
codec |
protected CompressionKind |
compressionKind |
protected org.apache.hadoop.conf.Configuration |
conf |
protected org.apache.hadoop.fs.FileSystem |
fileSystem |
protected OrcProto.Footer |
footer |
protected org.apache.hadoop.fs.Path |
path |
| Constructor and Description |
|---|
ReaderImpl(org.apache.hadoop.fs.Path path,
OrcFile.ReaderOptions options)
Constructor that lets the user specify additional options.
|
| Modifier and Type | Method and Description |
|---|---|
CompressionKind |
getCompression()
Get the compression kind.
|
int |
getCompressionSize()
Get the buffer size for the compression.
|
long |
getContentLength()
Get the length of the file.
|
org.apache.hadoop.hive.ql.io.orc.ReaderImpl.FileMetaInfo |
getFileMetaInfo() |
OrcFile.Version |
getFileVersion()
Get the file format version.
|
Metadata |
getMetadata()
Get the metadata information like stripe level column statistics etc.
|
List<String> |
getMetadataKeys()
Get the user metadata keys.
|
ByteBuffer |
getMetadataValue(String key)
Get a user metadata value.
|
long |
getNumberOfRows()
Get the number of rows in the file.
|
ObjectInspector |
getObjectInspector()
Get the object inspector for looking at the objects.
|
List<OrcProto.UserMetadataItem> |
getOrcProtoUserMetadata() |
long |
getRawDataSize()
Get the deserialized data size of the file.
|
long |
getRawDataSizeOfColumns(List<String> colNames)
Get the deserialized data size of the specified columns.
|
int |
getRowIndexStride()
Get the number of rows per entry in the row index.
|
ColumnStatistics[] |
getStatistics()
Get the statistics about the columns in the file.
|
List<StripeInformation> |
getStripes()
Get the list of stripes.
|
List<OrcProto.Type> |
getTypes()
Get the list of types contained in the file.
|
OrcFile.WriterVersion |
getWriterVersion()
Get the version of the writer of this file.
|
boolean |
hasMetadataValue(String key)
Did the user set the given metadata value.
|
MetadataReader |
metadata() |
RecordReader |
rows()
Create a RecordReader that reads everything with the default options.
|
RecordReader |
rows(boolean[] include)
Create a RecordReader that will scan the entire file.
|
RecordReader |
rows(long offset,
long length,
boolean[] include)
Create a RecordReader that will start reading at the first stripe after
offset up to the stripe that starts at offset + length.
|
RecordReader |
rows(long offset,
long length,
boolean[] include,
SearchArgument sarg,
String[] columnNames)
Create a RecordReader that will read a section of a file.
|
RecordReader |
rowsOptions(Reader.Options options)
Create a RecordReader that uses the options given.
|
protected final org.apache.hadoop.fs.FileSystem fileSystem
protected final org.apache.hadoop.fs.Path path
protected final CompressionKind compressionKind
protected final CompressionCodec codec
protected final int bufferSize
protected final OrcProto.Footer footer
protected final org.apache.hadoop.conf.Configuration conf
public ReaderImpl(org.apache.hadoop.fs.Path path,
OrcFile.ReaderOptions options)
throws IOException
Parameters:
path - pathname for file
options - options for reading
Throws:
IOException
public long getNumberOfRows()
ReadergetNumberOfRows in interface Readerpublic List<String> getMetadataKeys()
ReadergetMetadataKeys in interface Readerpublic ByteBuffer getMetadataValue(String key)
ReadergetMetadataValue in interface Readerkey - a key given by the userpublic boolean hasMetadataValue(String key)
ReaderhasMetadataValue in interface Readerkey - the key to checkpublic CompressionKind getCompression()
ReadergetCompression in interface Readerpublic int getCompressionSize()
ReadergetCompressionSize in interface Readerpublic List<StripeInformation> getStripes()
ReadergetStripes in interface Readerpublic ObjectInspector getObjectInspector()
ReadergetObjectInspector in interface Readerpublic long getContentLength()
ReadergetContentLength in interface Readerpublic List<OrcProto.Type> getTypes()
Readerpublic OrcFile.Version getFileVersion()
ReadergetFileVersion in interface Readerpublic OrcFile.WriterVersion getWriterVersion()
ReadergetWriterVersion in interface Readerpublic int getRowIndexStride()
ReadergetRowIndexStride in interface Readerpublic ColumnStatistics[] getStatistics()
ReadergetStatistics in interface Readerpublic org.apache.hadoop.hive.ql.io.orc.ReaderImpl.FileMetaInfo getFileMetaInfo()
public RecordReader rows() throws IOException
Readerrows in interface ReaderIOExceptionpublic RecordReader rowsOptions(Reader.Options options) throws IOException
ReaderrowsOptions in interface Readeroptions - the options to read withIOExceptionpublic RecordReader rows(boolean[] include) throws IOException
Readerrows in interface Readerinclude - true for each column that should be includedIOExceptionpublic RecordReader rows(long offset, long length, boolean[] include) throws IOException
Readerrows in interface Readeroffset - a byte offset in the filelength - a number of bytes in the fileinclude - true for each column that should be includedIOExceptionpublic RecordReader rows(long offset, long length, boolean[] include, SearchArgument sarg, String[] columnNames) throws IOException
Reader
rows in interface Reader
Parameters:
offset - the minimum offset of the first stripe to read
length - the distance from offset of the first address to stop reading at
include - true for each column that should be included
sarg - a search argument that limits the rows that should be read.
columnNames - the names of the included columns
Throws:
IOException
public long getRawDataSize()
ReadergetRawDataSize in interface Readerpublic long getRawDataSizeOfColumns(List<String> colNames)
ReadergetRawDataSizeOfColumns in interface Readerpublic Metadata getMetadata() throws IOException
ReadergetMetadata in interface ReaderIOExceptionpublic List<OrcProto.UserMetadataItem> getOrcProtoUserMetadata()
public MetadataReader metadata() throws IOException
metadata in interface Reader
Throws:
IOException

Copyright © 2017 The Apache Software Foundation. All rights reserved.