public class ReaderImpl extends Object implements Reader
Modifier and Type | Class and Description |
---|---|
static class |
ReaderImpl.StripeInformationImpl |
Reader.Options
Modifier and Type | Field and Description |
---|---|
protected int |
bufferSize |
protected CompressionKind |
compressionKind |
protected Configuration |
conf |
protected FSDataInputStream |
file |
protected OrcFile.ReaderOptions |
options |
protected Path |
path |
protected int |
rowIndexStride |
protected List<OrcProto.StripeStatistics> |
stripeStatistics |
protected OrcTail |
tail |
protected List<OrcProto.Type> |
types |
protected boolean |
useUTCTimestamp |
Constructor and Description |
---|
ReaderImpl(Path path,
OrcFile.ReaderOptions options)
Constructor that lets the user specify additional options.
|
Modifier and Type | Method and Description |
---|---|
protected static void |
checkOrcVersion(Path path,
OrcProto.PostScript postscript)
Check to see if this ORC file is from a future version and if so,
warn the user that we may not be able to read all of the column encodings.
|
void |
close() |
ColumnStatistics[] |
deserializeStats(TypeDescription schema,
List<OrcProto.ColumnStatistics> fileStats) |
protected static void |
ensureOrcFooter(ByteBuffer buffer,
int psLen)
Deprecated.
|
protected static void |
ensureOrcFooter(FSDataInputStream in,
Path path,
int psLen,
ByteBuffer buffer)
Ensure this is an ORC file to prevent users from trying to read text
files or RC files as ORC files.
|
static OrcTail |
extractFileTail(ByteBuffer buffer)
Deprecated.
Use
extractFileTail(FileSystem, Path, long) instead.
This is for backward compatibility. |
static OrcTail |
extractFileTail(ByteBuffer buffer,
long fileLen,
long modificationTime)
Deprecated.
Use
extractFileTail(FileSystem, Path, long) instead.
This is for backward compatibility. |
protected OrcTail |
extractFileTail(FileSystem fs,
Path path,
long maxFileLength) |
static OrcProto.Metadata |
extractMetadata(ByteBuffer bb,
int metadataAbsPos,
int metadataSize,
InStream.StreamOptions options) |
EncryptionKey[] |
getColumnEncryptionKeys()
Get the list of encryption keys for column encryption.
|
CompressionKind |
getCompressionKind()
Get the compression kind.
|
int |
getCompressionSize()
Get the buffer size for the compression.
|
long |
getContentLength()
Get the length of the file.
|
boolean |
getConvertToProlepticGregorian()
Should the returned values use the proleptic Gregorian calendar?
|
DataMaskDescription[] |
getDataMasks()
Get the data masks for the unencrypted variant of the data.
|
ReaderEncryption |
getEncryption()
Internal access to our view of the encryption.
|
ReaderEncryptionVariant[] |
getEncryptionVariants()
Get the list of encryption variants for the data.
|
protected FileSystem |
getFileSystem() |
protected Supplier<FileSystem> |
getFileSystemSupplier() |
OrcProto.FileTail |
getFileTail()
Get the file tail (footer + postscript)
|
OrcFile.Version |
getFileVersion()
Get the file format version.
|
static OrcFile.Version |
getFileVersion(List<Integer> versionList) |
List<String> |
getMetadataKeys()
Get the user metadata keys.
|
int |
getMetadataSize() |
ByteBuffer |
getMetadataValue(String key)
Get a user metadata value.
|
long |
getNumberOfRows()
Get the number of rows in the file.
|
List<OrcProto.ColumnStatistics> |
getOrcProtoFileStatistics() |
List<OrcProto.StripeStatistics> |
getOrcProtoStripeStatistics() |
long |
getRawDataSize()
Get the deserialized data size of the file
|
long |
getRawDataSizeFromColIndices(List<Integer> colIndices)
Get the deserialized data size of the specified column ids
|
static long |
getRawDataSizeFromColIndices(List<Integer> colIndices,
List<OrcProto.Type> types,
List<OrcProto.ColumnStatistics> stats) |
long |
getRawDataSizeOfColumns(List<String> colNames)
Get the deserialized data size of the specified columns
|
int |
getRowIndexStride()
Get the number of rows per entry in the row index.
|
TypeDescription |
getSchema()
Get the type of rows in this ORC file.
|
ByteBuffer |
getSerializedFileFooter() |
ColumnStatistics[] |
getStatistics()
Get the statistics about the columns in the file.
|
List<StripeInformation> |
getStripes()
Get the list of stripes.
|
List<StripeStatistics> |
getStripeStatistics()
Get the stripe statistics for all of the columns.
|
List<StripeStatistics> |
getStripeStatistics(boolean[] included)
Get the stripe statistics from the file.
|
List<OrcProto.Type> |
getTypes()
Get the list of types contained in the file.
|
List<StripeStatistics> |
getVariantStripeStatistics(EncryptionVariant variant)
Get the stripe statistics for a given variant.
|
List<Integer> |
getVersionList() |
OrcFile.WriterVersion |
getWriterVersion()
Get the version of the writer of this file.
|
static OrcFile.WriterVersion |
getWriterVersion(int writerVersion)
Get the WriterVersion based on the ORC file postscript.
|
boolean |
hasMetadataValue(String key)
Did the user set the given metadata value.
|
Reader.Options |
options()
Create a default options object that can be customized for creating
a RecordReader.
|
RecordReader |
rows()
Create a RecordReader that reads everything with the default options.
|
RecordReader |
rows(Reader.Options options)
Create a RecordReader that uses the options given.
|
FSDataInputStream |
takeFile()
Take the file from the reader.
|
String |
toString() |
boolean |
writerUsedProlepticGregorian()
Was the file written using the proleptic Gregorian calendar.
|
protected final Path path
protected final OrcFile.ReaderOptions options
protected final CompressionKind compressionKind
protected FSDataInputStream file
protected int bufferSize
protected List<OrcProto.StripeStatistics> stripeStatistics
protected final List<OrcProto.Type> types
protected final int rowIndexStride
protected final Configuration conf
protected final boolean useUTCTimestamp
protected final OrcTail tail
public ReaderImpl(Path path, OrcFile.ReaderOptions options) throws IOException
path
- pathname for file
options
- options for reading
IOException
public long getNumberOfRows()
Reader
getNumberOfRows
in interface Reader
public List<String> getMetadataKeys()
Reader
getMetadataKeys
in interface Reader
public ByteBuffer getMetadataValue(String key)
Reader
getMetadataValue
in interface Reader
key
- a key given by the user
public boolean hasMetadataValue(String key)
Reader
hasMetadataValue
in interface Reader
key
- the key to check
public CompressionKind getCompressionKind()
Reader
getCompressionKind
in interface Reader
public int getCompressionSize()
Reader
getCompressionSize
in interface Reader
public List<StripeInformation> getStripes()
Reader
getStripes
in interface Reader
public long getContentLength()
Reader
getContentLength
in interface Reader
public List<OrcProto.Type> getTypes()
Reader
public static OrcFile.Version getFileVersion(List<Integer> versionList)
public OrcFile.Version getFileVersion()
Reader
getFileVersion
in interface Reader
public OrcFile.WriterVersion getWriterVersion()
Reader
getWriterVersion
in interface Reader
public OrcProto.FileTail getFileTail()
Reader
getFileTail
in interface Reader
public EncryptionKey[] getColumnEncryptionKeys()
Reader
getColumnEncryptionKeys
in interface Reader
public DataMaskDescription[] getDataMasks()
Reader
getDataMasks
in interface Reader
public ReaderEncryptionVariant[] getEncryptionVariants()
Reader
getEncryptionVariants
in interface Reader
public List<StripeStatistics> getVariantStripeStatistics(EncryptionVariant variant) throws IOException
Reader
getVariantStripeStatistics
in interface Reader
variant
- the encryption variant or null for unencrypted
IOException
- if the required key is not available
public ReaderEncryption getEncryption()
public int getRowIndexStride()
Reader
getRowIndexStride
in interface Reader
public ColumnStatistics[] getStatistics()
Reader
getStatistics
in interface Reader
public ColumnStatistics[] deserializeStats(TypeDescription schema, List<OrcProto.ColumnStatistics> fileStats)
public TypeDescription getSchema()
Reader
protected static void ensureOrcFooter(FSDataInputStream in, Path path, int psLen, ByteBuffer buffer) throws IOException
in
- the file being read
path
- the filename for error messages
psLen
- the postscript length
buffer
- the tail of the file
IOException
protected static void ensureOrcFooter(ByteBuffer buffer, int psLen) throws IOException
Deprecated. Use
ensureOrcFooter(FSDataInputStream, Path, int, ByteBuffer)
instead.
psLen
- the postscript length
buffer
- the tail of the file
IOException
protected static void checkOrcVersion(Path path, OrcProto.PostScript postscript) throws IOException
path
- the data source path for error messages
postscript
- the parsed postscript
IOException
protected FileSystem getFileSystem() throws IOException
IOException
protected Supplier<FileSystem> getFileSystemSupplier()
public static OrcFile.WriterVersion getWriterVersion(int writerVersion)
writerVersion
- the integer writer version
public static OrcProto.Metadata extractMetadata(ByteBuffer bb, int metadataAbsPos, int metadataSize, InStream.StreamOptions options) throws IOException
IOException
public static OrcTail extractFileTail(ByteBuffer buffer) throws IOException
Deprecated. Use
extractFileTail(FileSystem, Path, long)
instead.
This is for backward compatibility.
IOException
public static OrcTail extractFileTail(ByteBuffer buffer, long fileLen, long modificationTime) throws IOException
Deprecated. Use
extractFileTail(FileSystem, Path, long)
instead.
This is for backward compatibility.
IOException
protected OrcTail extractFileTail(FileSystem fs, Path path, long maxFileLength) throws IOException
IOException
public ByteBuffer getSerializedFileFooter()
getSerializedFileFooter
in interface Reader
public boolean writerUsedProlepticGregorian()
Reader
writerUsedProlepticGregorian
in interface Reader
public boolean getConvertToProlepticGregorian()
Reader
getConvertToProlepticGregorian
in interface Reader
public Reader.Options options()
Reader
public RecordReader rows() throws IOException
Reader
rows
in interface Reader
IOException
public RecordReader rows(Reader.Options options) throws IOException
Reader
rows
in interface Reader
options
- the options to read with
IOException
public long getRawDataSize()
Reader
getRawDataSize
in interface Reader
public long getRawDataSizeFromColIndices(List<Integer> colIndices)
Reader
getRawDataSizeFromColIndices
in interface Reader
colIndices
- internal column id (check orcfiledump for column ids)
public static long getRawDataSizeFromColIndices(List<Integer> colIndices, List<OrcProto.Type> types, List<OrcProto.ColumnStatistics> stats) throws FileFormatException
FileFormatException
public long getRawDataSizeOfColumns(List<String> colNames)
Reader
getRawDataSizeOfColumns
in interface Reader
colNames
- the list of column names
public List<OrcProto.StripeStatistics> getOrcProtoStripeStatistics()
getOrcProtoStripeStatistics
in interface Reader
public List<OrcProto.ColumnStatistics> getOrcProtoFileStatistics()
getOrcProtoFileStatistics
in interface Reader
public List<StripeStatistics> getStripeStatistics() throws IOException
Reader
getStripeStatistics
in interface Reader
IOException
public List<StripeStatistics> getStripeStatistics(boolean[] included) throws IOException
Reader
getStripeStatistics
in interface Reader
included
- null for all columns or an array where the required columns
are selected
IOException
public List<Integer> getVersionList()
getVersionList
in interface Reader
public int getMetadataSize()
getMetadataSize
in interface Reader
public void close() throws IOException
close
in interface Closeable
close
in interface AutoCloseable
IOException
public FSDataInputStream takeFile()
Copyright © 2013–2021 The Apache Software Foundation. All rights reserved.