// orc_format/read/column.rs

1use crate::{
2    error::Error,
3    proto::{stream::Kind, ColumnEncoding, CompressionKind, StripeFooter},
4};
5
6use super::decompress::Decompressor;
7
8/// Helper struct used to access the streams associated to an ORC column.
9/// Its main use [`Column::get_stream`], to get a stream.
10#[derive(Debug)]
11pub struct Column {
12    data: Vec<u8>,
13    column: u32,
14    number_of_rows: u64,
15    footer: StripeFooter,
16    compression: CompressionKind,
17}
18
19impl Column {
20    pub(crate) fn new(
21        data: Vec<u8>,
22        column: u32,
23        number_of_rows: u64,
24        footer: StripeFooter,
25        compression: CompressionKind,
26    ) -> Self {
27        Self {
28            data,
29            column,
30            number_of_rows,
31            footer,
32            compression,
33        }
34    }
35
36    /// Returns the stream `kind` associated to this column as a [`Decompressor`].
37    /// `scratch` becomes owned by [`Decompressor`], which you can recover via `into_inner`.
38    pub fn get_stream(&self, kind: Kind, scratch: Vec<u8>) -> Result<Decompressor, Error> {
39        let mut start = 0; // the start of the stream
40        self.footer
41            .streams
42            .iter()
43            .filter(|stream| stream.column() == self.column && stream.kind() != Kind::RowIndex)
44            .map(|stream| {
45                start += stream.length() as usize;
46                stream
47            })
48            .find(|stream| stream.kind() == kind)
49            .map(|stream| {
50                let length = stream.length() as usize;
51                let data = &self.data[start - length..start];
52                Decompressor::new(data, self.compression, scratch)
53            })
54            .ok_or(Error::InvalidKind(self.column, kind))
55    }
56
57    /// Returns the encoding of the column
58    pub fn encoding(&self) -> &ColumnEncoding {
59        &self.footer.columns[self.column as usize]
60    }
61
62    /// Returns the number of items in the dictionary, if any
63    pub fn dictionary_size(&self) -> Option<usize> {
64        self.footer.columns[self.column as usize]
65            .dictionary_size
66            .map(|x| x as usize)
67    }
68
69    /// The number of rows on this column
70    pub fn number_of_rows(&self) -> usize {
71        self.number_of_rows as usize
72    }
73
74    /// Returns the underlying footer and the pre-allocated memory region
75    /// containing all (compressed) streams of this column.
76    pub fn into_inner(self) -> (StripeFooter, Vec<u8>) {
77        (self.footer, self.data)
78    }
79}