use std::io::Write;
use crate::parquet_thrift::{
ElementType, FieldType, ReadThrift, ThriftCompactInputProtocol, ThriftCompactOutputProtocol,
WriteThrift, WriteThriftField, read_thrift_vec,
};
use crate::{
errors::{ParquetError, Result},
thrift_struct,
};
// Declares the `PageLocation` struct through the crate's `thrift_struct!` macro.
// All three fields are required and carry Thrift field ids 1..=3; the hand-written
// `read_page_location` decoder in this file relies on exactly this order and these types.
// NOTE(review): the per-field meanings below are inferred from the field names and the
// Parquet format's `PageLocation` definition — confirm against parquet.thrift.
thrift_struct!(
pub struct PageLocation {
// Field 1 (i64): presumably the byte offset of the page within the file.
1: required i64 offset
// Field 2 (i32): presumably the on-disk size of the page in bytes.
2: required i32 compressed_page_size
// Field 3 (i64): presumably the index of the page's first row within its row group.
3: required i64 first_row_index
}
);
// Declares the `OffsetIndexMetaData` struct through the crate's `thrift_struct!` macro.
// `OffsetIndexMetaData::try_from_fast` in this file decodes the same two fields by hand,
// so the field ids and types here must stay in step with that decoder.
thrift_struct!(
pub struct OffsetIndexMetaData {
// Field 1: required list of `PageLocation` entries.
1: required list<PageLocation> page_locations
// Field 2: optional list of i64 values; `None` when the field is absent.
// NOTE(review): presumably one unencoded BYTE_ARRAY byte count per page, as in the
// Parquet format's OffsetIndex — confirm against parquet.thrift.
2: optional list<i64> unencoded_byte_array_data_bytes
}
);
impl OffsetIndexMetaData {
pub fn page_locations(&self) -> &Vec<PageLocation> {
&self.page_locations
}
pub fn unencoded_byte_array_data_bytes(&self) -> Option<&Vec<i64>> {
self.unencoded_byte_array_data_bytes.as_ref()
}
pub(super) fn try_from_fast<'a, R: ThriftCompactInputProtocol<'a>>(
prot: &mut R,
) -> Result<Self> {
let (field_type, delta) = prot.read_field_header()?;
if delta != 1 || field_type != FieldType::List as u8 {
return Err(general_err!("error reading OffsetIndex::page_locations"));
}
let list_ident = prot.read_list_begin()?;
let mut page_locations = Vec::with_capacity(list_ident.size as usize);
for _ in 0..list_ident.size {
page_locations.push(read_page_location(prot)?);
}
let mut unencoded_byte_array_data_bytes: Option<Vec<i64>> = None;
let (mut field_type, delta) = prot.read_field_header()?;
if field_type == FieldType::List as u8 {
if delta != 1 {
return Err(general_err!(
"encountered unknown field while reading OffsetIndex"
));
}
let vec = read_thrift_vec::<i64, R>(&mut *prot)?;
unencoded_byte_array_data_bytes = Some(vec);
(field_type, _) = prot.read_field_header()?;
}
if field_type != FieldType::Stop as u8 {
return Err(general_err!(
"encountered unknown field while reading OffsetIndex"
));
}
Ok(Self {
page_locations,
unencoded_byte_array_data_bytes,
})
}
}
fn read_page_location<'a, R: ThriftCompactInputProtocol<'a>>(prot: &mut R) -> Result<PageLocation> {
let (field_type, delta) = prot.read_field_header()?;
if delta != 1 || field_type != FieldType::I64 as u8 {
return Err(general_err!("error reading PageLocation::offset"));
}
let offset = prot.read_i64()?;
let (field_type, delta) = prot.read_field_header()?;
if delta != 1 || field_type != FieldType::I32 as u8 {
return Err(general_err!(
"error reading PageLocation::compressed_page_size"
));
}
let compressed_page_size = prot.read_i32()?;
let (field_type, delta) = prot.read_field_header()?;
if delta != 1 || field_type != FieldType::I64 as u8 {
return Err(general_err!("error reading PageLocation::first_row_index"));
}
let first_row_index = prot.read_i64()?;
let (field_type, _) = prot.read_field_header()?;
if field_type != FieldType::Stop as u8 {
return Err(general_err!("unexpected field in PageLocation"));
}
Ok(PageLocation {
offset,
compressed_page_size,
first_row_index,
})
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::parquet_thrift::tests::test_roundtrip;

    /// Passes two `OffsetIndexMetaData` values to the shared `test_roundtrip` helper:
    /// one with the optional `unencoded_byte_array_data_bytes` list set and one
    /// without it.
    #[test]
    fn test_offset_idx_roundtrip() {
        let page_locations = vec![
            PageLocation {
                offset: 0,
                compressed_page_size: 10,
                first_row_index: 0,
            },
            PageLocation {
                offset: 10,
                compressed_page_size: 20,
                first_row_index: 100,
            },
        ];

        let with_optional_list = OffsetIndexMetaData {
            page_locations: page_locations.clone(),
            unencoded_byte_array_data_bytes: Some(vec![0i64, 100i64]),
        };
        let without_optional_list = OffsetIndexMetaData {
            page_locations,
            unencoded_byte_array_data_bytes: None,
        };

        test_roundtrip(with_optional_list);
        test_roundtrip(without_optional_list);
    }
}