parquet2/metadata/
row_metadata.rs1use parquet_format_safe::RowGroup;
2
3use super::{column_chunk_metadata::ColumnChunkMetaData, schema_descriptor::SchemaDescriptor};
4use crate::{
5 error::{Error, Result},
6 write::ColumnOffsetsMetadata,
7};
8#[cfg(feature = "serde_types")]
9use serde::{Deserialize, Serialize};
10
/// Metadata describing one row group of a Parquet file: its column chunks,
/// row count, and total uncompressed size.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde_types", derive(Deserialize, Serialize))]
pub struct RowGroupMetaData {
    // Metadata of every column chunk in this row group, in schema order.
    columns: Vec<ColumnChunkMetaData>,
    // Number of rows stored in this row group.
    num_rows: usize,
    // Total uncompressed byte size of all column data in this row group.
    total_byte_size: usize,
}
19
20impl RowGroupMetaData {
21 pub fn new(
23 columns: Vec<ColumnChunkMetaData>,
24 num_rows: usize,
25 total_byte_size: usize,
26 ) -> RowGroupMetaData {
27 Self {
28 columns,
29 num_rows,
30 total_byte_size,
31 }
32 }
33
34 pub fn columns(&self) -> &[ColumnChunkMetaData] {
36 &self.columns
37 }
38
39 pub fn num_rows(&self) -> usize {
41 self.num_rows
42 }
43
44 pub fn total_byte_size(&self) -> usize {
46 self.total_byte_size
47 }
48
49 pub fn compressed_size(&self) -> usize {
51 self.columns
52 .iter()
53 .map(|c| c.compressed_size() as usize)
54 .sum::<usize>()
55 }
56
57 pub(crate) fn try_from_thrift(
59 schema_descr: &SchemaDescriptor,
60 rg: RowGroup,
61 ) -> Result<RowGroupMetaData> {
62 if schema_descr.columns().len() != rg.columns.len() {
63 return Err(Error::oos(format!("The number of columns in the row group ({}) must be equal to the number of columns in the schema ({})", rg.columns.len(), schema_descr.columns().len())));
64 }
65 let total_byte_size = rg.total_byte_size.try_into()?;
66 let num_rows = rg.num_rows.try_into()?;
67 let columns = rg
68 .columns
69 .into_iter()
70 .zip(schema_descr.columns())
71 .map(|(column_chunk, descriptor)| {
72 ColumnChunkMetaData::try_from_thrift(descriptor.clone(), column_chunk)
73 })
74 .collect::<Result<Vec<_>>>()?;
75
76 Ok(RowGroupMetaData {
77 columns,
78 num_rows,
79 total_byte_size,
80 })
81 }
82
83 pub(crate) fn into_thrift(self) -> RowGroup {
85 let file_offset = self
86 .columns
87 .iter()
88 .map(|c| {
89 ColumnOffsetsMetadata::from_column_chunk_metadata(c).calc_row_group_file_offset()
90 })
91 .next()
92 .unwrap_or(None);
93 let total_compressed_size = Some(self.compressed_size() as i64);
94 RowGroup {
95 columns: self.columns.into_iter().map(|v| v.into_thrift()).collect(),
96 total_byte_size: self.total_byte_size as i64,
97 num_rows: self.num_rows as i64,
98 sorting_columns: None,
99 file_offset,
100 total_compressed_size,
101 ordinal: None,
102 }
103 }
104}