polars_io/parquet/read/
predicates.rs

1use polars_core::prelude::*;
2use polars_parquet::read::RowGroupMetadata;
3use polars_parquet::read::statistics::{ArrowColumnStatisticsArrays, deserialize_all};
4
5/// Collect the statistics in a row-group
6pub fn collect_statistics_with_live_columns(
7    row_groups: &[RowGroupMetadata],
8    schema: &ArrowSchema,
9    live_columns: &PlIndexSet<PlSmallStr>,
10) -> PolarsResult<Vec<Option<ArrowColumnStatisticsArrays>>> {
11    if row_groups.is_empty() {
12        return Ok((0..live_columns.len()).map(|_| None).collect());
13    }
14
15    let md = &row_groups[0];
16    live_columns
17        .iter()
18        .map(|c| {
19            let field = schema.get(c).unwrap();
20
21            // This can be None in the allow_missing_columns case.
22            let Some(idxs) = md.columns_idxs_under_root_iter(&field.name) else {
23                return Ok(None);
24            };
25
26            // 0 is possible for possible for empty structs.
27            //
28            // 2+ is for structs. We don't support reading nested statistics for now. It does not
29            // really make any sense at the moment with how we structure statistics.
30            if idxs.is_empty() || idxs.len() > 1 {
31                return Ok(None);
32            }
33
34            let idx = idxs[0];
35            Ok(deserialize_all(field, row_groups, idx)?)
36        })
37        .collect::<PolarsResult<Vec<_>>>()
38}