1use std::ops::Range;
10use std::sync::Arc;
11
12use vortex_array::ArrayRef;
13use vortex_array::stats::StatsSet;
14use vortex_dtype::{DType, Field, FieldMask, FieldPath, FieldPathSet};
15use vortex_error::VortexResult;
16use vortex_expr::pruning::checked_pruning_expr;
17use vortex_expr::{ExprRef, Scope};
18use vortex_layout::LayoutReader;
19use vortex_layout::segments::SegmentSource;
20use vortex_metrics::VortexMetrics;
21use vortex_scan::{ScanBuilder, SplitBy};
22use vortex_utils::aliases::hash_map::HashMap;
23
24use crate::footer::Footer;
25use crate::pruning::extract_relevant_file_stats_as_struct_row;
26
27#[derive(Clone)]
33pub struct VortexFile {
34 pub(crate) footer: Footer,
36 pub(crate) segment_source: Arc<dyn SegmentSource>,
38 pub(crate) metrics: VortexMetrics,
40}
41
42impl VortexFile {
43 pub fn footer(&self) -> &Footer {
45 &self.footer
46 }
47
48 pub fn row_count(&self) -> u64 {
50 self.footer.row_count()
51 }
52
53 pub fn dtype(&self) -> &DType {
55 self.footer.dtype()
56 }
57
58 pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
62 self.footer.statistics()
63 }
64
65 pub fn metrics(&self) -> &VortexMetrics {
67 &self.metrics
68 }
69
70 pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
75 self.segment_source.clone()
76 }
77
78 pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
80 let segment_source = self.segment_source();
81 self.footer
82 .layout()
83 .new_reader("".into(), segment_source)
85 }
86
87 pub fn scan(&self) -> VortexResult<ScanBuilder<ArrayRef>> {
89 Ok(ScanBuilder::new(self.layout_reader()?).with_metrics(self.metrics.clone()))
90 }
91
92 pub fn can_prune(&self, filter: &ExprRef) -> VortexResult<bool> {
94 let Some((stats, fields)) = self
95 .footer
96 .statistics()
97 .zip(self.footer.dtype().as_struct_fields_opt())
98 else {
99 return Ok(false);
100 };
101
102 let set = FieldPathSet::from_iter(fields.names().iter().zip(stats.iter()).flat_map(
103 |(name, stats)| {
104 stats.iter().map(|(stat, _)| {
105 FieldPath::from_iter([
106 Field::Name(name.clone()),
107 Field::Name(stat.name().into()),
108 ])
109 })
110 },
111 ));
112
113 let Some((predicate, required_stats)) = checked_pruning_expr(filter, &set) else {
114 return Ok(false);
115 };
116
117 let required_file_stats = HashMap::from_iter(
118 required_stats
119 .map()
120 .iter()
121 .map(|(path, stats)| (path.clone(), stats.clone())),
122 );
123
124 let Some(file_stats) =
125 extract_relevant_file_stats_as_struct_row(&required_file_stats, stats, fields)?
126 else {
127 return Ok(false);
128 };
129
130 let scope = Scope::new(file_stats);
131
132 Ok(predicate
133 .evaluate(&scope)?
134 .as_constant()
135 .is_some_and(|result| result.as_bool().value() == Some(true)))
136 }
137
138 pub fn splits(&self) -> VortexResult<Vec<Range<u64>>> {
139 SplitBy::Layout.splits(self.layout_reader()?.as_ref(), &[FieldMask::All])
140 }
141}