1use std::sync::Arc;
10
11use vortex_array::ArrayRef;
12use vortex_array::stats::StatsSet;
13use vortex_dtype::{DType, Field, FieldPath, FieldPathSet};
14use vortex_error::VortexResult;
15use vortex_expr::pruning::checked_pruning_expr;
16use vortex_expr::{ExprRef, Scope};
17use vortex_layout::LayoutReader;
18use vortex_layout::segments::SegmentSource;
19use vortex_metrics::VortexMetrics;
20use vortex_scan::ScanBuilder;
21use vortex_utils::aliases::hash_map::HashMap;
22
23use crate::footer::Footer;
24use crate::pruning::extract_relevant_file_stats_as_struct_row;
25
26#[derive(Clone)]
32pub struct VortexFile {
33 pub(crate) footer: Footer,
35 pub(crate) segment_source: Arc<dyn SegmentSource>,
37 pub(crate) metrics: VortexMetrics,
39}
40
41impl VortexFile {
42 pub fn footer(&self) -> &Footer {
44 &self.footer
45 }
46
47 pub fn row_count(&self) -> u64 {
49 self.footer.row_count()
50 }
51
52 pub fn dtype(&self) -> &DType {
54 self.footer.dtype()
55 }
56
57 pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
61 self.footer.statistics()
62 }
63
64 pub fn metrics(&self) -> &VortexMetrics {
66 &self.metrics
67 }
68
69 pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
74 self.segment_source.clone()
75 }
76
77 pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
79 let segment_source = self.segment_source();
80 self.footer
81 .layout()
82 .new_reader("".into(), segment_source)
84 }
85
86 pub fn scan(&self) -> VortexResult<ScanBuilder<ArrayRef>> {
88 Ok(ScanBuilder::new(self.layout_reader()?).with_metrics(self.metrics.clone()))
89 }
90
91 pub fn can_prune(&self, filter: &ExprRef) -> VortexResult<bool> {
93 let Some((stats, fields)) = self
94 .footer
95 .statistics()
96 .zip(self.footer.dtype().as_struct_opt())
97 else {
98 return Ok(false);
99 };
100
101 let set = FieldPathSet::from_iter(fields.names().iter().zip(stats.iter()).flat_map(
102 |(name, stats)| {
103 stats.iter().map(|(stat, _)| {
104 FieldPath::from_iter([
105 Field::Name(name.clone()),
106 Field::Name(stat.name().into()),
107 ])
108 })
109 },
110 ));
111
112 let Some((predicate, required_stats)) = checked_pruning_expr(filter, &set) else {
113 return Ok(false);
114 };
115
116 let required_file_stats = HashMap::from_iter(
117 required_stats
118 .map()
119 .iter()
120 .map(|(path, stats)| (path.clone(), stats.clone())),
121 );
122
123 let Some(file_stats) =
124 extract_relevant_file_stats_as_struct_row(&required_file_stats, stats, fields)?
125 else {
126 return Ok(false);
127 };
128
129 let scope = Scope::new(file_stats);
130
131 Ok(predicate
132 .evaluate(&scope)?
133 .as_constant()
134 .is_some_and(|result| result.as_bool().value() == Some(true)))
135 }
136}