1use std::ops::Range;
10use std::sync::Arc;
11
12use itertools::Itertools;
13use vortex_array::ArrayRef;
14use vortex_array::stats::StatsSet;
15use vortex_dtype::{DType, Field, FieldMask, FieldPath, FieldPathSet};
16use vortex_error::VortexResult;
17use vortex_expr::pruning::checked_pruning_expr;
18use vortex_expr::{ExprRef, Scope};
19use vortex_layout::LayoutReader;
20use vortex_layout::segments::SegmentSource;
21use vortex_metrics::VortexMetrics;
22use vortex_scan::{ScanBuilder, SplitBy};
23use vortex_utils::aliases::hash_map::HashMap;
24
25use crate::footer::Footer;
26use crate::pruning::extract_relevant_file_stats_as_struct_row;
27
28#[derive(Clone)]
34pub struct VortexFile {
35 pub(crate) footer: Footer,
37 pub(crate) segment_source: Arc<dyn SegmentSource>,
39 pub(crate) metrics: VortexMetrics,
41}
42
43impl VortexFile {
44 pub fn footer(&self) -> &Footer {
46 &self.footer
47 }
48
49 pub fn row_count(&self) -> u64 {
51 self.footer.row_count()
52 }
53
54 pub fn dtype(&self) -> &DType {
56 self.footer.dtype()
57 }
58
59 pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
63 self.footer.statistics()
64 }
65
66 pub fn metrics(&self) -> &VortexMetrics {
68 &self.metrics
69 }
70
71 pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
76 self.segment_source.clone()
77 }
78
79 pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
81 let segment_source = self.segment_source();
82 self.footer
83 .layout()
84 .new_reader("".into(), segment_source)
86 }
87
88 pub fn scan(&self) -> VortexResult<ScanBuilder<ArrayRef>> {
90 Ok(ScanBuilder::new(self.layout_reader()?).with_metrics(self.metrics.clone()))
91 }
92
93 pub fn can_prune(&self, filter: &ExprRef) -> VortexResult<bool> {
95 let Some((stats, fields)) = self
96 .footer
97 .statistics()
98 .zip(self.footer.dtype().as_struct_fields_opt())
99 else {
100 return Ok(false);
101 };
102
103 let set = FieldPathSet::from_iter(fields.names().iter().zip(stats.iter()).flat_map(
104 |(name, stats)| {
105 stats.iter().map(|(stat, _)| {
106 FieldPath::from_iter([
107 Field::Name(name.clone()),
108 Field::Name(stat.name().into()),
109 ])
110 })
111 },
112 ));
113
114 let Some((predicate, required_stats)) = checked_pruning_expr(filter, &set) else {
115 return Ok(false);
116 };
117
118 let required_file_stats = HashMap::from_iter(
119 required_stats
120 .map()
121 .iter()
122 .map(|(path, stats)| (path.clone(), stats.clone())),
123 );
124
125 let Some(file_stats) =
126 extract_relevant_file_stats_as_struct_row(&required_file_stats, stats, fields)?
127 else {
128 return Ok(false);
129 };
130
131 let scope = Scope::new(file_stats);
132
133 Ok(predicate
134 .evaluate(&scope)?
135 .as_constant()
136 .is_some_and(|result| result.as_bool().value() == Some(true)))
137 }
138
139 pub fn splits(&self) -> VortexResult<Vec<Range<u64>>> {
140 Ok(SplitBy::Layout
141 .splits(self.layout_reader()?.as_ref(), &[FieldMask::All])?
142 .into_iter()
143 .tuple_windows()
144 .map(|(start, end)| start..end)
145 .collect())
146 }
147}