1use std::ops::Range;
10use std::sync::Arc;
11
12use itertools::Itertools;
13use vortex_array::ArrayRef;
14use vortex_array::expr::Expression;
15use vortex_array::expr::pruning::checked_pruning_expr;
16use vortex_array::stats::StatsSet;
17use vortex_dtype::DType;
18use vortex_dtype::Field;
19use vortex_dtype::FieldMask;
20use vortex_dtype::FieldPath;
21use vortex_dtype::FieldPathSet;
22use vortex_error::VortexResult;
23use vortex_layout::LayoutReader;
24use vortex_layout::segments::SegmentSource;
25use vortex_metrics::VortexMetrics;
26use vortex_scan::ScanBuilder;
27use vortex_scan::SplitBy;
28use vortex_session::VortexSession;
29use vortex_utils::aliases::hash_map::HashMap;
30
31use crate::footer::Footer;
32use crate::pruning::extract_relevant_file_stats_as_struct_row;
33
/// A handle to a Vortex file, bundling the file's footer with the resources
/// needed to read from it.
///
/// The handle is `Clone`; every field is cloned along with it.
#[derive(Clone)]
pub struct VortexFile {
    /// Footer of the file. The row count, dtype, statistics and root layout
    /// exposed by this type are all read from it.
    pub(crate) footer: Footer,
    /// Segment source handed to the layout readers created from this file.
    pub(crate) segment_source: Arc<dyn SegmentSource>,
    /// Metrics handle attached to the scan builders created from this file.
    pub(crate) metrics: VortexMetrics,
    /// Session passed to the layout readers and scan builders created from this file.
    pub(crate) session: VortexSession,
}
50
51impl VortexFile {
52 pub fn footer(&self) -> &Footer {
54 &self.footer
55 }
56
57 pub fn row_count(&self) -> u64 {
59 self.footer.row_count()
60 }
61
62 pub fn dtype(&self) -> &DType {
64 self.footer.dtype()
65 }
66
67 pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
71 self.footer.statistics()
72 }
73
74 pub fn metrics(&self) -> &VortexMetrics {
76 &self.metrics
77 }
78
79 pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
84 self.segment_source.clone()
85 }
86
87 pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
89 let segment_source = self.segment_source();
90 self.footer
91 .layout()
92 .new_reader("".into(), segment_source, &self.session)
94 }
95
96 pub fn scan(&self) -> VortexResult<ScanBuilder<ArrayRef>> {
98 Ok(
99 ScanBuilder::new(self.session.clone(), self.layout_reader()?)
100 .with_metrics(self.metrics.clone()),
101 )
102 }
103
104 #[cfg(gpu_unstable)]
105 pub fn gpu_scan(
106 &self,
107 ctx: Arc<cudarc::driver::CudaContext>,
108 ) -> VortexResult<vortex_scan::gpu::GpuScanBuilder<vortex_gpu::GpuVector>> {
109 let segment_source = self.segment_source();
110 let gpu_reader = self
111 .footer
112 .layout()
113 .new_gpu_reader("".into(), segment_source, ctx)?;
114
115 Ok(vortex_scan::gpu::GpuScanBuilder::new(
116 self.session.clone(),
117 gpu_reader,
118 ))
119 }
120
121 pub fn can_prune(&self, filter: &Expression) -> VortexResult<bool> {
123 let Some((stats, fields)) = self
124 .footer
125 .statistics()
126 .zip(self.footer.dtype().as_struct_fields_opt())
127 else {
128 return Ok(false);
129 };
130
131 let set = FieldPathSet::from_iter(fields.names().iter().zip(stats.iter()).flat_map(
132 |(name, stats)| {
133 stats.iter().map(|(stat, _)| {
134 FieldPath::from_iter([
135 Field::Name(name.clone()),
136 Field::Name(stat.name().into()),
137 ])
138 })
139 },
140 ));
141
142 let Some((predicate, required_stats)) = checked_pruning_expr(filter, &set) else {
143 return Ok(false);
144 };
145
146 let required_file_stats = HashMap::from_iter(
147 required_stats
148 .map()
149 .iter()
150 .map(|(path, stats)| (path.clone(), stats.clone())),
151 );
152
153 let Some(file_stats) =
154 extract_relevant_file_stats_as_struct_row(&required_file_stats, stats, fields)?
155 else {
156 return Ok(false);
157 };
158
159 Ok(predicate
160 .evaluate(&file_stats)?
161 .as_constant()
162 .is_some_and(|result| result.as_bool().value() == Some(true)))
163 }
164
165 pub fn splits(&self) -> VortexResult<Vec<Range<u64>>> {
166 let reader = self.layout_reader()?;
167 Ok(SplitBy::Layout
168 .splits(reader.as_ref(), &(0..reader.row_count()), &[FieldMask::All])?
169 .into_iter()
170 .tuple_windows()
171 .map(|(start, end)| start..end)
172 .collect())
173 }
174}