vortex_file/file.rs
1//! This module defines the [`VortexFile`] struct, which represents a Vortex file on disk or in memory.
2//!
3//! The `VortexFile` provides methods for accessing file metadata, creating segment sources for reading
4//! data from the file, and initiating scans to read the file's contents into memory as Vortex arrays.
5use std::sync::Arc;
6
7use vortex_array::ArrayRef;
8use vortex_array::stats::StatsSet;
9use vortex_dtype::DType;
10use vortex_error::VortexResult;
11use vortex_layout::LayoutReader;
12use vortex_layout::scan::ScanBuilder;
13use vortex_layout::segments::SegmentSource;
14use vortex_metrics::VortexMetrics;
15
16use crate::footer::Footer;
17
18/// Represents a Vortex file, providing access to its metadata and content.
19///
20/// A `VortexFile` is created by opening a Vortex file using [`VortexOpenOptions`](crate::VortexOpenOptions).
21/// It provides methods for accessing file metadata (such as row count, data type, and statistics)
22/// and for initiating scans to read the file's contents.
23#[derive(Clone)]
24pub struct VortexFile {
25 /// The footer of the Vortex file, containing metadata and layout information.
26 pub(crate) footer: Footer,
27 /// A factory for creating segment sources that read data from the file.
28 pub(crate) segment_source_factory: Arc<dyn SegmentSourceFactory>,
29 /// Metrics tied to the file.
30 pub(crate) metrics: VortexMetrics,
31}
32
33impl VortexFile {
34 /// Returns a reference to the file's footer, which contains metadata and layout information.
35 pub fn footer(&self) -> &Footer {
36 &self.footer
37 }
38
39 /// Returns the number of rows in the file.
40 pub fn row_count(&self) -> u64 {
41 self.footer.row_count()
42 }
43
44 /// Returns the data type of the file's contents.
45 pub fn dtype(&self) -> &DType {
46 self.footer.dtype()
47 }
48
49 /// Returns the file's statistics, if available.
50 ///
51 /// Statistics can be used for query optimization and data exploration.
52 pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
53 self.footer.statistics()
54 }
55
56 /// Returns a reference to the file's metrics.
57 pub fn metrics(&self) -> &VortexMetrics {
58 &self.metrics
59 }
60
61 /// Create a new segment source for reading from the file.
62 ///
63 /// This may spawn a background I/O driver that will exit when the returned segment source
64 /// is dropped.
65 pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
66 self.segment_source_factory
67 .segment_source(self.metrics.clone())
68 }
69
70 /// Create a new layout reader for the file.
71 pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
72 let segment_source = self.segment_source();
73 self.footer
74 .layout()
75 .reader(&segment_source, self.footer().ctx())
76 }
77
78 /// Initiate a scan of the file, returning a builder for configuring the scan.
79 pub fn scan(&self) -> VortexResult<ScanBuilder<ArrayRef>> {
80 Ok(ScanBuilder::new(self.layout_reader()?).with_metrics(self.metrics.clone()))
81 }
82}
83
84/// A factory for creating segment sources that read data from a Vortex file.
85///
86/// This trait abstracts over different implementations of segment sources, allowing
87/// for different I/O strategies (e.g., synchronous, asynchronous, memory-mapped)
88/// to be used with the same file interface.
89pub trait SegmentSourceFactory: 'static + Send + Sync {
90 /// Create a segment source for reading segments from the file.
91 ///
92 /// # Arguments
93 ///
94 /// * `metrics` - Metrics for monitoring the performance of the segment source.
95 ///
96 /// # Returns
97 ///
98 /// A new segment source that can be used to read data from the file.
99 fn segment_source(&self, metrics: VortexMetrics) -> Arc<dyn SegmentSource>;
100}