vortex_file/
file.rs

1//! This module defines the [`VortexFile`] struct, which represents a Vortex file on disk or in memory.
2//!
3//! The `VortexFile` provides methods for accessing file metadata, creating segment sources for reading
4//! data from the file, and initiating scans to read the file's contents into memory as Vortex arrays.
5use std::sync::Arc;
6
7use vortex_array::stats::StatsSet;
8use vortex_dtype::DType;
9use vortex_error::VortexResult;
10use vortex_layout::LayoutReader;
11use vortex_layout::scan::ScanBuilder;
12use vortex_layout::segments::SegmentSource;
13use vortex_metrics::VortexMetrics;
14
15use crate::footer::Footer;
16
17/// Represents a Vortex file, providing access to its metadata and content.
18///
19/// A `VortexFile` is created by opening a Vortex file using [`VortexOpenOptions`](crate::VortexOpenOptions).
20/// It provides methods for accessing file metadata (such as row count, data type, and statistics)
21/// and for initiating scans to read the file's contents.
22#[derive(Clone)]
23pub struct VortexFile {
24    /// The footer of the Vortex file, containing metadata and layout information.
25    pub(crate) footer: Footer,
26    /// A factory for creating segment sources that read data from the file.
27    pub(crate) segment_source_factory: Arc<dyn SegmentSourceFactory>,
28    /// Metrics tied to the file.
29    pub(crate) metrics: VortexMetrics,
30}
31
32impl VortexFile {
33    /// Returns a reference to the file's footer, which contains metadata and layout information.
34    pub fn footer(&self) -> &Footer {
35        &self.footer
36    }
37
38    /// Returns the number of rows in the file.
39    pub fn row_count(&self) -> u64 {
40        self.footer.row_count()
41    }
42
43    /// Returns the data type of the file's contents.
44    pub fn dtype(&self) -> &DType {
45        self.footer.dtype()
46    }
47
48    /// Returns the file's statistics, if available.
49    ///
50    /// Statistics can be used for query optimization and data exploration.
51    pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
52        self.footer.statistics()
53    }
54
55    /// Returns a reference to the file's metrics.
56    pub fn metrics(&self) -> &VortexMetrics {
57        &self.metrics
58    }
59
60    /// Create a new segment source for reading from the file.
61    ///
62    /// This may spawn a background I/O driver that will exit when the returned segment source
63    /// is dropped.
64    pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
65        self.segment_source_factory
66            .segment_source(self.metrics.clone())
67    }
68
69    /// Create a new layout reader for the file.
70    pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
71        let segment_source = self.segment_source();
72        self.footer
73            .layout()
74            .reader(&segment_source, self.footer().ctx())
75    }
76
77    /// Initiate a scan of the file, returning a builder for configuring the scan.
78    pub fn scan(&self) -> VortexResult<ScanBuilder> {
79        Ok(ScanBuilder::new(self.layout_reader()?).with_metrics(self.metrics.clone()))
80    }
81}
82
83/// A factory for creating segment sources that read data from a Vortex file.
84///
85/// This trait abstracts over different implementations of segment sources, allowing
86/// for different I/O strategies (e.g., synchronous, asynchronous, memory-mapped)
87/// to be used with the same file interface.
88pub trait SegmentSourceFactory: 'static + Send + Sync {
89    /// Create a segment source for reading segments from the file.
90    ///
91    /// # Arguments
92    ///
93    /// * `metrics` - Metrics for monitoring the performance of the segment source.
94    ///
95    /// # Returns
96    ///
97    /// A new segment source that can be used to read data from the file.
98    fn segment_source(&self, metrics: VortexMetrics) -> Arc<dyn SegmentSource>;
99}