vortex_file/
file.rs

1//! This module defines the [`VortexFile`] struct, which represents a Vortex file on disk or in memory.
2//!
3//! The `VortexFile` provides methods for accessing file metadata, creating segment sources for reading
4//! data from the file, and initiating scans to read the file's contents into memory as Vortex arrays.
5use std::sync::Arc;
6
7use vortex_array::ArrayRef;
8use vortex_array::stats::StatsSet;
9use vortex_dtype::DType;
10use vortex_error::VortexResult;
11use vortex_layout::LayoutReader;
12use vortex_layout::scan::ScanBuilder;
13use vortex_layout::segments::SegmentSource;
14use vortex_metrics::VortexMetrics;
15
16use crate::footer::Footer;
17
18/// Represents a Vortex file, providing access to its metadata and content.
19///
20/// A `VortexFile` is created by opening a Vortex file using [`VortexOpenOptions`](crate::VortexOpenOptions).
21/// It provides methods for accessing file metadata (such as row count, data type, and statistics)
22/// and for initiating scans to read the file's contents.
23#[derive(Clone)]
24pub struct VortexFile {
25    /// The footer of the Vortex file, containing metadata and layout information.
26    pub(crate) footer: Footer,
27    /// A factory for creating segment sources that read data from the file.
28    pub(crate) segment_source_factory: Arc<dyn SegmentSourceFactory>,
29    /// Metrics tied to the file.
30    pub(crate) metrics: VortexMetrics,
31}
32
33impl VortexFile {
34    /// Returns a reference to the file's footer, which contains metadata and layout information.
35    pub fn footer(&self) -> &Footer {
36        &self.footer
37    }
38
39    /// Returns the number of rows in the file.
40    pub fn row_count(&self) -> u64 {
41        self.footer.row_count()
42    }
43
44    /// Returns the data type of the file's contents.
45    pub fn dtype(&self) -> &DType {
46        self.footer.dtype()
47    }
48
49    /// Returns the file's statistics, if available.
50    ///
51    /// Statistics can be used for query optimization and data exploration.
52    pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
53        self.footer.statistics()
54    }
55
56    /// Returns a reference to the file's metrics.
57    pub fn metrics(&self) -> &VortexMetrics {
58        &self.metrics
59    }
60
61    /// Create a new segment source for reading from the file.
62    ///
63    /// This may spawn a background I/O driver that will exit when the returned segment source
64    /// is dropped.
65    pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
66        self.segment_source_factory
67            .segment_source(self.metrics.clone())
68    }
69
70    /// Create a new layout reader for the file.
71    pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
72        let segment_source = self.segment_source();
73        self.footer
74            .layout()
75            .reader(&segment_source, self.footer().ctx())
76    }
77
78    /// Initiate a scan of the file, returning a builder for configuring the scan.
79    pub fn scan(&self) -> VortexResult<ScanBuilder<ArrayRef>> {
80        Ok(ScanBuilder::new(self.layout_reader()?).with_metrics(self.metrics.clone()))
81    }
82}
83
84/// A factory for creating segment sources that read data from a Vortex file.
85///
86/// This trait abstracts over different implementations of segment sources, allowing
87/// for different I/O strategies (e.g., synchronous, asynchronous, memory-mapped)
88/// to be used with the same file interface.
89pub trait SegmentSourceFactory: 'static + Send + Sync {
90    /// Create a segment source for reading segments from the file.
91    ///
92    /// # Arguments
93    ///
94    /// * `metrics` - Metrics for monitoring the performance of the segment source.
95    ///
96    /// # Returns
97    ///
98    /// A new segment source that can be used to read data from the file.
99    fn segment_source(&self, metrics: VortexMetrics) -> Arc<dyn SegmentSource>;
100}