vortex_file/file.rs
1//! This module defines the [`VortexFile`] struct, which represents a Vortex file on disk or in memory.
2//!
3//! The `VortexFile` provides methods for accessing file metadata, creating segment sources for reading
4//! data from the file, and initiating scans to read the file's contents into memory as Vortex arrays.
5use std::sync::Arc;
6
7use vortex_array::stats::StatsSet;
8use vortex_dtype::DType;
9use vortex_error::VortexResult;
10use vortex_layout::LayoutReader;
11use vortex_layout::scan::ScanBuilder;
12use vortex_layout::segments::SegmentSource;
13use vortex_metrics::VortexMetrics;
14
15use crate::footer::Footer;
16
17/// Represents a Vortex file, providing access to its metadata and content.
18///
19/// A `VortexFile` is created by opening a Vortex file using [`VortexOpenOptions`](crate::VortexOpenOptions).
20/// It provides methods for accessing file metadata (such as row count, data type, and statistics)
21/// and for initiating scans to read the file's contents.
22#[derive(Clone)]
23pub struct VortexFile {
24 /// The footer of the Vortex file, containing metadata and layout information.
25 pub(crate) footer: Footer,
26 /// A factory for creating segment sources that read data from the file.
27 pub(crate) segment_source_factory: Arc<dyn SegmentSourceFactory>,
28 /// Metrics tied to the file.
29 pub(crate) metrics: VortexMetrics,
30}
31
32impl VortexFile {
33 /// Returns a reference to the file's footer, which contains metadata and layout information.
34 pub fn footer(&self) -> &Footer {
35 &self.footer
36 }
37
38 /// Returns the number of rows in the file.
39 pub fn row_count(&self) -> u64 {
40 self.footer.row_count()
41 }
42
43 /// Returns the data type of the file's contents.
44 pub fn dtype(&self) -> &DType {
45 self.footer.dtype()
46 }
47
48 /// Returns the file's statistics, if available.
49 ///
50 /// Statistics can be used for query optimization and data exploration.
51 pub fn file_stats(&self) -> Option<&Arc<[StatsSet]>> {
52 self.footer.statistics()
53 }
54
55 /// Returns a reference to the file's metrics.
56 pub fn metrics(&self) -> &VortexMetrics {
57 &self.metrics
58 }
59
60 /// Create a new segment source for reading from the file.
61 ///
62 /// This may spawn a background I/O driver that will exit when the returned segment source
63 /// is dropped.
64 pub fn segment_source(&self) -> Arc<dyn SegmentSource> {
65 self.segment_source_factory
66 .segment_source(self.metrics.clone())
67 }
68
69 /// Create a new layout reader for the file.
70 pub fn layout_reader(&self) -> VortexResult<Arc<dyn LayoutReader>> {
71 let segment_source = self.segment_source();
72 self.footer
73 .layout()
74 .reader(&segment_source, self.footer().ctx())
75 }
76
77 /// Initiate a scan of the file, returning a builder for configuring the scan.
78 pub fn scan(&self) -> VortexResult<ScanBuilder> {
79 Ok(ScanBuilder::new(self.layout_reader()?).with_metrics(self.metrics.clone()))
80 }
81}
82
83/// A factory for creating segment sources that read data from a Vortex file.
84///
85/// This trait abstracts over different implementations of segment sources, allowing
86/// for different I/O strategies (e.g., synchronous, asynchronous, memory-mapped)
87/// to be used with the same file interface.
88pub trait SegmentSourceFactory: 'static + Send + Sync {
89 /// Create a segment source for reading segments from the file.
90 ///
91 /// # Arguments
92 ///
93 /// * `metrics` - Metrics for monitoring the performance of the segment source.
94 ///
95 /// # Returns
96 ///
97 /// A new segment source that can be used to read data from the file.
98 fn segment_source(&self, metrics: VortexMetrics) -> Arc<dyn SegmentSource>;
99}