Skip to main content

vortex_file/footer/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! This module defines the footer of a Vortex file, which contains metadata about the file's contents.
5//!
6//! The footer includes:
7//! - The file's layout, which describes how the data is organized
8//! - Statistics about the data, which can be used for query optimization
9//! - The segment map, which describes the physical location of data in the file
10//!
11//! The footer is located at the end of the file and is used to interpret the file's contents.
12mod file_layout;
13mod file_statistics;
14mod postscript;
15mod segment;
16
17use std::sync::Arc;
18
19mod serializer;
20pub use serializer::*;
21mod deserializer;
22pub use deserializer::*;
23pub use file_statistics::FileStatistics;
24use flatbuffers::root;
25use itertools::Itertools;
26pub use segment::*;
27use vortex_array::dtype::DType;
28use vortex_array::vtable::ArrayId;
29use vortex_buffer::ByteBuffer;
30use vortex_error::VortexResult;
31use vortex_error::vortex_bail;
32use vortex_error::vortex_err;
33use vortex_flatbuffers::FlatBuffer;
34use vortex_flatbuffers::footer as fb;
35use vortex_layout::LayoutEncodingId;
36use vortex_layout::LayoutRef;
37use vortex_layout::layout_from_flatbuffer;
38use vortex_layout::session::LayoutSessionExt;
39use vortex_session::VortexSession;
40use vortex_session::registry::ReadContext;
41
/// Captures the layout information of a Vortex file.
///
/// A `Footer` bundles the root layout, the segment map, optional file-level statistics and the
/// array read context. It is constructed either directly from these parts or by decoding the
/// footer and layout flatbuffers.
#[derive(Debug, Clone)]
pub struct Footer {
    /// The root layout of the file; the file's dtype and row count are read from it.
    root_layout: LayoutRef,
    /// The segment specs of the file, exposed to callers as the segment map.
    segments: Arc<[SegmentSpec]>,
    /// File-level statistics, if any were provided.
    statistics: Option<FileStatistics>,
    /// The specific arrays used within the file, in the order they were registered.
    array_read_ctx: ReadContext,
    /// The approximate size of the footer in bytes, used for caching and memory management.
    /// `None` when no size has been recorded.
    approx_byte_size: Option<usize>,
}
53
54impl Footer {
55    pub(crate) fn new(
56        root_layout: LayoutRef,
57        segments: Arc<[SegmentSpec]>,
58        statistics: Option<FileStatistics>,
59        array_read_ctx: ReadContext,
60    ) -> Self {
61        Self {
62            root_layout,
63            segments,
64            statistics,
65            array_read_ctx,
66            approx_byte_size: None,
67        }
68    }
69
70    pub(crate) fn with_approx_byte_size(mut self, approx_byte_size: usize) -> Self {
71        self.approx_byte_size = Some(approx_byte_size);
72        self
73    }
74
75    /// Read the [`Footer`] from a flatbuffer.
76    pub(crate) fn from_flatbuffer(
77        footer_bytes: FlatBuffer,
78        layout_bytes: FlatBuffer,
79        dtype: DType,
80        statistics: Option<FileStatistics>,
81        session: &VortexSession,
82    ) -> VortexResult<Self> {
83        let approx_byte_size = footer_bytes.len() + layout_bytes.len();
84        let fb_footer = root::<fb::Footer>(&footer_bytes)?;
85
86        // Create a LayoutContext from the registry.
87        let layout_specs = fb_footer.layout_specs();
88        let layout_ids: Arc<[_]> = layout_specs
89            .iter()
90            .flat_map(|e| e.iter())
91            .map(|encoding| LayoutEncodingId::new_arc(Arc::from(encoding.id())))
92            .collect();
93        let layout_read_ctx = ReadContext::new(layout_ids);
94
95        // Create an ArrayContext from the registry.
96        let array_specs = fb_footer.array_specs();
97        let array_ids: Arc<[_]> = array_specs
98            .iter()
99            .flat_map(|e| e.iter())
100            .map(|encoding| ArrayId::new_arc(Arc::from(encoding.id())))
101            .collect();
102        let array_read_ctx = ReadContext::new(array_ids);
103
104        let root_layout = layout_from_flatbuffer(
105            layout_bytes,
106            &dtype,
107            &layout_read_ctx,
108            &array_read_ctx,
109            session.layouts().registry(),
110        )?;
111
112        let segments: Arc<[SegmentSpec]> = fb_footer
113            .segment_specs()
114            .ok_or_else(|| vortex_err!("FileLayout missing segment specs"))?
115            .iter()
116            .map(SegmentSpec::try_from)
117            .try_collect()?;
118
119        // Note this assertion is `<=` since we allow zero-length segments
120        if !segments.is_sorted_by_key(|segment| segment.offset) {
121            vortex_bail!("Segment offsets are not ordered");
122        }
123
124        Ok(Self {
125            root_layout,
126            segments,
127            statistics,
128            array_read_ctx,
129            approx_byte_size: Some(approx_byte_size),
130        })
131    }
132
133    /// Returns the root [`LayoutRef`] of the file.
134    pub fn layout(&self) -> &LayoutRef {
135        &self.root_layout
136    }
137
138    /// Returns the segment map of the file.
139    pub fn segment_map(&self) -> &Arc<[SegmentSpec]> {
140        &self.segments
141    }
142
143    /// Returns the statistics of the file.
144    pub fn statistics(&self) -> Option<&FileStatistics> {
145        self.statistics.as_ref()
146    }
147
148    /// Returns the [`DType`] of the file.
149    pub fn dtype(&self) -> &DType {
150        self.root_layout.dtype()
151    }
152
153    /// Returns the approximate size of the footer in bytes, used for caching and memory management.
154    pub fn approx_byte_size(&self) -> Option<usize> {
155        self.approx_byte_size
156    }
157
158    /// Returns the number of rows in the file.
159    pub fn row_count(&self) -> u64 {
160        self.root_layout.row_count()
161    }
162
163    /// Returns a serializer for this footer.
164    pub fn into_serializer(self) -> FooterSerializer {
165        FooterSerializer::new(self)
166    }
167
168    /// Create a deserializer for a Vortex file footer.
169    pub fn deserializer(eof_buffer: ByteBuffer, session: VortexSession) -> FooterDeserializer {
170        FooterDeserializer::new(eof_buffer, session)
171    }
172}