Skip to main content

vortex_file/footer/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! This module defines the footer of a Vortex file, which contains metadata about the file's contents.
5//!
6//! The footer includes:
7//! - The file's layout, which describes how the data is organized
8//! - Statistics about the data, which can be used for query optimization
//! - Segment map, which describes the physical location of data in the file
10//!
11//! The footer is located at the end of the file and is used to interpret the file's contents.
12mod file_layout;
13mod file_statistics;
14mod postscript;
15mod segment;
16
17use std::sync::Arc;
18
19mod serializer;
20pub use serializer::*;
21mod deserializer;
22pub use deserializer::*;
23pub use file_statistics::FileStatistics;
24use flatbuffers::root;
25use itertools::Itertools;
26pub use segment::*;
27use vortex_array::ArrayContext;
28use vortex_array::dtype::DType;
29use vortex_array::vtable::ArrayId;
30use vortex_buffer::ByteBuffer;
31use vortex_error::VortexResult;
32use vortex_error::vortex_bail;
33use vortex_error::vortex_err;
34use vortex_flatbuffers::FlatBuffer;
35use vortex_flatbuffers::footer as fb;
36use vortex_layout::LayoutContext;
37use vortex_layout::LayoutEncodingId;
38use vortex_layout::LayoutRef;
39use vortex_layout::layout_from_flatbuffer;
40use vortex_layout::session::LayoutSessionExt;
41use vortex_session::VortexSession;
42
43/// Captures the layout information of a Vortex file.
44#[derive(Debug, Clone)]
45pub struct Footer {
46    root_layout: LayoutRef,
47    segments: Arc<[SegmentSpec]>,
48    statistics: Option<FileStatistics>,
49    // The specific arrays used within the file, in the order they were registered.
50    array_ctx: ArrayContext,
51    // The approximate size of the footer in bytes, used for caching and memory management.
52    approx_byte_size: Option<usize>,
53}
54
55impl Footer {
56    pub(crate) fn new(
57        root_layout: LayoutRef,
58        segments: Arc<[SegmentSpec]>,
59        statistics: Option<FileStatistics>,
60        array_ctx: ArrayContext,
61    ) -> Self {
62        Self {
63            root_layout,
64            segments,
65            statistics,
66            array_ctx,
67            approx_byte_size: None,
68        }
69    }
70
71    pub(crate) fn with_approx_byte_size(mut self, approx_byte_size: usize) -> Self {
72        self.approx_byte_size = Some(approx_byte_size);
73        self
74    }
75
76    /// Read the [`Footer`] from a flatbuffer.
77    pub(crate) fn from_flatbuffer(
78        footer_bytes: FlatBuffer,
79        layout_bytes: FlatBuffer,
80        dtype: DType,
81        statistics: Option<FileStatistics>,
82        session: &VortexSession,
83    ) -> VortexResult<Self> {
84        let approx_byte_size = footer_bytes.len() + layout_bytes.len();
85        let fb_footer = root::<fb::Footer>(&footer_bytes)?;
86
87        // Create a LayoutContext from the registry.
88        let layout_specs = fb_footer.layout_specs();
89        let layout_ids = layout_specs
90            .iter()
91            .flat_map(|e| e.iter())
92            .map(|encoding| LayoutEncodingId::new_arc(Arc::from(encoding.id())))
93            .collect();
94        let layout_ctx = LayoutContext::new(layout_ids);
95
96        // Create an ArrayContext from the registry.
97        let array_specs = fb_footer.array_specs();
98        let array_ids = array_specs
99            .iter()
100            .flat_map(|e| e.iter())
101            .map(|encoding| ArrayId::new_arc(Arc::from(encoding.id())))
102            .collect();
103        let array_ctx = ArrayContext::new(array_ids);
104
105        let root_layout = layout_from_flatbuffer(
106            layout_bytes,
107            &dtype,
108            &layout_ctx,
109            &array_ctx,
110            session.layouts().registry(),
111        )?;
112
113        let segments: Arc<[SegmentSpec]> = fb_footer
114            .segment_specs()
115            .ok_or_else(|| vortex_err!("FileLayout missing segment specs"))?
116            .iter()
117            .map(SegmentSpec::try_from)
118            .try_collect()?;
119
120        // Note this assertion is `<=` since we allow zero-length segments
121        if !segments.is_sorted_by_key(|segment| segment.offset) {
122            vortex_bail!("Segment offsets are not ordered");
123        }
124
125        Ok(Self {
126            root_layout,
127            segments,
128            statistics,
129            array_ctx,
130            approx_byte_size: Some(approx_byte_size),
131        })
132    }
133
134    /// Returns the root [`LayoutRef`] of the file.
135    pub fn layout(&self) -> &LayoutRef {
136        &self.root_layout
137    }
138
139    /// Returns the segment map of the file.
140    pub fn segment_map(&self) -> &Arc<[SegmentSpec]> {
141        &self.segments
142    }
143
144    /// Returns the statistics of the file.
145    pub fn statistics(&self) -> Option<&FileStatistics> {
146        self.statistics.as_ref()
147    }
148
149    /// Returns the [`DType`] of the file.
150    pub fn dtype(&self) -> &DType {
151        self.root_layout.dtype()
152    }
153
154    /// Returns the approximate size of the footer in bytes, used for caching and memory management.
155    pub fn approx_byte_size(&self) -> Option<usize> {
156        self.approx_byte_size
157    }
158
159    /// Returns the number of rows in the file.
160    pub fn row_count(&self) -> u64 {
161        self.root_layout.row_count()
162    }
163
164    /// Returns a serializer for this footer.
165    pub fn into_serializer(self) -> FooterSerializer {
166        FooterSerializer::new(self)
167    }
168
169    /// Create a deserializer for a Vortex file footer.
170    pub fn deserializer(eof_buffer: ByteBuffer, session: VortexSession) -> FooterDeserializer {
171        FooterDeserializer::new(eof_buffer, session)
172    }
173}