Skip to main content

vortex_file/footer/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
//! This module defines the footer of a Vortex file, which contains metadata about the file's contents.
//!
//! The footer includes:
//! - The file's layout, which describes how the data is organized
//! - Statistics about the data, which can be used for query optimization
//! - The segment map, which describes the physical location of data in the file
//!
//! The footer is located at the end of the file and is used to interpret the file's contents.
12mod file_layout;
13mod file_statistics;
14mod postscript;
15mod segment;
16
17use std::sync::Arc;
18
19mod serializer;
20pub use serializer::*;
21mod deserializer;
22pub use deserializer::*;
23pub use file_statistics::FileStatistics;
24use flatbuffers::root;
25use itertools::Itertools;
26pub use segment::*;
27use vortex_array::ArrayContext;
28use vortex_array::vtable::ArrayId;
29use vortex_buffer::ByteBuffer;
30use vortex_dtype::DType;
31use vortex_error::VortexResult;
32use vortex_error::vortex_bail;
33use vortex_error::vortex_err;
34use vortex_flatbuffers::FlatBuffer;
35use vortex_flatbuffers::footer as fb;
36use vortex_layout::LayoutContext;
37use vortex_layout::LayoutEncodingId;
38use vortex_layout::LayoutRef;
39use vortex_layout::layout_from_flatbuffer;
40use vortex_layout::session::LayoutSessionExt;
41use vortex_session::VortexSession;
42
/// Captures the layout information of a Vortex file.
///
/// Bundles the root layout, the segment specs, the optional file statistics and the
/// array context of a file. Instances are created through `Footer::new` or decoded
/// from flatbuffers via `Footer::from_flatbuffer`.
#[derive(Debug, Clone)]
pub struct Footer {
    // Root layout of the file; `dtype()` and `row_count()` are answered from it.
    root_layout: LayoutRef,
    // Segment specs exposed by `segment_map()`. `from_flatbuffer` rejects footers whose
    // segment offsets are not in non-decreasing order; `new` stores them unchecked.
    segments: Arc<[SegmentSpec]>,
    // Optional file-level statistics, stored exactly as supplied by the caller.
    statistics: Option<FileStatistics>,
    // The specific arrays used within the file, in the order they were registered.
    array_ctx: ArrayContext,
}
52
53impl Footer {
54    pub(crate) fn new(
55        root_layout: LayoutRef,
56        segments: Arc<[SegmentSpec]>,
57        statistics: Option<FileStatistics>,
58        array_ctx: ArrayContext,
59    ) -> Self {
60        Self {
61            root_layout,
62            segments,
63            statistics,
64            array_ctx,
65        }
66    }
67
68    /// Read the [`Footer`] from a flatbuffer.
69    pub(crate) fn from_flatbuffer(
70        footer_bytes: FlatBuffer,
71        layout_bytes: FlatBuffer,
72        dtype: DType,
73        statistics: Option<FileStatistics>,
74        session: &VortexSession,
75    ) -> VortexResult<Self> {
76        let fb_footer = root::<fb::Footer>(&footer_bytes)?;
77
78        // Create a LayoutContext from the registry.
79        let layout_specs = fb_footer.layout_specs();
80        let layout_ids = layout_specs
81            .iter()
82            .flat_map(|e| e.iter())
83            .map(|encoding| LayoutEncodingId::new_arc(Arc::from(encoding.id())))
84            .collect();
85        let layout_ctx = LayoutContext::new(layout_ids);
86
87        // Create an ArrayContext from the registry.
88        let array_specs = fb_footer.array_specs();
89        let array_ids = array_specs
90            .iter()
91            .flat_map(|e| e.iter())
92            .map(|encoding| ArrayId::new_arc(Arc::from(encoding.id())))
93            .collect();
94        let array_ctx = ArrayContext::new(array_ids);
95
96        let root_layout = layout_from_flatbuffer(
97            layout_bytes,
98            &dtype,
99            &layout_ctx,
100            &array_ctx,
101            session.layouts().registry(),
102        )?;
103
104        let segments: Arc<[SegmentSpec]> = fb_footer
105            .segment_specs()
106            .ok_or_else(|| vortex_err!("FileLayout missing segment specs"))?
107            .iter()
108            .map(SegmentSpec::try_from)
109            .try_collect()?;
110
111        // Note this assertion is `<=` since we allow zero-length segments
112        if !segments.is_sorted_by_key(|segment| segment.offset) {
113            vortex_bail!("Segment offsets are not ordered");
114        }
115
116        Ok(Self {
117            root_layout,
118            segments,
119            statistics,
120            array_ctx,
121        })
122    }
123
124    /// Returns the root [`LayoutRef`] of the file.
125    pub fn layout(&self) -> &LayoutRef {
126        &self.root_layout
127    }
128
129    /// Returns the segment map of the file.
130    pub fn segment_map(&self) -> &Arc<[SegmentSpec]> {
131        &self.segments
132    }
133
134    /// Returns the statistics of the file.
135    pub fn statistics(&self) -> Option<&FileStatistics> {
136        self.statistics.as_ref()
137    }
138
139    /// Returns the [`DType`] of the file.
140    pub fn dtype(&self) -> &DType {
141        self.root_layout.dtype()
142    }
143
144    /// Returns the number of rows in the file.
145    pub fn row_count(&self) -> u64 {
146        self.root_layout.row_count()
147    }
148
149    /// Returns a serializer for this footer.
150    pub fn into_serializer(self) -> FooterSerializer {
151        FooterSerializer::new(self)
152    }
153
154    /// Create a deserializer for a Vortex file footer.
155    pub fn deserializer(eof_buffer: ByteBuffer, session: VortexSession) -> FooterDeserializer {
156        FooterDeserializer::new(eof_buffer, session)
157    }
158}