vortex_file/footer/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
//! This module defines the footer of a Vortex file, which contains metadata about the file's contents.
//!
//! The footer includes:
//! - The file's layout, which describes how the data is organized
//! - Statistics about the data, which can be used for query optimization
//! - The segment map, which describes the physical location of data in the file
//!
//! The footer is located at the end of the file and is used to interpret the file's contents.
12mod file_layout;
13mod file_statistics;
14mod postscript;
15mod segment;
16
17use std::sync::Arc;
18
19mod serializer;
20pub use serializer::*;
21mod deserializer;
22pub use deserializer::*;
23pub(crate) use file_statistics::*;
24use flatbuffers::root;
25use itertools::Itertools;
26pub use segment::*;
27use vortex_array::stats::StatsSet;
28use vortex_array::{ArrayContext, ArraySessionExt};
29use vortex_buffer::ByteBuffer;
30use vortex_dtype::DType;
31use vortex_error::{VortexResult, vortex_bail, vortex_err};
32use vortex_flatbuffers::{FlatBuffer, footer as fb};
33use vortex_layout::session::LayoutSessionExt;
34use vortex_layout::{LayoutContext, LayoutRef, layout_from_flatbuffer};
35use vortex_session::VortexSession;
36
37/// Captures the layout information of a Vortex file.
38#[derive(Debug, Clone)]
39pub struct Footer {
40    root_layout: LayoutRef,
41    segments: Arc<[SegmentSpec]>,
42    statistics: Option<FileStatistics>,
43    // The specific arrays used within the file, in the order they were registered.
44    array_ctx: ArrayContext,
45}
46
47impl Footer {
48    pub(crate) fn new(
49        root_layout: LayoutRef,
50        segments: Arc<[SegmentSpec]>,
51        statistics: Option<FileStatistics>,
52        array_ctx: ArrayContext,
53    ) -> Self {
54        Self {
55            root_layout,
56            segments,
57            statistics,
58            array_ctx,
59        }
60    }
61
62    /// Read the [`Footer`] from a flatbuffer.
63    pub(crate) fn from_flatbuffer(
64        footer_bytes: FlatBuffer,
65        layout_bytes: FlatBuffer,
66        dtype: DType,
67        statistics: Option<FileStatistics>,
68        session: VortexSession,
69    ) -> VortexResult<Self> {
70        let fb_footer = root::<fb::Footer>(&footer_bytes)?;
71
72        // Create a LayoutContext from the registry.
73        let layout_specs = fb_footer.layout_specs();
74        let layout_ids = layout_specs
75            .iter()
76            .flat_map(|e| e.iter())
77            .map(|encoding| encoding.id());
78        let layout_ctx =
79            LayoutContext::try_from_registry(session.layouts().registry(), layout_ids)?;
80
81        // Create an ArrayContext from the registry.
82        let array_specs = fb_footer.array_specs();
83        let array_ids = array_specs
84            .iter()
85            .flat_map(|e| e.iter())
86            .map(|encoding| encoding.id());
87        let array_ctx = ArrayContext::try_from_registry(session.arrays().registry(), array_ids)?;
88
89        let root_layout = layout_from_flatbuffer(layout_bytes, &dtype, &layout_ctx, &array_ctx)?;
90
91        let segments: Arc<[SegmentSpec]> = fb_footer
92            .segment_specs()
93            .ok_or_else(|| vortex_err!("FileLayout missing segment specs"))?
94            .iter()
95            .map(SegmentSpec::try_from)
96            .try_collect()?;
97
98        // Note this assertion is `<=` since we allow zero-length segments
99        if !segments.is_sorted_by_key(|segment| segment.offset) {
100            vortex_bail!("Segment offsets are not ordered");
101        }
102
103        Ok(Self {
104            root_layout,
105            segments,
106            statistics,
107            array_ctx,
108        })
109    }
110
111    /// Returns the root [`LayoutRef`] of the file.
112    pub fn layout(&self) -> &LayoutRef {
113        &self.root_layout
114    }
115
116    /// Returns the segment map of the file.
117    pub fn segment_map(&self) -> &Arc<[SegmentSpec]> {
118        &self.segments
119    }
120
121    /// Returns the statistics of the file.
122    pub fn statistics(&self) -> Option<&Arc<[StatsSet]>> {
123        self.statistics.as_ref().map(|s| &s.0)
124    }
125
126    /// Returns the [`DType`] of the file.
127    pub fn dtype(&self) -> &DType {
128        self.root_layout.dtype()
129    }
130
131    /// Returns the number of rows in the file.
132    pub fn row_count(&self) -> u64 {
133        self.root_layout.row_count()
134    }
135
136    /// Returns a serializer for this footer.
137    pub fn into_serializer(self) -> FooterSerializer {
138        FooterSerializer::new(self)
139    }
140
141    /// Create a deserializer for a Vortex file footer.
142    pub fn deserializer(eof_buffer: ByteBuffer, session: VortexSession) -> FooterDeserializer {
143        FooterDeserializer::new(eof_buffer, session)
144    }
145}