vortex_file/footer/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
//! This module defines the footer of a Vortex file, which contains metadata about the file's contents.
//!
//! The footer includes:
//! - The file's layout, which describes how the data is organized
//! - Statistics about the data, which can be used for query optimization
//! - The segment map, which describes the physical location of data in the file
//!
//! The footer is located at the end of the file and is used to interpret the file's contents.
12mod file_layout;
13mod file_statistics;
14mod postscript;
15mod segment;
16
17use std::sync::Arc;
18
19mod serializer;
20pub use serializer::*;
21mod deserializer;
22pub use deserializer::*;
23pub(crate) use file_statistics::*;
24use flatbuffers::root;
25use itertools::Itertools;
26pub use segment::*;
27use vortex_array::ArrayContext;
28use vortex_array::session::ArraySessionExt;
29use vortex_array::stats::StatsSet;
30use vortex_buffer::ByteBuffer;
31use vortex_dtype::DType;
32use vortex_error::VortexResult;
33use vortex_error::vortex_bail;
34use vortex_error::vortex_err;
35use vortex_flatbuffers::FlatBuffer;
36use vortex_flatbuffers::footer as fb;
37use vortex_layout::LayoutContext;
38use vortex_layout::LayoutRef;
39use vortex_layout::layout_from_flatbuffer;
40use vortex_layout::session::LayoutSessionExt;
41use vortex_session::VortexSession;
42
43/// Captures the layout information of a Vortex file.
44#[derive(Debug, Clone)]
45pub struct Footer {
46    root_layout: LayoutRef,
47    segments: Arc<[SegmentSpec]>,
48    statistics: Option<FileStatistics>,
49    // The specific arrays used within the file, in the order they were registered.
50    array_ctx: ArrayContext,
51}
52
53impl Footer {
54    pub(crate) fn new(
55        root_layout: LayoutRef,
56        segments: Arc<[SegmentSpec]>,
57        statistics: Option<FileStatistics>,
58        array_ctx: ArrayContext,
59    ) -> Self {
60        Self {
61            root_layout,
62            segments,
63            statistics,
64            array_ctx,
65        }
66    }
67
68    /// Read the [`Footer`] from a flatbuffer.
69    pub(crate) fn from_flatbuffer(
70        footer_bytes: FlatBuffer,
71        layout_bytes: FlatBuffer,
72        dtype: DType,
73        statistics: Option<FileStatistics>,
74        session: &VortexSession,
75    ) -> VortexResult<Self> {
76        let fb_footer = root::<fb::Footer>(&footer_bytes)?;
77
78        // Create a LayoutContext from the registry.
79        let layout_specs = fb_footer.layout_specs();
80        let layout_ids = layout_specs
81            .iter()
82            .flat_map(|e| e.iter())
83            .map(|encoding| encoding.id());
84        let layout_ctx =
85            LayoutContext::try_from_registry(session.layouts().registry(), layout_ids)?;
86
87        // Create an ArrayContext from the registry.
88        let array_specs = fb_footer.array_specs();
89        let array_ids = array_specs
90            .iter()
91            .flat_map(|e| e.iter())
92            .map(|encoding| encoding.id());
93        let array_ctx = ArrayContext::try_from_registry(session.arrays().registry(), array_ids)?;
94
95        let root_layout = layout_from_flatbuffer(layout_bytes, &dtype, &layout_ctx, &array_ctx)?;
96
97        let segments: Arc<[SegmentSpec]> = fb_footer
98            .segment_specs()
99            .ok_or_else(|| vortex_err!("FileLayout missing segment specs"))?
100            .iter()
101            .map(SegmentSpec::try_from)
102            .try_collect()?;
103
104        // Note this assertion is `<=` since we allow zero-length segments
105        if !segments.is_sorted_by_key(|segment| segment.offset) {
106            vortex_bail!("Segment offsets are not ordered");
107        }
108
109        Ok(Self {
110            root_layout,
111            segments,
112            statistics,
113            array_ctx,
114        })
115    }
116
117    /// Returns the root [`LayoutRef`] of the file.
118    pub fn layout(&self) -> &LayoutRef {
119        &self.root_layout
120    }
121
122    /// Returns the segment map of the file.
123    pub fn segment_map(&self) -> &Arc<[SegmentSpec]> {
124        &self.segments
125    }
126
127    /// Returns the statistics of the file.
128    pub fn statistics(&self) -> Option<&Arc<[StatsSet]>> {
129        self.statistics.as_ref().map(|s| &s.0)
130    }
131
132    /// Returns the [`DType`] of the file.
133    pub fn dtype(&self) -> &DType {
134        self.root_layout.dtype()
135    }
136
137    /// Returns the number of rows in the file.
138    pub fn row_count(&self) -> u64 {
139        self.root_layout.row_count()
140    }
141
142    /// Returns a serializer for this footer.
143    pub fn into_serializer(self) -> FooterSerializer {
144        FooterSerializer::new(self)
145    }
146
147    /// Create a deserializer for a Vortex file footer.
148    pub fn deserializer(eof_buffer: ByteBuffer, session: VortexSession) -> FooterDeserializer {
149        FooterDeserializer::new(eof_buffer, session)
150    }
151}