vortex_file/footer/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
//! This module defines the footer of a Vortex file, which contains metadata about the file's contents.
//!
//! The footer includes:
//! - The file's layout, which describes how the data is organized
//! - Statistics about the data, which can be used for query optimization
//! - The segment map, which describes the physical location of data in the file
//!
//! The footer is located at the end of the file and is used to interpret the file's contents.
12mod file_layout;
13mod file_statistics;
14mod postscript;
15mod segment;
16
17use std::sync::Arc;
18
19mod serializer;
20pub use serializer::*;
21mod deserializer;
22pub use deserializer::*;
23pub(crate) use file_statistics::*;
24use flatbuffers::root;
25use itertools::Itertools;
26pub use segment::*;
27use vortex_array::stats::StatsSet;
28use vortex_array::{ArrayContext, ArrayRegistry};
29use vortex_buffer::ByteBuffer;
30use vortex_dtype::DType;
31use vortex_error::{VortexResult, vortex_bail, vortex_err};
32use vortex_flatbuffers::{FlatBuffer, footer as fb};
33use vortex_layout::{LayoutRef, LayoutRegistry, layout_from_flatbuffer};
34
35/// Captures the layout information of a Vortex file.
36#[derive(Debug, Clone)]
37pub struct Footer {
38    root_layout: LayoutRef,
39    segments: Arc<[SegmentSpec]>,
40    statistics: Option<FileStatistics>,
41    array_ctx: ArrayContext,
42}
43
44impl Footer {
45    pub(crate) fn new(
46        root_layout: LayoutRef,
47        segments: Arc<[SegmentSpec]>,
48        statistics: Option<FileStatistics>,
49        array_ctx: ArrayContext,
50    ) -> Self {
51        Self {
52            root_layout,
53            segments,
54            statistics,
55            array_ctx,
56        }
57    }
58
59    /// Read the [`Footer`] from a flatbuffer.
60    pub(crate) fn from_flatbuffer(
61        footer_bytes: FlatBuffer,
62        layout_bytes: FlatBuffer,
63        dtype: DType,
64        statistics: Option<FileStatistics>,
65        array_registry: &ArrayRegistry,
66        layout_registry: &LayoutRegistry,
67    ) -> VortexResult<Self> {
68        let fb_footer = root::<fb::Footer>(&footer_bytes)?;
69
70        // Create a LayoutContext from the registry.
71        let layout_specs = fb_footer.layout_specs();
72        let layout_ids = layout_specs
73            .iter()
74            .flat_map(|e| e.iter())
75            .map(|encoding| encoding.id());
76        let layout_ctx = layout_registry.new_context(layout_ids)?;
77
78        // Create an ArrayContext from the registry.
79        let array_specs = fb_footer.array_specs();
80        let array_ids = array_specs
81            .iter()
82            .flat_map(|e| e.iter())
83            .map(|encoding| encoding.id());
84        let array_ctx = array_registry.new_context(array_ids)?;
85
86        let root_layout = layout_from_flatbuffer(layout_bytes, &dtype, &layout_ctx, &array_ctx)?;
87
88        let segments: Arc<[SegmentSpec]> = fb_footer
89            .segment_specs()
90            .ok_or_else(|| vortex_err!("FileLayout missing segment specs"))?
91            .iter()
92            .map(SegmentSpec::try_from)
93            .try_collect()?;
94
95        // Note this assertion is `<=` since we allow zero-length segments
96        if !segments.is_sorted_by_key(|segment| segment.offset) {
97            vortex_bail!("Segment offsets are not ordered");
98        }
99
100        Ok(Self {
101            root_layout,
102            segments,
103            statistics,
104            array_ctx,
105        })
106    }
107
108    /// Returns the root [`LayoutRef`] of the file.
109    pub fn layout(&self) -> &LayoutRef {
110        &self.root_layout
111    }
112
113    /// Returns the segment map of the file.
114    pub fn segment_map(&self) -> &Arc<[SegmentSpec]> {
115        &self.segments
116    }
117
118    /// Returns the statistics of the file.
119    pub fn statistics(&self) -> Option<&Arc<[StatsSet]>> {
120        self.statistics.as_ref().map(|s| &s.0)
121    }
122
123    /// Returns the [`DType`] of the file.
124    pub fn dtype(&self) -> &DType {
125        self.root_layout.dtype()
126    }
127
128    /// Returns the number of rows in the file.
129    pub fn row_count(&self) -> u64 {
130        self.root_layout.row_count()
131    }
132
133    /// Returns a serializer for this footer.
134    pub fn into_serializer(self) -> FooterSerializer {
135        FooterSerializer::new(self)
136    }
137
138    /// Create a deserializer for a Vortex file footer.
139    pub fn deserializer(eof_buffer: ByteBuffer) -> FooterDeserializer {
140        FooterDeserializer::new(eof_buffer)
141    }
142}