vortex_file/footer/
serializer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_buffer::ByteBuffer;
5use vortex_error::VortexExpect;
6use vortex_error::VortexResult;
7use vortex_error::vortex_err;
8use vortex_flatbuffers::FlatBuffer;
9use vortex_flatbuffers::FlatBufferRoot;
10use vortex_flatbuffers::WriteFlatBuffer;
11use vortex_flatbuffers::WriteFlatBufferExt;
12use vortex_layout::LayoutContext;
13
14use crate::EOF_SIZE;
15use crate::Footer;
16use crate::MAGIC_BYTES;
17use crate::MAX_POSTSCRIPT_SIZE;
18use crate::VERSION;
19use crate::footer::FileStatistics;
20use crate::footer::file_layout::FooterFlatBufferWriter;
21use crate::footer::postscript::Postscript;
22use crate::footer::postscript::PostscriptSegment;
23
24pub struct FooterSerializer {
25    footer: Footer,
26    exclude_dtype: bool,
27    offset: u64,
28}
29
30impl FooterSerializer {
31    pub(super) fn new(footer: Footer) -> Self {
32        Self {
33            footer,
34            exclude_dtype: false,
35            offset: 0,
36        }
37    }
38
39    /// Update the offset used to generate absolute segment locations.
40    ///
41    /// This represents the byte position that the first buffer emitted by this serializer will be
42    /// written to.
43    pub fn with_offset(mut self, offset: u64) -> Self {
44        self.offset = offset;
45        self
46    }
47
48    /// Exclude the DType from the serialized footer.
49    /// If excluded, the reader must be provided the DType from an external source.
50    pub fn exclude_dtype(mut self) -> Self {
51        self.exclude_dtype = true;
52        self
53    }
54
55    /// Whether to exclude the DType from the serialized footer.
56    /// If excluded, the reader must be provided the DType from an external source.
57    pub fn with_exclude_dtype(mut self, exclude_dtype: bool) -> Self {
58        self.exclude_dtype = exclude_dtype;
59        self
60    }
61
62    /// Serialize the footer into a byte buffer that can later be deserialized as a [`Footer`].
63    /// This can be helpful for storing some footer data out-of-band to accelerate opening a file.
64    pub fn serialize(mut self) -> VortexResult<Vec<ByteBuffer>> {
65        let mut buffers = vec![];
66
67        let dtype_segment = if self.exclude_dtype {
68            None
69        } else {
70            let (buffer, dtype_segment) = write_flatbuffer(&mut self.offset, self.footer.dtype())?;
71            buffers.push(buffer);
72            Some(dtype_segment)
73        };
74
75        let layout_ctx = LayoutContext::empty();
76        let (buffer, layout_segment) = write_flatbuffer(
77            &mut self.offset,
78            &self.footer.layout().flatbuffer_writer(&layout_ctx),
79        )?;
80        buffers.push(buffer);
81
82        let statistics_segment = match self.footer.statistics() {
83            None => None,
84            Some(stats) if stats.is_empty() => None,
85            Some(stats) => {
86                let stats = FileStatistics(stats.clone());
87                let (buffer, stats_segment) = write_flatbuffer(&mut self.offset, &stats)?;
88                buffers.push(buffer);
89                Some(stats_segment)
90            }
91        };
92
93        let (buffer, footer_segment) = write_flatbuffer(
94            &mut self.offset,
95            &FooterFlatBufferWriter {
96                ctx: self.footer.array_ctx.clone(),
97                layout_ctx,
98                segment_specs: self.footer.segments.clone(),
99            },
100        )?;
101        buffers.push(buffer);
102
103        // Assemble the postscript, and write it manually to avoid any framing.
104        let postscript = Postscript {
105            dtype: dtype_segment,
106            layout: layout_segment,
107            statistics: statistics_segment,
108            footer: footer_segment,
109        };
110        let postscript_buffer = postscript.write_flatbuffer_bytes();
111        if postscript_buffer.len() > MAX_POSTSCRIPT_SIZE as usize {
112            Err(vortex_err!(
113                "Postscript is too large ({} bytes); max postscript size is {}",
114                postscript_buffer.len(),
115                MAX_POSTSCRIPT_SIZE
116            ))?;
117        }
118
119        let postscript_len = u16::try_from(postscript_buffer.len())
120            .vortex_expect("Postscript already verified to fit into u16");
121        buffers.push(postscript_buffer.into_inner());
122
123        // And finally, the EOF 8-byte footer.
124        let mut eof = [0u8; EOF_SIZE];
125        eof[0..2].copy_from_slice(&VERSION.to_le_bytes());
126        eof[2..4].copy_from_slice(&postscript_len.to_le_bytes());
127        eof[4..8].copy_from_slice(&MAGIC_BYTES);
128        buffers.push(ByteBuffer::copy_from(eof));
129
130        Ok(buffers)
131    }
132}
133
134fn write_flatbuffer<F: FlatBufferRoot + WriteFlatBuffer>(
135    offset: &mut u64,
136    flatbuffer: &F,
137) -> VortexResult<(ByteBuffer, PostscriptSegment)> {
138    let buffer = flatbuffer.write_flatbuffer_bytes();
139    let length = u32::try_from(buffer.len())
140        .map_err(|_| vortex_err!("flatbuffer length exceeds maximum u32"))?;
141
142    let segment = PostscriptSegment {
143        offset: *offset,
144        length,
145        alignment: FlatBuffer::alignment(),
146    };
147
148    *offset += u64::from(length);
149
150    Ok((buffer.into_inner(), segment))
151}