vortex_file/footer/
serializer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_buffer::ByteBuffer;
5use vortex_error::{VortexExpect, VortexResult, vortex_err};
6use vortex_flatbuffers::{FlatBuffer, FlatBufferRoot, WriteFlatBuffer, WriteFlatBufferExt};
7use vortex_layout::LayoutContext;
8
9use crate::footer::FileStatistics;
10use crate::footer::file_layout::FooterFlatBufferWriter;
11use crate::footer::postscript::{Postscript, PostscriptSegment};
12use crate::{EOF_SIZE, Footer, MAGIC_BYTES, MAX_POSTSCRIPT_SIZE, VERSION};
13
14pub struct FooterSerializer {
15    footer: Footer,
16    exclude_dtype: bool,
17    offset: u64,
18}
19
20impl FooterSerializer {
21    pub(super) fn new(footer: Footer) -> Self {
22        Self {
23            footer,
24            exclude_dtype: false,
25            offset: 0,
26        }
27    }
28
29    /// Update the offset used to generate absolute segment locations.
30    ///
31    /// This represents the byte position that the first buffer emitted by this serializer will be
32    /// written to.
33    pub fn with_offset(mut self, offset: u64) -> Self {
34        self.offset = offset;
35        self
36    }
37
38    /// Exclude the DType from the serialized footer.
39    /// If excluded, the reader must be provided the DType from an external source.
40    pub fn exclude_dtype(mut self) -> Self {
41        self.exclude_dtype = true;
42        self
43    }
44
45    /// Whether to exclude the DType from the serialized footer.
46    /// If excluded, the reader must be provided the DType from an external source.
47    pub fn with_exclude_dtype(mut self, exclude_dtype: bool) -> Self {
48        self.exclude_dtype = exclude_dtype;
49        self
50    }
51
52    /// Serialize the footer into a byte buffer that can later be deserialized as a [`Footer`].
53    /// This can be helpful for storing some footer data out-of-band to accelerate opening a file.
54    pub fn serialize(mut self) -> VortexResult<Vec<ByteBuffer>> {
55        let mut buffers = vec![];
56
57        let dtype_segment = if self.exclude_dtype {
58            None
59        } else {
60            let (buffer, dtype_segment) = write_flatbuffer(&mut self.offset, self.footer.dtype())?;
61            buffers.push(buffer);
62            Some(dtype_segment)
63        };
64
65        let layout_ctx = LayoutContext::empty();
66        let (buffer, layout_segment) = write_flatbuffer(
67            &mut self.offset,
68            &self.footer.layout().flatbuffer_writer(&layout_ctx),
69        )?;
70        buffers.push(buffer);
71
72        let statistics_segment = match self.footer.statistics() {
73            None => None,
74            Some(stats) if stats.is_empty() => None,
75            Some(stats) => {
76                let stats = FileStatistics(stats.clone());
77                let (buffer, stats_segment) = write_flatbuffer(&mut self.offset, &stats)?;
78                buffers.push(buffer);
79                Some(stats_segment)
80            }
81        };
82
83        let (buffer, footer_segment) = write_flatbuffer(
84            &mut self.offset,
85            &FooterFlatBufferWriter {
86                ctx: self.footer.array_ctx.clone(),
87                layout_ctx,
88                segment_specs: self.footer.segments.clone(),
89            },
90        )?;
91        buffers.push(buffer);
92
93        // Assemble the postscript, and write it manually to avoid any framing.
94        let postscript = Postscript {
95            dtype: dtype_segment,
96            layout: layout_segment,
97            statistics: statistics_segment,
98            footer: footer_segment,
99        };
100        let postscript_buffer = postscript.write_flatbuffer_bytes();
101        if postscript_buffer.len() > MAX_POSTSCRIPT_SIZE as usize {
102            Err(vortex_err!(
103                "Postscript is too large ({} bytes); max postscript size is {}",
104                postscript_buffer.len(),
105                MAX_POSTSCRIPT_SIZE
106            ))?;
107        }
108
109        let postscript_len = u16::try_from(postscript_buffer.len())
110            .vortex_expect("Postscript already verified to fit into u16");
111        buffers.push(postscript_buffer.into_inner());
112
113        // And finally, the EOF 8-byte footer.
114        let mut eof = [0u8; EOF_SIZE];
115        eof[0..2].copy_from_slice(&VERSION.to_le_bytes());
116        eof[2..4].copy_from_slice(&postscript_len.to_le_bytes());
117        eof[4..8].copy_from_slice(&MAGIC_BYTES);
118        buffers.push(ByteBuffer::copy_from(eof));
119
120        Ok(buffers)
121    }
122}
123
124fn write_flatbuffer<F: FlatBufferRoot + WriteFlatBuffer>(
125    offset: &mut u64,
126    flatbuffer: &F,
127) -> VortexResult<(ByteBuffer, PostscriptSegment)> {
128    let buffer = flatbuffer.write_flatbuffer_bytes();
129    let length = u32::try_from(buffer.len())
130        .map_err(|_| vortex_err!("flatbuffer length exceeds maximum u32"))?;
131
132    let segment = PostscriptSegment {
133        offset: *offset,
134        length,
135        alignment: FlatBuffer::alignment(),
136    };
137
138    *offset += u64::from(length);
139
140    Ok((buffer.into_inner(), segment))
141}