Skip to main content

vortex_file/footer/
serializer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_buffer::ByteBuffer;
5use vortex_error::VortexExpect;
6use vortex_error::VortexResult;
7use vortex_error::vortex_err;
8use vortex_flatbuffers::FlatBuffer;
9use vortex_flatbuffers::FlatBufferRoot;
10use vortex_flatbuffers::WriteFlatBuffer;
11use vortex_flatbuffers::WriteFlatBufferExt;
12use vortex_layout::LayoutContext;
13use vortex_session::registry::ReadContext;
14
15use crate::EOF_SIZE;
16use crate::Footer;
17use crate::MAGIC_BYTES;
18use crate::MAX_POSTSCRIPT_SIZE;
19use crate::VERSION;
20use crate::footer::file_layout::FooterFlatBufferWriter;
21use crate::footer::postscript::Postscript;
22use crate::footer::postscript::PostscriptSegment;
23
24pub struct FooterSerializer {
25    footer: Footer,
26    exclude_dtype: bool,
27    offset: u64,
28}
29
30impl FooterSerializer {
31    pub(super) fn new(footer: Footer) -> Self {
32        Self {
33            footer,
34            exclude_dtype: false,
35            offset: 0,
36        }
37    }
38
39    /// Update the offset used to generate absolute segment locations.
40    ///
41    /// This represents the byte position that the first buffer emitted by this serializer will be
42    /// written to.
43    pub fn with_offset(mut self, offset: u64) -> Self {
44        self.offset = offset;
45        self
46    }
47
48    /// Exclude the DType from the serialized footer.
49    /// If excluded, the reader must be provided the DType from an external source.
50    pub fn exclude_dtype(mut self) -> Self {
51        self.exclude_dtype = true;
52        self
53    }
54
55    /// Whether to exclude the DType from the serialized footer.
56    /// If excluded, the reader must be provided the DType from an external source.
57    pub fn with_exclude_dtype(mut self, exclude_dtype: bool) -> Self {
58        self.exclude_dtype = exclude_dtype;
59        self
60    }
61
62    /// Serialize the footer into a byte buffer that can later be deserialized as a [`Footer`].
63    /// This can be helpful for storing some footer data out-of-band to accelerate opening a file.
64    pub fn serialize(mut self) -> VortexResult<Vec<ByteBuffer>> {
65        let mut buffers = vec![];
66
67        let dtype_segment = if self.exclude_dtype {
68            None
69        } else {
70            let (buffer, dtype_segment) = write_flatbuffer(&mut self.offset, self.footer.dtype())?;
71            buffers.push(buffer);
72            Some(dtype_segment)
73        };
74
75        // TODO(ngates): we should separate the read/write side of Context since the write side
76        //  doesn't need to look anything up in the registry.
77        let layout_ctx = LayoutContext::default();
78
79        let (buffer, layout_segment) = write_flatbuffer(
80            &mut self.offset,
81            &self.footer.layout().flatbuffer_writer(&layout_ctx),
82        )?;
83        buffers.push(buffer);
84
85        let statistics_segment = match self.footer.statistics() {
86            None => None,
87            Some(stats) if stats.stats_sets().is_empty() => None,
88            Some(stats) => {
89                let (buffer, stats_segment) = write_flatbuffer(&mut self.offset, stats)?;
90                buffers.push(buffer);
91                Some(stats_segment)
92            }
93        };
94
95        let (buffer, footer_segment) = write_flatbuffer(
96            &mut self.offset,
97            &FooterFlatBufferWriter {
98                ctx: self.footer.array_read_ctx.clone(),
99                layout_ctx: ReadContext::new(layout_ctx.to_ids()),
100                segment_specs: self.footer.segments.clone(),
101            },
102        )?;
103        buffers.push(buffer);
104
105        // Assemble the postscript, and write it manually to avoid any framing.
106        let postscript = Postscript {
107            dtype: dtype_segment,
108            layout: layout_segment,
109            statistics: statistics_segment,
110            footer: footer_segment,
111        };
112        let postscript_buffer = postscript.write_flatbuffer_bytes()?;
113        if postscript_buffer.len() > MAX_POSTSCRIPT_SIZE as usize {
114            Err(vortex_err!(
115                "Postscript is too large ({} bytes); max postscript size is {}",
116                postscript_buffer.len(),
117                MAX_POSTSCRIPT_SIZE
118            ))?;
119        }
120
121        let postscript_len = u16::try_from(postscript_buffer.len())
122            .vortex_expect("Postscript already verified to fit into u16");
123        buffers.push(postscript_buffer.into_inner());
124
125        // And finally, the EOF 8-byte footer.
126        let mut eof = [0u8; EOF_SIZE];
127        eof[0..2].copy_from_slice(&VERSION.to_le_bytes());
128        eof[2..4].copy_from_slice(&postscript_len.to_le_bytes());
129        eof[4..8].copy_from_slice(&MAGIC_BYTES);
130        buffers.push(ByteBuffer::copy_from(eof));
131
132        Ok(buffers)
133    }
134}
135
136fn write_flatbuffer<F: FlatBufferRoot + WriteFlatBuffer>(
137    offset: &mut u64,
138    flatbuffer: &F,
139) -> VortexResult<(ByteBuffer, PostscriptSegment)> {
140    let buffer = flatbuffer.write_flatbuffer_bytes()?;
141    let length = u32::try_from(buffer.len())
142        .map_err(|_| vortex_err!("flatbuffer length exceeds maximum u32"))?;
143
144    let segment = PostscriptSegment {
145        offset: *offset,
146        length,
147        alignment: FlatBuffer::alignment(),
148    };
149
150    *offset += u64::from(length);
151
152    Ok((buffer.into_inner(), segment))
153}