Skip to main content

vortex_file/footer/
serializer.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_buffer::ByteBuffer;
5use vortex_error::VortexExpect;
6use vortex_error::VortexResult;
7use vortex_error::vortex_err;
8use vortex_flatbuffers::FlatBuffer;
9use vortex_flatbuffers::FlatBufferRoot;
10use vortex_flatbuffers::WriteFlatBuffer;
11use vortex_flatbuffers::WriteFlatBufferExt;
12use vortex_layout::LayoutContext;
13
14use crate::EOF_SIZE;
15use crate::Footer;
16use crate::MAGIC_BYTES;
17use crate::MAX_POSTSCRIPT_SIZE;
18use crate::VERSION;
19use crate::footer::file_layout::FooterFlatBufferWriter;
20use crate::footer::postscript::Postscript;
21use crate::footer::postscript::PostscriptSegment;
22
23pub struct FooterSerializer {
24    footer: Footer,
25    exclude_dtype: bool,
26    offset: u64,
27}
28
29impl FooterSerializer {
30    pub(super) fn new(footer: Footer) -> Self {
31        Self {
32            footer,
33            exclude_dtype: false,
34            offset: 0,
35        }
36    }
37
38    /// Update the offset used to generate absolute segment locations.
39    ///
40    /// This represents the byte position that the first buffer emitted by this serializer will be
41    /// written to.
42    pub fn with_offset(mut self, offset: u64) -> Self {
43        self.offset = offset;
44        self
45    }
46
47    /// Exclude the DType from the serialized footer.
48    /// If excluded, the reader must be provided the DType from an external source.
49    pub fn exclude_dtype(mut self) -> Self {
50        self.exclude_dtype = true;
51        self
52    }
53
54    /// Whether to exclude the DType from the serialized footer.
55    /// If excluded, the reader must be provided the DType from an external source.
56    pub fn with_exclude_dtype(mut self, exclude_dtype: bool) -> Self {
57        self.exclude_dtype = exclude_dtype;
58        self
59    }
60
61    /// Serialize the footer into a byte buffer that can later be deserialized as a [`Footer`].
62    /// This can be helpful for storing some footer data out-of-band to accelerate opening a file.
63    pub fn serialize(mut self) -> VortexResult<Vec<ByteBuffer>> {
64        let mut buffers = vec![];
65
66        let dtype_segment = if self.exclude_dtype {
67            None
68        } else {
69            let (buffer, dtype_segment) = write_flatbuffer(&mut self.offset, self.footer.dtype())?;
70            buffers.push(buffer);
71            Some(dtype_segment)
72        };
73
74        // TODO(ngates): we should separate the read/write side of Context since the write side
75        //  doesn't need to look anything up in the registry.
76        let layout_ctx = LayoutContext::default();
77
78        let (buffer, layout_segment) = write_flatbuffer(
79            &mut self.offset,
80            &self.footer.layout().flatbuffer_writer(&layout_ctx),
81        )?;
82        buffers.push(buffer);
83
84        let statistics_segment = match self.footer.statistics() {
85            None => None,
86            Some(stats) if stats.stats_sets().is_empty() => None,
87            Some(stats) => {
88                let (buffer, stats_segment) = write_flatbuffer(&mut self.offset, stats)?;
89                buffers.push(buffer);
90                Some(stats_segment)
91            }
92        };
93
94        let (buffer, footer_segment) = write_flatbuffer(
95            &mut self.offset,
96            &FooterFlatBufferWriter {
97                ctx: self.footer.array_ctx.clone(),
98                layout_ctx,
99                segment_specs: self.footer.segments.clone(),
100            },
101        )?;
102        buffers.push(buffer);
103
104        // Assemble the postscript, and write it manually to avoid any framing.
105        let postscript = Postscript {
106            dtype: dtype_segment,
107            layout: layout_segment,
108            statistics: statistics_segment,
109            footer: footer_segment,
110        };
111        let postscript_buffer = postscript.write_flatbuffer_bytes()?;
112        if postscript_buffer.len() > MAX_POSTSCRIPT_SIZE as usize {
113            Err(vortex_err!(
114                "Postscript is too large ({} bytes); max postscript size is {}",
115                postscript_buffer.len(),
116                MAX_POSTSCRIPT_SIZE
117            ))?;
118        }
119
120        let postscript_len = u16::try_from(postscript_buffer.len())
121            .vortex_expect("Postscript already verified to fit into u16");
122        buffers.push(postscript_buffer.into_inner());
123
124        // And finally, the EOF 8-byte footer.
125        let mut eof = [0u8; EOF_SIZE];
126        eof[0..2].copy_from_slice(&VERSION.to_le_bytes());
127        eof[2..4].copy_from_slice(&postscript_len.to_le_bytes());
128        eof[4..8].copy_from_slice(&MAGIC_BYTES);
129        buffers.push(ByteBuffer::copy_from(eof));
130
131        Ok(buffers)
132    }
133}
134
135fn write_flatbuffer<F: FlatBufferRoot + WriteFlatBuffer>(
136    offset: &mut u64,
137    flatbuffer: &F,
138) -> VortexResult<(ByteBuffer, PostscriptSegment)> {
139    let buffer = flatbuffer.write_flatbuffer_bytes()?;
140    let length = u32::try_from(buffer.len())
141        .map_err(|_| vortex_err!("flatbuffer length exceeds maximum u32"))?;
142
143    let segment = PostscriptSegment {
144        offset: *offset,
145        length,
146        alignment: FlatBuffer::alignment(),
147    };
148
149    *offset += u64::from(length);
150
151    Ok((buffer.into_inner(), segment))
152}