Skip to main content

array_format/
footer.rs

//! File footer: the index that maps array names to block addresses.
//!
//! The footer is appended at the end of the file. A 12-byte trailer
//! (`footer_size: u64 LE` + `MAGIC`) allows the reader to locate and
//! validate the footer from the tail of the file.
//!
//! ```text
//! [footer_bytes][footer_size: u64 LE][MAGIC b"ARRF"]
//! ```
10
11use rkyv::{Archive, Deserialize, Serialize};
12
13use crate::block::BlockMeta;
14use crate::error::{Error, Result};
15use crate::layout::ArrayMeta;
16use crate::storage::Storage;
17
/// Four-byte signature that terminates every file; it is the last thing
/// written after the footer size.
pub const MAGIC: [u8; 4] = *b"ARRF";

/// Footer format version written by this crate and required on read.
pub const FOOTER_VERSION: u32 = 4;

/// Length of the trailer in bytes: the `u64` footer size followed by
/// [`MAGIC`].
pub const TRAILER_SIZE: usize = std::mem::size_of::<u64>() + MAGIC.len();
26
27/// The file footer containing the block table and array table.
28///
29/// Serialized with [`rkyv`] for zero-copy access to the archived form.
30#[derive(Debug, Clone, PartialEq, Archive, Serialize, Deserialize)]
31pub struct Footer {
32    /// Format version.
33    pub version: u32,
34    /// Block table: metadata for every block in the data region.
35    pub blocks: Vec<BlockMeta>,
36    /// Array table: metadata for every array stored in the file.
37    pub arrays: Vec<ArrayMeta>,
38    /// Global dictionary of attribute key strings.
39    ///
40    /// Array attributes reference keys by index into this vec, so each
41    /// unique key string is stored exactly once regardless of how many
42    /// arrays carry that attribute.
43    pub attr_keys: Vec<String>,
44    /// Global dictionary of attribute values.
45    ///
46    /// Array attributes reference values by index into this vec, so each
47    /// distinct value is stored exactly once across all arrays. Together with
48    /// `attr_keys` this means each `ArrayMeta::attributes` entry is just 4
49    /// bytes (two `u16` indices).
50    pub attr_values: Vec<crate::layout::AttributeValue>,
51    /// Position of this file in the overlay stack.
52    ///
53    /// `0` = base file. `N > 0` = the Nth sidecar (`{stem}.N.arrf`).
54    /// A sidecar footer's `arrays` list contains only the delta — arrays
55    /// and chunks that changed relative to lower layers.
56    pub overlay_index: u32,
57    /// Stem of the base `.af` file this sidecar belongs to.
58    ///
59    /// Empty for base files. Used to validate that a sidecar was created
60    /// for the correct base file when opening a layered file.
61    pub base_file_hint: String,
62}
63
64impl Footer {
65    /// Creates a new empty base-file footer.
66    pub fn new() -> Self {
67        Self {
68            version: FOOTER_VERSION,
69            blocks: Vec::new(),
70            arrays: Vec::new(),
71            attr_keys: Vec::new(),
72            attr_values: Vec::new(),
73            overlay_index: 0,
74            base_file_hint: String::new(),
75        }
76    }
77
78    /// Creates a new empty overlay (sidecar) footer.
79    pub fn new_overlay(overlay_index: u32, base_file_hint: impl Into<String>) -> Self {
80        Self {
81            version: FOOTER_VERSION,
82            overlay_index,
83            base_file_hint: base_file_hint.into(),
84            ..Self::new()
85        }
86    }
87
88    /// Returns `true` if this footer belongs to a sidecar file.
89    pub fn is_overlay(&self) -> bool {
90        self.overlay_index > 0
91    }
92
93    /// Serializes the footer to bytes, appending the trailer.
94    ///
95    /// Layout: `[rkyv_bytes][footer_size: u64 LE][MAGIC]`
96    pub fn serialize(&self) -> Result<Vec<u8>> {
97        let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(self)
98            .map_err(|e| Error::Serialization(e.to_string()))?;
99
100        let footer_size = rkyv_bytes.len() as u64;
101        let mut out = Vec::with_capacity(rkyv_bytes.len() + TRAILER_SIZE);
102        out.extend_from_slice(&rkyv_bytes);
103        out.extend_from_slice(&footer_size.to_le_bytes());
104        out.extend_from_slice(&MAGIC);
105        Ok(out)
106    }
107
108    /// Deserializes a footer from bytes that include the trailer.
109    ///
110    /// `data` must contain at least the trailer and the footer payload.
111    pub fn deserialize(data: &[u8]) -> Result<Self> {
112        if data.len() < TRAILER_SIZE {
113            return Err(Error::InvalidFooter("data too short for trailer".into()));
114        }
115
116        let magic_start = data.len() - 4;
117        if data[magic_start..] != MAGIC {
118            return Err(Error::InvalidFooter("invalid magic bytes".into()));
119        }
120
121        let size_start = magic_start - 8;
122        let footer_size =
123            u64::from_le_bytes(data[size_start..magic_start].try_into().unwrap()) as usize;
124
125        if footer_size > size_start {
126            return Err(Error::InvalidFooter(
127                "footer_size exceeds available data".into(),
128            ));
129        }
130
131        let rkyv_start = size_start - footer_size;
132        let rkyv_bytes = &data[rkyv_start..size_start];
133
134        // Copy into an aligned buffer – the slice may not be aligned to the
135        // requirements of the archived types after being read from storage.
136        let mut aligned: rkyv::util::AlignedVec = rkyv::util::AlignedVec::new();
137        aligned.extend_from_slice(rkyv_bytes);
138
139        let footer = rkyv::from_bytes::<Self, rkyv::rancor::Error>(&aligned)
140            .map_err(|e| Error::Serialization(e.to_string()))?;
141
142        if footer.version != FOOTER_VERSION {
143            return Err(Error::InvalidFooter(format!(
144                "unsupported footer version {}, expected {}",
145                footer.version, FOOTER_VERSION
146            )));
147        }
148
149        Ok(footer)
150    }
151}
152
153/// Reads and deserializes the footer from storage.
154///
155/// Performs a two-pass read: first reads the 12-byte trailer to learn
156/// the footer size, then reads the full footer payload if needed.
157pub async fn read_footer(storage: &(dyn Storage + Sync)) -> Result<Footer> {
158    let file_size = storage.size().await?;
159    if (file_size as usize) < TRAILER_SIZE {
160        return Err(Error::InvalidFooter("file too short for trailer".into()));
161    }
162
163    // First pass: read the trailer to learn the footer size.
164    let trailer = storage
165        .read_range(file_size - TRAILER_SIZE as u64..file_size)
166        .await?;
167
168    if trailer[8..] != MAGIC {
169        return Err(Error::InvalidFooter("invalid magic bytes".into()));
170    }
171    let footer_size = u64::from_le_bytes(trailer[..8].try_into().unwrap()) as usize;
172    let total = footer_size + TRAILER_SIZE;
173
174    // Second pass: read footer payload + trailer.
175    let start = file_size - total as u64;
176    let data = storage.read_range(start..file_size).await?;
177    Footer::deserialize(&data)
178}
179
180impl Default for Footer {
181    fn default() -> Self {
182        Self::new()
183    }
184}
185
#[cfg(test)]
mod tests {
    use super::*;
    use crate::address::{BlockId, ChunkAddress};
    use crate::block::CodecId;
    use crate::dtype::DType;
    use crate::layout::{ArrayLayout, ChunkEntry, StorageLayout};

    /// An empty footer survives a serialize/deserialize round trip.
    #[test]
    fn roundtrip_empty_footer() {
        let footer = Footer::new();
        let bytes = footer.serialize().unwrap();
        let restored = Footer::deserialize(&bytes).unwrap();
        assert_eq!(footer, restored);
    }

    /// A footer with one block and one single-chunk array round-trips.
    #[test]
    fn roundtrip_with_data() {
        let footer = Footer {
            version: FOOTER_VERSION,
            blocks: vec![BlockMeta {
                id: BlockId(0),
                file_offset: 0,
                compressed_size: 8192,
                uncompressed_size: 8192,
                codec: CodecId::None,
            }],
            arrays: vec![ArrayMeta {
                name: "temperature".into(),
                dtype: DType::Float32,
                layout: ArrayLayout {
                    shape: vec![1000, 1000],
                    dimension_names: vec!["x".into(), "y".into()],
                    storage: StorageLayout {
                        chunk_shape: vec![1000, 1000],
                        chunks: vec![ChunkEntry {
                            coord: vec![0, 0],
                            address: ChunkAddress {
                                block_id: BlockId(0),
                                offset: 0,
                                size: 4000,
                            },
                        }],
                    },
                },
                // NOTE(review): the equality assertion below presumably relies on
                // `FillValue` comparing NaN as equal to itself — confirm.
                fill_value: Some(crate::layout::FillValue::Float(f64::NAN)),
                deleted: false,
                attributes: crate::layout::Attributes::U16(vec![]),
            }],
            attr_keys: vec![],
            attr_values: vec![],
            overlay_index: 0,
            base_file_hint: String::new(),
        };
        let bytes = footer.serialize().unwrap();
        let restored = Footer::deserialize(&bytes).unwrap();
        assert_eq!(footer, restored);
    }

    /// Overlay footers are flagged as such and round-trip unchanged.
    #[test]
    fn roundtrip_overlay_footer() {
        let footer = Footer::new_overlay(2, "dataset");
        assert!(footer.is_overlay());
        assert!(!Footer::new().is_overlay());
        assert_eq!(footer.version, FOOTER_VERSION);

        let bytes = footer.serialize().unwrap();
        let restored = Footer::deserialize(&bytes).unwrap();
        assert_eq!(footer, restored);
    }

    /// Corrupting the last magic byte is rejected.
    #[test]
    fn invalid_magic_detected() {
        let mut bytes = Footer::new().serialize().unwrap();
        let len = bytes.len();
        bytes[len - 1] = b'X';
        assert!(Footer::deserialize(&bytes).is_err());
    }

    /// Input shorter than the trailer is rejected.
    #[test]
    fn too_short_data() {
        assert!(Footer::deserialize(&[0u8; 4]).is_err());
    }

    /// A recorded footer size larger than the available data is rejected
    /// rather than causing an out-of-bounds slice.
    #[test]
    fn oversized_footer_size_rejected() {
        let mut bytes = Footer::new().serialize().unwrap();
        let len = bytes.len();
        bytes[len - 12..len - 4].copy_from_slice(&u64::MAX.to_le_bytes());
        assert!(Footer::deserialize(&bytes).is_err());
    }

    /// A footer written with a different format version is rejected.
    #[test]
    fn unsupported_version_rejected() {
        let footer = Footer {
            version: FOOTER_VERSION + 1,
            ..Footer::new()
        };
        let bytes = footer.serialize().unwrap();
        assert!(Footer::deserialize(&bytes).is_err());
    }

    /// The serialized bytes end with `[footer_size: u64 LE][MAGIC]`.
    #[test]
    fn trailer_has_correct_structure() {
        let bytes = Footer::new().serialize().unwrap();
        let len = bytes.len();

        // Last 4 bytes are magic
        assert_eq!(&bytes[len - 4..], b"ARRF");

        // Preceding 8 bytes are footer_size as u64 LE
        let footer_size = u64::from_le_bytes(bytes[len - 12..len - 4].try_into().unwrap());
        assert_eq!(footer_size as usize, len - TRAILER_SIZE);
    }
}