Skip to main content

cortex_runtime/map/
serializer.rs

1//! Serialize a SiteMap to the binary CTX format.
2//!
3//! The format ends with a 4-byte CRC32 checksum (IEEE) of all preceding bytes,
4//! allowing integrity verification on load.
5
6use crate::map::types::*;
7use byteorder::{LittleEndian, WriteBytesExt};
8use std::io::Write;
9
10/// Compute CRC32 (IEEE/ISO 3309) checksum of data.
11pub(crate) fn crc32(data: &[u8]) -> u32 {
12    let mut crc: u32 = 0xFFFF_FFFF;
13    for &byte in data {
14        let index = ((crc ^ byte as u32) & 0xFF) as usize;
15        crc = CRC32_TABLE[index] ^ (crc >> 8);
16    }
17    crc ^ 0xFFFF_FFFF
18}
19
/// CRC32 lookup table (IEEE polynomial 0xEDB88320).
///
/// Built at compile time: entry `n` is the result of running the byte value
/// `n` through eight shift-right steps, XORing in the polynomial whenever the
/// bit shifted out is set.
const CRC32_TABLE: [u32; 256] = {
    const POLY: u32 = 0xEDB8_8320;

    let mut entries = [0u32; 256];
    let mut n = 0;
    // `while` rather than `for`: iterators are not usable in const evaluation.
    while n < 256 {
        let mut value = n as u32;
        let mut step = 0;
        while step < 8 {
            value = if value & 1 == 0 {
                value >> 1
            } else {
                (value >> 1) ^ POLY
            };
            step += 1;
        }
        entries[n] = value;
        n += 1;
    }
    entries
};
40
41impl SiteMap {
42    /// Serialize the SiteMap to binary CTX format with trailing CRC32 checksum.
43    pub fn serialize(&self) -> Vec<u8> {
44        let mut buf = Vec::new();
45        self.write_to(&mut buf)
46            .expect("serialization to Vec should not fail");
47
48        // Append CRC32 checksum of all preceding bytes
49        let checksum = crc32(&buf);
50        buf.write_u32::<LittleEndian>(checksum)
51            .expect("checksum write to Vec should not fail");
52
53        buf
54    }
55
56    fn write_to<W: Write>(&self, w: &mut W) -> std::io::Result<()> {
57        // ─── Header ───────────────────────────────────────
58        w.write_u32::<LittleEndian>(self.header.magic)?;
59        w.write_u16::<LittleEndian>(self.header.format_version)?;
60
61        let domain_bytes = self.header.domain.as_bytes();
62        w.write_u16::<LittleEndian>(domain_bytes.len() as u16)?;
63        w.write_all(domain_bytes)?;
64
65        w.write_u64::<LittleEndian>(self.header.mapped_at)?;
66        w.write_u32::<LittleEndian>(self.header.node_count)?;
67        w.write_u32::<LittleEndian>(self.header.edge_count)?;
68        w.write_u16::<LittleEndian>(self.header.cluster_count)?;
69        w.write_u16::<LittleEndian>(self.header.flags)?;
70
71        // ─── Node Table ───────────────────────────────────
72        for node in &self.nodes {
73            w.write_u8(node.page_type as u8)?;
74            w.write_u8(node.confidence)?;
75            w.write_u8(node.freshness)?;
76            w.write_u8(node.flags.0)?;
77            w.write_u32::<LittleEndian>(node.content_hash)?;
78            w.write_u32::<LittleEndian>(node.rendered_at)?;
79            w.write_u16::<LittleEndian>(node.http_status)?;
80            w.write_u16::<LittleEndian>(node.depth)?;
81            w.write_u16::<LittleEndian>(node.inbound_count)?;
82            w.write_u16::<LittleEndian>(node.outbound_count)?;
83            w.write_f32::<LittleEndian>(node.feature_norm)?;
84            w.write_u32::<LittleEndian>(node.reserved)?;
85        }
86
87        // ─── Edge Table ───────────────────────────────────
88        for edge in &self.edges {
89            w.write_u32::<LittleEndian>(edge.target_node)?;
90            w.write_u8(edge.edge_type as u8)?;
91            w.write_u8(edge.weight)?;
92            w.write_u8(edge.flags.0)?;
93            w.write_u8(edge.reserved)?;
94        }
95
96        // Edge CSR index
97        for &idx in &self.edge_index {
98            w.write_u32::<LittleEndian>(idx)?;
99        }
100
101        // ─── Feature Matrix ──────────────────────────────
102        for feat_vec in &self.features {
103            for &f in feat_vec {
104                w.write_f32::<LittleEndian>(f)?;
105            }
106        }
107
108        // ─── Action Catalog ──────────────────────────────
109        // Action count
110        w.write_u32::<LittleEndian>(self.actions.len() as u32)?;
111
112        for action in &self.actions {
113            w.write_u16::<LittleEndian>(action.opcode.as_u16())?;
114            w.write_i32::<LittleEndian>(action.target_node)?;
115            w.write_u8(action.cost_hint)?;
116            w.write_u8(action.risk)?;
117        }
118
119        // Action CSR index
120        for &idx in &self.action_index {
121            w.write_u32::<LittleEndian>(idx)?;
122        }
123
124        // ─── Cluster Table ───────────────────────────────
125        for &assignment in &self.cluster_assignments {
126            w.write_u16::<LittleEndian>(assignment)?;
127        }
128        for centroid in &self.cluster_centroids {
129            for &f in centroid {
130                w.write_f32::<LittleEndian>(f)?;
131            }
132        }
133
134        // ─── URL Table ───────────────────────────────────
135        // First write all URL bytes concatenated with null terminators
136        let mut url_data = Vec::new();
137        let mut url_offsets = Vec::new();
138        for url in &self.urls {
139            url_offsets.push(url_data.len() as u32);
140            url_data.extend_from_slice(url.as_bytes());
141            url_data.push(0); // null terminator
142        }
143
144        // URL data length
145        w.write_u32::<LittleEndian>(url_data.len() as u32)?;
146        w.write_all(&url_data)?;
147
148        // URL offsets
149        for &offset in &url_offsets {
150            w.write_u32::<LittleEndian>(offset)?;
151        }
152
153        Ok(())
154    }
155}