cortex_runtime/map/
deserializer.rs1use crate::map::serializer::crc32;
6use crate::map::types::*;
7use anyhow::{bail, Context, Result};
8use byteorder::{LittleEndian, ReadBytesExt};
9use std::io::Cursor;
10
11impl SiteMap {
12 pub fn deserialize(data: &[u8]) -> Result<Self> {
17 if data.len() < 4 {
19 bail!("map file too small: {} bytes", data.len());
20 }
21 let payload = &data[..data.len() - 4];
22 let stored_checksum = {
23 let mut c = Cursor::new(&data[data.len() - 4..]);
24 c.read_u32::<LittleEndian>().context("reading checksum")?
25 };
26 let computed_checksum = crc32(payload);
27 if stored_checksum != computed_checksum {
28 bail!(
29 "map file integrity check failed: checksum mismatch \
30 (stored 0x{:08X}, computed 0x{:08X}). File may be corrupted.",
31 stored_checksum,
32 computed_checksum
33 );
34 }
35
36 let mut r = Cursor::new(payload);
37
38 let magic = r.read_u32::<LittleEndian>().context("reading magic")?;
40 if magic != SITEMAP_MAGIC {
41 bail!(
42 "invalid magic bytes: expected 0x{:08X}, got 0x{:08X}",
43 SITEMAP_MAGIC,
44 magic
45 );
46 }
47
48 let format_version = r.read_u16::<LittleEndian>().context("reading version")?;
49 if format_version != FORMAT_VERSION {
50 bail!("unsupported format version: expected {FORMAT_VERSION}, got {format_version}");
51 }
52
53 let domain_length = r
54 .read_u16::<LittleEndian>()
55 .context("reading domain length")? as usize;
56 let mut domain_bytes = vec![0u8; domain_length];
57 std::io::Read::read_exact(&mut r, &mut domain_bytes).context("reading domain")?;
58 let domain = String::from_utf8(domain_bytes).context("domain not valid utf8")?;
59
60 let mapped_at = r.read_u64::<LittleEndian>().context("reading mapped_at")?;
61 let node_count = r.read_u32::<LittleEndian>().context("reading node_count")? as usize;
62 let edge_count = r.read_u32::<LittleEndian>().context("reading edge_count")? as usize;
63 let cluster_count = r
64 .read_u16::<LittleEndian>()
65 .context("reading cluster_count")? as usize;
66 let flags = r.read_u16::<LittleEndian>().context("reading flags")?;
67
68 let mut nodes = Vec::with_capacity(node_count);
70 for _ in 0..node_count {
71 let page_type = PageType::from_u8(r.read_u8()?);
72 let confidence = r.read_u8()?;
73 let freshness = r.read_u8()?;
74 let node_flags = NodeFlags(r.read_u8()?);
75 let content_hash = r.read_u32::<LittleEndian>()?;
76 let rendered_at = r.read_u32::<LittleEndian>()?;
77 let http_status = r.read_u16::<LittleEndian>()?;
78 let depth = r.read_u16::<LittleEndian>()?;
79 let inbound_count = r.read_u16::<LittleEndian>()?;
80 let outbound_count = r.read_u16::<LittleEndian>()?;
81 let feature_norm = r.read_f32::<LittleEndian>()?;
82 let reserved = r.read_u32::<LittleEndian>()?;
83
84 nodes.push(NodeRecord {
85 page_type,
86 confidence,
87 freshness,
88 flags: node_flags,
89 content_hash,
90 rendered_at,
91 http_status,
92 depth,
93 inbound_count,
94 outbound_count,
95 feature_norm,
96 reserved,
97 });
98 }
99
100 let mut edges = Vec::with_capacity(edge_count);
102 for _ in 0..edge_count {
103 let target_node = r.read_u32::<LittleEndian>()?;
104 let edge_type = EdgeType::from_u8(r.read_u8()?);
105 let weight = r.read_u8()?;
106 let edge_flags = EdgeFlags(r.read_u8()?);
107 let reserved = r.read_u8()?;
108
109 edges.push(EdgeRecord {
110 target_node,
111 edge_type,
112 weight,
113 flags: edge_flags,
114 reserved,
115 });
116 }
117
118 let mut edge_index = Vec::with_capacity(node_count + 1);
120 for _ in 0..=node_count {
121 edge_index.push(r.read_u32::<LittleEndian>()?);
122 }
123
124 let mut features = Vec::with_capacity(node_count);
126 for _ in 0..node_count {
127 let mut feat = [0.0f32; FEATURE_DIM];
128 for f in &mut feat {
129 *f = r.read_f32::<LittleEndian>()?;
130 }
131 features.push(feat);
132 }
133
134 let action_count = r.read_u32::<LittleEndian>()? as usize;
136 let mut actions = Vec::with_capacity(action_count);
137 for _ in 0..action_count {
138 let opcode_raw = r.read_u16::<LittleEndian>()?;
139 let target_node = r.read_i32::<LittleEndian>()?;
140 let cost_hint = r.read_u8()?;
141 let risk = r.read_u8()?;
142
143 actions.push(ActionRecord {
144 opcode: OpCode::from_u16(opcode_raw),
145 target_node,
146 cost_hint,
147 risk,
148 http_executable: false, });
150 }
151
152 let mut action_index = Vec::with_capacity(node_count + 1);
154 for _ in 0..=node_count {
155 action_index.push(r.read_u32::<LittleEndian>()?);
156 }
157
158 let mut cluster_assignments = Vec::with_capacity(node_count);
160 for _ in 0..node_count {
161 cluster_assignments.push(r.read_u16::<LittleEndian>()?);
162 }
163 let mut cluster_centroids = Vec::with_capacity(cluster_count);
164 for _ in 0..cluster_count {
165 let mut centroid = [0.0f32; FEATURE_DIM];
166 for f in &mut centroid {
167 *f = r.read_f32::<LittleEndian>()?;
168 }
169 cluster_centroids.push(centroid);
170 }
171
172 let url_data_len = r.read_u32::<LittleEndian>()? as usize;
174 let mut url_data = vec![0u8; url_data_len];
175 std::io::Read::read_exact(&mut r, &mut url_data)?;
176
177 let mut url_offsets = Vec::with_capacity(node_count);
178 for _ in 0..node_count {
179 url_offsets.push(r.read_u32::<LittleEndian>()? as usize);
180 }
181
182 let mut urls = Vec::with_capacity(node_count);
184 for &offset in &url_offsets {
185 let end = url_data[offset..]
186 .iter()
187 .position(|&b| b == 0)
188 .map(|p| offset + p)
189 .unwrap_or(url_data_len);
190 let url = String::from_utf8_lossy(&url_data[offset..end]).to_string();
191 urls.push(url);
192 }
193
194 let header = MapHeader {
195 magic,
196 format_version,
197 domain,
198 mapped_at,
199 node_count: node_count as u32,
200 edge_count: edge_count as u32,
201 cluster_count: cluster_count as u16,
202 flags,
203 };
204
205 Ok(SiteMap {
206 header,
207 nodes,
208 edges,
209 edge_index,
210 features,
211 actions,
212 action_index,
213 cluster_assignments,
214 cluster_centroids,
215 urls,
216 })
217 }
218}