1use crate::compiler::models::ModelField;
7use crate::map::types::*;
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10
11#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct MapDelta {
14 pub domain: String,
16 pub base_hash: [u8; 32],
18 pub timestamp: DateTime<Utc>,
20 pub cortex_instance_id: String,
22 pub nodes_added: Vec<CompactNode>,
24 pub nodes_removed: Vec<u32>,
26 pub nodes_modified: Vec<(u32, FeatureDelta)>,
28 pub edges_added: Vec<(u32, u32)>,
30 pub edges_removed: Vec<(u32, u32)>,
32 pub schema_delta: Option<SchemaDelta>,
34}
35
36#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct CompactNode {
39 pub url_hash: u64,
41 pub url: String,
43 pub page_type: u8,
45 pub features: Vec<(u8, f32)>,
47}
48
49#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct FeatureDelta {
52 pub changed_dims: Vec<(u8, f32)>,
54}
55
56#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct SchemaDelta {
59 pub new_fields: Vec<(String, ModelField)>,
61 pub removed_fields: Vec<(String, String)>,
63}
64
65pub fn compute_delta(old_map: &SiteMap, new_map: &SiteMap, instance_id: &str) -> MapDelta {
67 let mut nodes_added: Vec<CompactNode> = Vec::new();
68 let mut nodes_removed: Vec<u32> = Vec::new();
69 let mut nodes_modified: Vec<(u32, FeatureDelta)> = Vec::new();
70 let mut edges_added: Vec<(u32, u32)> = Vec::new();
71 let mut edges_removed: Vec<(u32, u32)> = Vec::new();
72
73 let old_url_index: std::collections::HashMap<&str, usize> = old_map
75 .urls
76 .iter()
77 .enumerate()
78 .map(|(i, u)| (u.as_str(), i))
79 .collect();
80 let new_url_index: std::collections::HashMap<&str, usize> = new_map
81 .urls
82 .iter()
83 .enumerate()
84 .map(|(i, u)| (u.as_str(), i))
85 .collect();
86
87 for (new_idx, url) in new_map.urls.iter().enumerate() {
89 if let Some(&old_idx) = old_url_index.get(url.as_str()) {
90 if new_idx < new_map.features.len() && old_idx < old_map.features.len() {
92 let old_feats = &old_map.features[old_idx];
93 let new_feats = &new_map.features[new_idx];
94
95 let mut changed: Vec<(u8, f32)> = Vec::new();
96 for dim in 0..FEATURE_DIM {
97 let diff = (new_feats[dim] - old_feats[dim]).abs();
98 if diff > 0.001 {
99 changed.push((dim as u8, new_feats[dim]));
100 }
101 }
102
103 if !changed.is_empty() {
104 nodes_modified.push((
105 new_idx as u32,
106 FeatureDelta {
107 changed_dims: changed,
108 },
109 ));
110 }
111 }
112 } else {
113 let features: Vec<(u8, f32)> = if new_idx < new_map.features.len() {
115 new_map.features[new_idx]
116 .iter()
117 .enumerate()
118 .filter(|(_, &v)| v != 0.0)
119 .map(|(i, &v)| (i as u8, v))
120 .collect()
121 } else {
122 Vec::new()
123 };
124
125 let page_type = if new_idx < new_map.nodes.len() {
126 new_map.nodes[new_idx].page_type as u8
127 } else {
128 0
129 };
130
131 nodes_added.push(CompactNode {
132 url_hash: fnv_hash(url.as_bytes()),
133 url: url.clone(),
134 page_type,
135 features,
136 });
137 }
138 }
139
140 for (old_idx, url) in old_map.urls.iter().enumerate() {
142 if !new_url_index.contains_key(url.as_str()) {
143 nodes_removed.push(old_idx as u32);
144 }
145 }
146
147 let old_edges: std::collections::HashSet<(u32, u32)> = collect_edge_pairs(old_map);
149 let new_edges: std::collections::HashSet<(u32, u32)> = collect_edge_pairs(new_map);
150
151 for &(src, tgt) in &new_edges {
152 if !old_edges.contains(&(src, tgt)) {
153 edges_added.push((src, tgt));
154 }
155 }
156 for &(src, tgt) in &old_edges {
157 if !new_edges.contains(&(src, tgt)) {
158 edges_removed.push((src, tgt));
159 }
160 }
161
162 MapDelta {
163 domain: new_map.header.domain.clone(),
164 base_hash: hash_map(old_map),
165 timestamp: Utc::now(),
166 cortex_instance_id: instance_id.to_string(),
167 nodes_added,
168 nodes_removed,
169 nodes_modified,
170 edges_added,
171 edges_removed,
172 schema_delta: None,
173 }
174}
175
176pub fn apply_delta(map: &mut SiteMap, delta: &MapDelta) -> anyhow::Result<()> {
178 for (idx, feature_delta) in &delta.nodes_modified {
180 let idx = *idx as usize;
181 if idx < map.features.len() {
182 for &(dim, value) in &feature_delta.changed_dims {
183 map.features[idx][dim as usize] = value;
184 }
185 }
186 }
187
188 map.header.mapped_at = delta.timestamp.timestamp() as u64;
194
195 Ok(())
196}
197
198pub fn hash_map(map: &SiteMap) -> [u8; 32] {
200 use std::hash::{Hash, Hasher};
201 let mut hasher = fnv::FnvHasher::default();
202
203 map.header.domain.hash(&mut hasher);
204 map.header.node_count.hash(&mut hasher);
205 map.header.edge_count.hash(&mut hasher);
206
207 for url in &map.urls {
208 url.hash(&mut hasher);
209 }
210
211 for feats in &map.features {
212 for &f in feats {
213 f.to_bits().hash(&mut hasher);
214 }
215 }
216
217 let h = hasher.finish();
218 let mut result = [0u8; 32];
219 result[..8].copy_from_slice(&h.to_le_bytes());
220 for i in 1..4 {
222 let rotated = h.rotate_left(i * 16);
223 result[i as usize * 8..(i as usize + 1) * 8].copy_from_slice(&rotated.to_le_bytes());
224 }
225 result
226}
227
228pub fn serialize_delta(delta: &MapDelta) -> Vec<u8> {
230 serde_json::to_vec(delta).unwrap_or_default()
231}
232
233pub fn deserialize_delta(bytes: &[u8]) -> anyhow::Result<MapDelta> {
235 Ok(serde_json::from_slice(bytes)?)
236}
237
238fn collect_edge_pairs(map: &SiteMap) -> std::collections::HashSet<(u32, u32)> {
240 let mut pairs = std::collections::HashSet::new();
241 for (src_idx, _) in map.nodes.iter().enumerate() {
242 let edge_start = if src_idx < map.edge_index.len() {
243 map.edge_index[src_idx] as usize
244 } else {
245 continue;
246 };
247 let edge_end = if src_idx + 1 < map.edge_index.len() {
248 map.edge_index[src_idx + 1] as usize
249 } else {
250 map.edges.len()
251 };
252 for edge_idx in edge_start..edge_end {
253 if edge_idx < map.edges.len() {
254 pairs.insert((src_idx as u32, map.edges[edge_idx].target_node));
255 }
256 }
257 }
258 pairs
259}
260
261pub fn strip_private_data(map: &mut SiteMap) {
263 let auth_indices: Vec<usize> = map
265 .nodes
266 .iter()
267 .enumerate()
268 .filter(|(_, n)| n.flags.is_auth_required())
269 .map(|(i, _)| i)
270 .collect();
271
272 for features in &mut map.features {
274 features[88] = 0.0; features[89] = 0.0; for f in features.iter_mut().skip(112) {
278 *f = 0.0;
279 }
280 }
281
282 for &idx in &auth_indices {
284 if idx < map.features.len() {
285 map.features[idx] = [0.0; FEATURE_DIM];
286 }
287 }
288}
289
/// Computes the 64-bit FNV-1a hash of `data`.
///
/// Starting from the offset basis, each byte is XOR-ed into the state, which is
/// then multiplied (wrapping) by the FNV prime. Empty input returns the offset
/// basis itself.
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    data.iter().fold(OFFSET_BASIS, |state, &byte| {
        (state ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
299
#[cfg(test)]
mod tests {
    use super::*;
    use crate::map::builder::SiteMapBuilder;

    /// Feature row with every dimension set to zero.
    fn zero_features() -> [f32; FEATURE_DIM] {
        [0.0; FEATURE_DIM]
    }

    /// Zero feature row with only the price dimension set.
    fn priced(price: f32) -> [f32; FEATURE_DIM] {
        let mut feats = zero_features();
        feats[FEAT_PRICE] = price;
        feats
    }

    /// Builds a map for `domain` from `(url, page type, features)` triples, passing
    /// the same trailing `200` argument to `add_node` that every test here uses.
    fn site_map(domain: &str, nodes: Vec<(&str, PageType, [f32; FEATURE_DIM])>) -> SiteMap {
        let mut builder = SiteMapBuilder::new(domain);
        for (url, page_type, feats) in nodes {
            builder.add_node(url, page_type, feats, 200);
        }
        builder.build()
    }

    /// Single-product map on `test.com` with the given feature row at `/p1`.
    fn single_product_map(feats: [f32; FEATURE_DIM]) -> SiteMap {
        site_map(
            "test.com",
            vec![("https://test.com/p1", PageType::ProductDetail, feats)],
        )
    }

    /// Single-page map on `test.com` containing only the home page.
    fn home_only_map(feats: [f32; FEATURE_DIM]) -> SiteMap {
        site_map("test.com", vec![("https://test.com/", PageType::Home, feats)])
    }

    #[test]
    fn test_compute_delta_no_changes() {
        let map = home_only_map(zero_features());

        let delta = compute_delta(&map, &map, "instance-1");
        assert!(delta.nodes_added.is_empty());
        assert!(delta.nodes_removed.is_empty());
        assert!(delta.nodes_modified.is_empty());
    }

    #[test]
    fn test_compute_delta_detects_feature_change() {
        let map1 = single_product_map(priced(100.0));
        let map2 = single_product_map(priced(89.99));

        let delta = compute_delta(&map1, &map2, "instance-1");
        assert_eq!(delta.nodes_modified.len(), 1);
        assert_eq!(delta.nodes_modified[0].0, 0);
    }

    #[test]
    fn test_compute_delta_detects_new_node() {
        let feats = zero_features();
        let map1 = single_product_map(feats);
        let map2 = site_map(
            "test.com",
            vec![
                ("https://test.com/p1", PageType::ProductDetail, feats),
                ("https://test.com/p2", PageType::ProductDetail, feats),
            ],
        );

        let delta = compute_delta(&map1, &map2, "instance-1");
        assert_eq!(delta.nodes_added.len(), 1);
        assert_eq!(delta.nodes_added[0].url, "https://test.com/p2");
    }

    #[test]
    fn test_serialize_deserialize_delta() {
        let delta = MapDelta {
            domain: "test.com".to_string(),
            base_hash: [0u8; 32],
            timestamp: Utc::now(),
            cortex_instance_id: "test".to_string(),
            nodes_added: vec![],
            nodes_removed: vec![],
            nodes_modified: vec![],
            edges_added: vec![],
            edges_removed: vec![],
            schema_delta: None,
        };

        let bytes = serialize_delta(&delta);
        let back = deserialize_delta(&bytes).unwrap();
        assert_eq!(back.domain, "test.com");
    }

    #[test]
    fn test_hash_map_deterministic() {
        let map = home_only_map(zero_features());

        let h1 = hash_map(&map);
        let h2 = hash_map(&map);
        assert_eq!(h1, h2);
    }

    #[test]
    fn test_strip_private_data() {
        let mut feats = zero_features();
        feats[112] = 5.0;
        feats[113] = 3.0;
        let mut map = home_only_map(feats);

        strip_private_data(&mut map);
        assert_eq!(map.features[0][112], 0.0);
        assert_eq!(map.features[0][113], 0.0);
    }

    #[test]
    fn test_v4_delta_size_smaller_than_full_map() {
        let urls: Vec<String> = (0..50)
            .map(|i| format!("https://shop.com/p/{i}"))
            .collect();
        // Builds the 50-product catalogue with a per-index price rule.
        let catalog = |price_for: &dyn Fn(usize) -> f32| {
            let nodes = urls
                .iter()
                .enumerate()
                .map(|(i, url)| (url.as_str(), PageType::ProductDetail, priced(price_for(i))))
                .collect();
            site_map("shop.com", nodes)
        };

        let map1 = catalog(&|i| 100.0 + i as f32);
        // Only the first three products get a different price.
        let map2 = catalog(&|i| {
            if i < 3 {
                80.0 + i as f32
            } else {
                100.0 + i as f32
            }
        });

        let delta = compute_delta(&map1, &map2, "test-instance");
        assert_eq!(delta.nodes_modified.len(), 3, "only 3 prices changed");
        assert!(delta.nodes_added.is_empty());
        assert!(delta.nodes_removed.is_empty());

        let delta_bytes = serialize_delta(&delta);
        let map_bytes = map1.serialize();
        assert!(
            delta_bytes.len() < map_bytes.len() / 2,
            "delta ({}) should be much smaller than full map ({})",
            delta_bytes.len(),
            map_bytes.len()
        );
    }

    #[test]
    fn test_v4_delta_metadata() {
        let map1 = home_only_map(zero_features());
        let map2 = home_only_map(priced(50.0));

        let delta = compute_delta(&map1, &map2, "instance-42");

        assert_eq!(delta.domain, "test.com");
        assert_eq!(delta.cortex_instance_id, "instance-42");
        assert_ne!(delta.base_hash, [0u8; 32], "base_hash should be set");
        let age = Utc::now() - delta.timestamp;
        assert!(age.num_seconds() < 10, "timestamp should be recent");
    }

    #[test]
    fn test_v4_delta_roundtrip() {
        let original_feats = priced(100.0);
        let map1 = single_product_map(original_feats);
        let map2 = site_map(
            "test.com",
            vec![
                ("https://test.com/p1", PageType::ProductDetail, priced(80.0)),
                ("https://test.com/p2", PageType::ProductDetail, original_feats),
            ],
        );

        let delta = compute_delta(&map1, &map2, "test");
        let bytes = serialize_delta(&delta);
        let back = deserialize_delta(&bytes).unwrap();

        assert_eq!(back.domain, delta.domain);
        assert_eq!(back.nodes_added.len(), delta.nodes_added.len());
        assert_eq!(back.nodes_modified.len(), delta.nodes_modified.len());
    }

    #[test]
    fn test_v4_privacy_strips_all_session_features() {
        let mut feats = zero_features();
        for (offset, dim) in (112..=127).enumerate() {
            feats[dim] = (offset + 1) as f32;
        }
        feats[FEAT_IS_AUTH_AREA] = 1.0;
        let mut map = site_map(
            "test.com",
            vec![("https://test.com/account", PageType::Account, feats)],
        );

        strip_private_data(&mut map);

        for dim in 112..=127 {
            assert_eq!(
                map.features[0][dim], 0.0,
                "session dim {dim} should be cleared"
            );
        }
    }

    #[test]
    fn test_v4_delta_detects_removed_nodes() {
        let feats = zero_features();
        let map1 = site_map(
            "test.com",
            vec![
                ("https://test.com/p1", PageType::ProductDetail, feats),
                ("https://test.com/p2", PageType::ProductDetail, feats),
                ("https://test.com/p3", PageType::ProductDetail, feats),
            ],
        );
        let map2 = single_product_map(feats);

        let delta = compute_delta(&map1, &map2, "test");
        assert_eq!(
            delta.nodes_removed.len(),
            2,
            "should detect 2 removed nodes"
        );
    }

    #[test]
    fn test_v4_hash_map_changes_with_content() {
        let map1 = single_product_map(zero_features());
        let map2 = single_product_map(priced(50.0));

        let h1 = hash_map(&map1);
        let h2 = hash_map(&map2);
        assert_ne!(h1, h2, "different maps should have different hashes");
    }
}