Skip to main content

cortex_runtime/collective/
delta.rs

//! Delta format for incremental map updates.
//!
//! Computes compact diffs between two versions of a `SiteMap`, so that peers
//! can synchronise without retransmitting the full map.
5
6use crate::compiler::models::ModelField;
7use crate::map::types::*;
8use chrono::{DateTime, Utc};
9use serde::{Deserialize, Serialize};
10
11/// A delta between two versions of a SiteMap.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct MapDelta {
14    /// Domain this delta applies to.
15    pub domain: String,
16    /// Hash of the base map this delta applies to.
17    pub base_hash: [u8; 32],
18    /// When this delta was computed.
19    pub timestamp: DateTime<Utc>,
20    /// Which Cortex instance produced this delta.
21    pub cortex_instance_id: String,
22    /// New nodes added.
23    pub nodes_added: Vec<CompactNode>,
24    /// Node indices removed.
25    pub nodes_removed: Vec<u32>,
26    /// Nodes with changed features.
27    pub nodes_modified: Vec<(u32, FeatureDelta)>,
28    /// New edges added (source, target).
29    pub edges_added: Vec<(u32, u32)>,
30    /// Edges removed (source, target).
31    pub edges_removed: Vec<(u32, u32)>,
32    /// Schema changes, if any.
33    pub schema_delta: Option<SchemaDelta>,
34}
35
36/// A compact node representation for deltas (sparse features).
37#[derive(Debug, Clone, Serialize, Deserialize)]
38pub struct CompactNode {
39    /// FNV hash of the URL.
40    pub url_hash: u64,
41    /// URL string.
42    pub url: String,
43    /// Page type byte.
44    pub page_type: u8,
45    /// Only non-zero feature dimensions.
46    pub features: Vec<(u8, f32)>,
47}
48
49/// Changed feature dimensions for a modified node.
50#[derive(Debug, Clone, Serialize, Deserialize)]
51pub struct FeatureDelta {
52    /// (dimension, new_value) pairs.
53    pub changed_dims: Vec<(u8, f32)>,
54}
55
56/// Schema-level changes.
57#[derive(Debug, Clone, Serialize, Deserialize)]
58pub struct SchemaDelta {
59    /// New fields discovered: (model_name, field).
60    pub new_fields: Vec<(String, ModelField)>,
61    /// Fields removed: (model_name, field_name).
62    pub removed_fields: Vec<(String, String)>,
63}
64
65/// Compute a delta between an old and new version of a SiteMap.
66pub fn compute_delta(old_map: &SiteMap, new_map: &SiteMap, instance_id: &str) -> MapDelta {
67    let mut nodes_added: Vec<CompactNode> = Vec::new();
68    let mut nodes_removed: Vec<u32> = Vec::new();
69    let mut nodes_modified: Vec<(u32, FeatureDelta)> = Vec::new();
70    let mut edges_added: Vec<(u32, u32)> = Vec::new();
71    let mut edges_removed: Vec<(u32, u32)> = Vec::new();
72
73    // Build URL → index maps for both
74    let old_url_index: std::collections::HashMap<&str, usize> = old_map
75        .urls
76        .iter()
77        .enumerate()
78        .map(|(i, u)| (u.as_str(), i))
79        .collect();
80    let new_url_index: std::collections::HashMap<&str, usize> = new_map
81        .urls
82        .iter()
83        .enumerate()
84        .map(|(i, u)| (u.as_str(), i))
85        .collect();
86
87    // Find added and modified nodes
88    for (new_idx, url) in new_map.urls.iter().enumerate() {
89        if let Some(&old_idx) = old_url_index.get(url.as_str()) {
90            // Node exists in both — check for modifications
91            if new_idx < new_map.features.len() && old_idx < old_map.features.len() {
92                let old_feats = &old_map.features[old_idx];
93                let new_feats = &new_map.features[new_idx];
94
95                let mut changed: Vec<(u8, f32)> = Vec::new();
96                for dim in 0..FEATURE_DIM {
97                    let diff = (new_feats[dim] - old_feats[dim]).abs();
98                    if diff > 0.001 {
99                        changed.push((dim as u8, new_feats[dim]));
100                    }
101                }
102
103                if !changed.is_empty() {
104                    nodes_modified.push((
105                        new_idx as u32,
106                        FeatureDelta {
107                            changed_dims: changed,
108                        },
109                    ));
110                }
111            }
112        } else {
113            // New node
114            let features: Vec<(u8, f32)> = if new_idx < new_map.features.len() {
115                new_map.features[new_idx]
116                    .iter()
117                    .enumerate()
118                    .filter(|(_, &v)| v != 0.0)
119                    .map(|(i, &v)| (i as u8, v))
120                    .collect()
121            } else {
122                Vec::new()
123            };
124
125            let page_type = if new_idx < new_map.nodes.len() {
126                new_map.nodes[new_idx].page_type as u8
127            } else {
128                0
129            };
130
131            nodes_added.push(CompactNode {
132                url_hash: fnv_hash(url.as_bytes()),
133                url: url.clone(),
134                page_type,
135                features,
136            });
137        }
138    }
139
140    // Find removed nodes
141    for (old_idx, url) in old_map.urls.iter().enumerate() {
142        if !new_url_index.contains_key(url.as_str()) {
143            nodes_removed.push(old_idx as u32);
144        }
145    }
146
147    // Compare edges (simplified: check by source/target pairs)
148    let old_edges: std::collections::HashSet<(u32, u32)> = collect_edge_pairs(old_map);
149    let new_edges: std::collections::HashSet<(u32, u32)> = collect_edge_pairs(new_map);
150
151    for &(src, tgt) in &new_edges {
152        if !old_edges.contains(&(src, tgt)) {
153            edges_added.push((src, tgt));
154        }
155    }
156    for &(src, tgt) in &old_edges {
157        if !new_edges.contains(&(src, tgt)) {
158            edges_removed.push((src, tgt));
159        }
160    }
161
162    MapDelta {
163        domain: new_map.header.domain.clone(),
164        base_hash: hash_map(old_map),
165        timestamp: Utc::now(),
166        cortex_instance_id: instance_id.to_string(),
167        nodes_added,
168        nodes_removed,
169        nodes_modified,
170        edges_added,
171        edges_removed,
172        schema_delta: None,
173    }
174}
175
176/// Apply a delta to a SiteMap (mutates in place).
177pub fn apply_delta(map: &mut SiteMap, delta: &MapDelta) -> anyhow::Result<()> {
178    // Apply modifications
179    for (idx, feature_delta) in &delta.nodes_modified {
180        let idx = *idx as usize;
181        if idx < map.features.len() {
182            for &(dim, value) in &feature_delta.changed_dims {
183                map.features[idx][dim as usize] = value;
184            }
185        }
186    }
187
188    // Note: Adding and removing nodes requires rebuilding CSR indexes,
189    // which is more complex. For now, modifications are the primary use case.
190    // Full add/remove support would require a SiteMapBuilder-like approach.
191
192    // Update header timestamp
193    map.header.mapped_at = delta.timestamp.timestamp() as u64;
194
195    Ok(())
196}
197
198/// Compute a content hash of a SiteMap for delta base verification.
199pub fn hash_map(map: &SiteMap) -> [u8; 32] {
200    use std::hash::{Hash, Hasher};
201    let mut hasher = fnv::FnvHasher::default();
202
203    map.header.domain.hash(&mut hasher);
204    map.header.node_count.hash(&mut hasher);
205    map.header.edge_count.hash(&mut hasher);
206
207    for url in &map.urls {
208        url.hash(&mut hasher);
209    }
210
211    for feats in &map.features {
212        for &f in feats {
213            f.to_bits().hash(&mut hasher);
214        }
215    }
216
217    let h = hasher.finish();
218    let mut result = [0u8; 32];
219    result[..8].copy_from_slice(&h.to_le_bytes());
220    // Fill rest with secondary hash rotations
221    for i in 1..4 {
222        let rotated = h.rotate_left(i * 16);
223        result[i as usize * 8..(i as usize + 1) * 8].copy_from_slice(&rotated.to_le_bytes());
224    }
225    result
226}
227
228/// Serialize a delta to compact binary.
229pub fn serialize_delta(delta: &MapDelta) -> Vec<u8> {
230    serde_json::to_vec(delta).unwrap_or_default()
231}
232
233/// Deserialize a delta from binary.
234pub fn deserialize_delta(bytes: &[u8]) -> anyhow::Result<MapDelta> {
235    Ok(serde_json::from_slice(bytes)?)
236}
237
238/// Collect all edge pairs from a SiteMap.
239fn collect_edge_pairs(map: &SiteMap) -> std::collections::HashSet<(u32, u32)> {
240    let mut pairs = std::collections::HashSet::new();
241    for (src_idx, _) in map.nodes.iter().enumerate() {
242        let edge_start = if src_idx < map.edge_index.len() {
243            map.edge_index[src_idx] as usize
244        } else {
245            continue;
246        };
247        let edge_end = if src_idx + 1 < map.edge_index.len() {
248            map.edge_index[src_idx + 1] as usize
249        } else {
250            map.edges.len()
251        };
252        for edge_idx in edge_start..edge_end {
253            if edge_idx < map.edges.len() {
254                pairs.insert((src_idx as u32, map.edges[edge_idx].target_node));
255            }
256        }
257    }
258    pairs
259}
260
261/// Strip private/session data before sharing.
262pub fn strip_private_data(map: &mut SiteMap) {
263    // Remove auth-required nodes
264    let auth_indices: Vec<usize> = map
265        .nodes
266        .iter()
267        .enumerate()
268        .filter(|(_, n)| n.flags.is_auth_required())
269        .map(|(i, _)| i)
270        .collect();
271
272    // Clear session-specific features (dims 112-127) and privacy-sensitive dims
273    for features in &mut map.features {
274        features[88] = 0.0; // cookie_consent_blocking
275        features[89] = 0.0; // popup_count
276                            // Zero all session features (112-127)
277        for f in features.iter_mut().skip(112) {
278            *f = 0.0;
279        }
280    }
281
282    // Clear auth-walled node features (zero them out rather than removing)
283    for &idx in &auth_indices {
284        if idx < map.features.len() {
285            map.features[idx] = [0.0; FEATURE_DIM];
286        }
287    }
288}
289
/// 64-bit FNV-1a hash of `data`, used for URL hashing.
///
/// Starts from the FNV offset basis and, for each byte, XORs the byte in and
/// then multiplies by the FNV prime (wrapping on overflow).
fn fnv_hash(data: &[u8]) -> u64 {
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    data.iter().fold(OFFSET_BASIS, |acc, &byte| {
        (acc ^ u64::from(byte)).wrapping_mul(PRIME)
    })
}
299
#[cfg(test)]
mod tests {
    use super::*;
    use crate::map::builder::SiteMapBuilder;

    /// All-zero feature vector for tests that do not care about feature values.
    const BLANK: [f32; FEATURE_DIM] = [0.0; FEATURE_DIM];

    /// Feature vector that is zero everywhere except the price dimension.
    fn priced(price: f32) -> [f32; FEATURE_DIM] {
        let mut feats = BLANK;
        feats[FEAT_PRICE] = price;
        feats
    }

    /// Build a map for `domain` holding `pages` in the given order. Every node
    /// is added with the same trailing `200` argument the tests always use.
    fn site<U: AsRef<str>>(
        domain: &str,
        pages: Vec<(U, PageType, [f32; FEATURE_DIM])>,
    ) -> SiteMap {
        let mut builder = SiteMapBuilder::new(domain);
        for (url, page_type, feats) in pages {
            builder.add_node(url.as_ref(), page_type, feats, 200);
        }
        builder.build()
    }

    #[test]
    fn test_compute_delta_no_changes() {
        let map = site("test.com", vec![("https://test.com/", PageType::Home, BLANK)]);

        let delta = compute_delta(&map, &map, "instance-1");
        assert!(delta.nodes_added.is_empty());
        assert!(delta.nodes_removed.is_empty());
        assert!(delta.nodes_modified.is_empty());
    }

    #[test]
    fn test_compute_delta_detects_feature_change() {
        let url = "https://test.com/p1";
        let map1 = site("test.com", vec![(url, PageType::ProductDetail, priced(100.0))]);
        let map2 = site("test.com", vec![(url, PageType::ProductDetail, priced(89.99))]);

        let delta = compute_delta(&map1, &map2, "instance-1");
        assert_eq!(delta.nodes_modified.len(), 1);
        assert_eq!(delta.nodes_modified[0].0, 0); // node 0 modified
    }

    #[test]
    fn test_compute_delta_detects_new_node() {
        let map1 = site(
            "test.com",
            vec![("https://test.com/p1", PageType::ProductDetail, BLANK)],
        );
        let map2 = site(
            "test.com",
            vec![
                ("https://test.com/p1", PageType::ProductDetail, BLANK),
                ("https://test.com/p2", PageType::ProductDetail, BLANK),
            ],
        );

        let delta = compute_delta(&map1, &map2, "instance-1");
        assert_eq!(delta.nodes_added.len(), 1);
        assert_eq!(delta.nodes_added[0].url, "https://test.com/p2");
    }

    #[test]
    fn test_serialize_deserialize_delta() {
        let delta = MapDelta {
            domain: "test.com".to_string(),
            base_hash: [0u8; 32],
            timestamp: Utc::now(),
            cortex_instance_id: "test".to_string(),
            nodes_added: Vec::new(),
            nodes_removed: Vec::new(),
            nodes_modified: Vec::new(),
            edges_added: Vec::new(),
            edges_removed: Vec::new(),
            schema_delta: None,
        };

        let back = deserialize_delta(&serialize_delta(&delta)).unwrap();
        assert_eq!(back.domain, "test.com");
    }

    #[test]
    fn test_hash_map_deterministic() {
        let map = site("test.com", vec![("https://test.com/", PageType::Home, BLANK)]);

        let h1 = hash_map(&map);
        let h2 = hash_map(&map);
        assert_eq!(h1, h2);
    }

    #[test]
    fn test_strip_private_data() {
        let mut feats = BLANK;
        feats[112] = 5.0; // session_page_count
        feats[113] = 3.0; // session_action_count
        let mut map = site("test.com", vec![("https://test.com/", PageType::Home, feats)]);

        strip_private_data(&mut map);
        assert_eq!(map.features[0][112], 0.0);
        assert_eq!(map.features[0][113], 0.0);
    }

    // ── v4 Test Suite: Phase 2A — Delta Computation ──

    #[test]
    fn test_v4_delta_size_smaller_than_full_map() {
        // 50 products; the first `discounted` of them get a lower base price.
        let catalogue = |discounted: usize| -> SiteMap {
            let pages = (0..50usize)
                .map(|i| {
                    let base = if i < discounted { 80.0 } else { 100.0 };
                    (
                        format!("https://shop.com/p/{i}"),
                        PageType::ProductDetail,
                        priced(base + i as f32),
                    )
                })
                .collect();
            site("shop.com", pages)
        };

        let map1 = catalogue(0);
        // Change only 3 prices
        let map2 = catalogue(3);

        let delta = compute_delta(&map1, &map2, "test-instance");
        assert_eq!(delta.nodes_modified.len(), 3, "only 3 prices changed");
        assert!(delta.nodes_added.is_empty());
        assert!(delta.nodes_removed.is_empty());

        // Delta serialized size should be much smaller than full map
        let delta_bytes = serialize_delta(&delta);
        let map_bytes = map1.serialize();
        assert!(
            delta_bytes.len() < map_bytes.len() / 2,
            "delta ({}) should be much smaller than full map ({})",
            delta_bytes.len(),
            map_bytes.len()
        );
    }

    #[test]
    fn test_v4_delta_metadata() {
        let map1 = site("test.com", vec![("https://test.com/", PageType::Home, BLANK)]);
        let map2 = site(
            "test.com",
            vec![("https://test.com/", PageType::Home, priced(50.0))],
        );

        let delta = compute_delta(&map1, &map2, "instance-42");

        assert_eq!(delta.domain, "test.com");
        assert_eq!(delta.cortex_instance_id, "instance-42");
        assert_ne!(delta.base_hash, [0u8; 32], "base_hash should be set");
        // timestamp should be recent
        let age = Utc::now() - delta.timestamp;
        assert!(age.num_seconds() < 10, "timestamp should be recent");
    }

    #[test]
    fn test_v4_delta_roundtrip() {
        let map1 = site(
            "test.com",
            vec![("https://test.com/p1", PageType::ProductDetail, priced(100.0))],
        );
        let map2 = site(
            "test.com",
            vec![
                ("https://test.com/p1", PageType::ProductDetail, priced(80.0)),
                ("https://test.com/p2", PageType::ProductDetail, priced(100.0)),
            ],
        );

        let delta = compute_delta(&map1, &map2, "test");
        let back = deserialize_delta(&serialize_delta(&delta)).unwrap();

        assert_eq!(back.domain, delta.domain);
        assert_eq!(back.nodes_added.len(), delta.nodes_added.len());
        assert_eq!(back.nodes_modified.len(), delta.nodes_modified.len());
    }

    #[test]
    fn test_v4_privacy_strips_all_session_features() {
        let mut feats = BLANK;
        // Set all session features (dims 112-127) to 1.0, 2.0, …, 16.0
        for dim in 112..=127usize {
            feats[dim] = (dim - 111) as f32;
        }
        // Also set auth area flag
        feats[FEAT_IS_AUTH_AREA] = 1.0;
        let mut map = site(
            "test.com",
            vec![("https://test.com/account", PageType::Account, feats)],
        );

        strip_private_data(&mut map);

        // All session dims should be zeroed
        for dim in 112..=127 {
            assert_eq!(
                map.features[0][dim], 0.0,
                "session dim {dim} should be cleared"
            );
        }
    }

    #[test]
    fn test_v4_delta_detects_removed_nodes() {
        let map1 = site(
            "test.com",
            vec![
                ("https://test.com/p1", PageType::ProductDetail, BLANK),
                ("https://test.com/p2", PageType::ProductDetail, BLANK),
                ("https://test.com/p3", PageType::ProductDetail, BLANK),
            ],
        );
        // p2 and p3 removed
        let map2 = site(
            "test.com",
            vec![("https://test.com/p1", PageType::ProductDetail, BLANK)],
        );

        let delta = compute_delta(&map1, &map2, "test");
        assert_eq!(
            delta.nodes_removed.len(),
            2,
            "should detect 2 removed nodes"
        );
    }

    #[test]
    fn test_v4_hash_map_changes_with_content() {
        let url = "https://test.com/p1";
        let map1 = site("test.com", vec![(url, PageType::ProductDetail, BLANK)]);
        let map2 = site("test.com", vec![(url, PageType::ProductDetail, priced(50.0))]);

        let h1 = hash_map(&map1);
        let h2 = hash_map(&map2);
        assert_ne!(h1, h2, "different maps should have different hashes");
    }
}