Skip to main content

graphmind/graph/
catalog.rs

1//! Triple-level statistics for cost-based query optimization (ADR-015)
2//!
3//! GraphCatalog provides fine-grained statistics at the (source_label, edge_type, target_label)
4//! level, enabling the graph-native planner to accurately estimate cardinalities and choose
5//! optimal traversal directions.
6
7use super::types::{EdgeType, Label, NodeId};
8use std::collections::HashMap;
9
10/// A triple pattern representing a (source_label, edge_type, target_label) combination
11#[derive(Debug, Clone, PartialEq, Eq, Hash)]
12pub struct TriplePattern {
13    pub source_label: Label,
14    pub edge_type: EdgeType,
15    pub target_label: Label,
16}
17
18impl TriplePattern {
19    pub fn new(
20        source_label: impl Into<Label>,
21        edge_type: impl Into<EdgeType>,
22        target_label: impl Into<Label>,
23    ) -> Self {
24        TriplePattern {
25            source_label: source_label.into(),
26            edge_type: edge_type.into(),
27            target_label: target_label.into(),
28        }
29    }
30}
31
32/// Statistics for a single triple pattern
33#[derive(Debug, Clone)]
34pub struct TripleStats {
35    /// Number of edges matching this triple pattern
36    pub count: usize,
37    /// Average outgoing degree from source nodes for this pattern
38    pub avg_out_degree: f64,
39    /// Average incoming degree to target nodes for this pattern
40    pub avg_in_degree: f64,
41    /// Number of distinct source nodes
42    pub distinct_sources: usize,
43    /// Number of distinct target nodes
44    pub distinct_targets: usize,
45    /// Maximum outgoing degree for this pattern
46    pub max_out_degree: usize,
47}
48
49impl TripleStats {
50    fn new() -> Self {
51        TripleStats {
52            count: 0,
53            avg_out_degree: 0.0,
54            avg_in_degree: 0.0,
55            distinct_sources: 0,
56            distinct_targets: 0,
57            max_out_degree: 0,
58        }
59    }
60}
61
62/// Incrementally maintained graph catalog for cost-based optimization.
63///
64/// Extends (does not replace) the existing GraphStatistics.
65/// New planner uses GraphCatalog; old planner path keeps working.
66#[derive(Debug, Clone)]
67pub struct GraphCatalog {
68    /// Node count per label
69    pub label_counts: HashMap<Label, usize>,
70    /// Statistics per triple pattern (source_label, edge_type, target_label)
71    triple_stats: HashMap<TriplePattern, TripleStats>,
72    /// Per-triple per-source outgoing degree: triple_pattern -> source_node -> out_degree
73    source_degrees: HashMap<TriplePattern, HashMap<NodeId, usize>>,
74    /// Per-triple per-target incoming degree: triple_pattern -> target_node -> in_degree
75    target_degrees: HashMap<TriplePattern, HashMap<NodeId, usize>>,
76    /// Monotonically increasing version for cache invalidation
77    pub generation: u64,
78}
79
80impl GraphCatalog {
81    /// Create a new empty catalog
82    pub fn new() -> Self {
83        GraphCatalog {
84            label_counts: HashMap::new(),
85            triple_stats: HashMap::new(),
86            source_degrees: HashMap::new(),
87            target_degrees: HashMap::new(),
88            generation: 0,
89        }
90    }
91
92    /// Notify the catalog that a label was added to a node
93    pub fn on_label_added(&mut self, label: &Label) {
94        *self.label_counts.entry(label.clone()).or_insert(0) += 1;
95        self.generation += 1;
96    }
97
98    /// Notify the catalog that a label was removed from a node (e.g., node deleted)
99    pub fn on_label_removed(&mut self, label: &Label) {
100        if let Some(count) = self.label_counts.get_mut(label) {
101            *count = count.saturating_sub(1);
102        }
103        self.generation += 1;
104    }
105
106    /// Notify the catalog that an edge was created
107    ///
108    /// For a multi-label source and multi-label target, this generates
109    /// one triple entry per (src_label, edge_type, tgt_label) combination.
110    pub fn on_edge_created(
111        &mut self,
112        source_id: NodeId,
113        src_labels: &[Label],
114        edge_type: &EdgeType,
115        target_id: NodeId,
116        tgt_labels: &[Label],
117    ) {
118        for src_label in src_labels {
119            for tgt_label in tgt_labels {
120                let pattern =
121                    TriplePattern::new(src_label.clone(), edge_type.clone(), tgt_label.clone());
122
123                // Update source degree tracking
124                let src_degree = self
125                    .source_degrees
126                    .entry(pattern.clone())
127                    .or_default()
128                    .entry(source_id)
129                    .or_insert(0);
130                *src_degree += 1;
131                let new_src_degree = *src_degree;
132
133                // Update target degree tracking
134                let tgt_degree = self
135                    .target_degrees
136                    .entry(pattern.clone())
137                    .or_default()
138                    .entry(target_id)
139                    .or_insert(0);
140                *tgt_degree += 1;
141
142                // Update triple stats
143                let stats = self
144                    .triple_stats
145                    .entry(pattern.clone())
146                    .or_insert_with(TripleStats::new);
147                stats.count += 1;
148
149                // Recompute distinct sources/targets from degree maps
150                let src_map = self.source_degrees.get(&pattern).unwrap();
151                let tgt_map = self.target_degrees.get(&pattern).unwrap();
152                stats.distinct_sources = src_map.len();
153                stats.distinct_targets = tgt_map.len();
154
155                // Recompute averages
156                stats.avg_out_degree = stats.count as f64 / stats.distinct_sources as f64;
157                stats.avg_in_degree = stats.count as f64 / stats.distinct_targets as f64;
158
159                // Update max out degree
160                if new_src_degree > stats.max_out_degree {
161                    stats.max_out_degree = new_src_degree;
162                }
163            }
164        }
165        self.generation += 1;
166    }
167
168    /// Notify the catalog that an edge was deleted
169    pub fn on_edge_deleted(
170        &mut self,
171        source_id: NodeId,
172        src_labels: &[Label],
173        edge_type: &EdgeType,
174        target_id: NodeId,
175        tgt_labels: &[Label],
176    ) {
177        for src_label in src_labels {
178            for tgt_label in tgt_labels {
179                let pattern =
180                    TriplePattern::new(src_label.clone(), edge_type.clone(), tgt_label.clone());
181
182                // Update source degree tracking
183                if let Some(src_map) = self.source_degrees.get_mut(&pattern) {
184                    if let Some(degree) = src_map.get_mut(&source_id) {
185                        *degree = degree.saturating_sub(1);
186                        if *degree == 0 {
187                            src_map.remove(&source_id);
188                        }
189                    }
190                }
191
192                // Update target degree tracking
193                if let Some(tgt_map) = self.target_degrees.get_mut(&pattern) {
194                    if let Some(degree) = tgt_map.get_mut(&target_id) {
195                        *degree = degree.saturating_sub(1);
196                        if *degree == 0 {
197                            tgt_map.remove(&target_id);
198                        }
199                    }
200                }
201
202                // Update triple stats
203                if let Some(stats) = self.triple_stats.get_mut(&pattern) {
204                    stats.count = stats.count.saturating_sub(1);
205
206                    let src_map = self.source_degrees.get(&pattern);
207                    let tgt_map = self.target_degrees.get(&pattern);
208
209                    stats.distinct_sources = src_map.map(|m| m.len()).unwrap_or(0);
210                    stats.distinct_targets = tgt_map.map(|m| m.len()).unwrap_or(0);
211
212                    if stats.distinct_sources > 0 {
213                        stats.avg_out_degree = stats.count as f64 / stats.distinct_sources as f64;
214                    } else {
215                        stats.avg_out_degree = 0.0;
216                    }
217                    if stats.distinct_targets > 0 {
218                        stats.avg_in_degree = stats.count as f64 / stats.distinct_targets as f64;
219                    } else {
220                        stats.avg_in_degree = 0.0;
221                    }
222
223                    // Recompute max_out_degree (need full scan of source degrees)
224                    stats.max_out_degree = src_map
225                        .map(|m| m.values().copied().max().unwrap_or(0))
226                        .unwrap_or(0);
227
228                    // Remove empty triple stats
229                    if stats.count == 0 {
230                        self.triple_stats.remove(&pattern);
231                        self.source_degrees.remove(&pattern);
232                        self.target_degrees.remove(&pattern);
233                    }
234                }
235            }
236        }
237        self.generation += 1;
238    }
239
240    /// Get triple stats for a specific pattern
241    pub fn get_triple_stats(&self, pattern: &TriplePattern) -> Option<&TripleStats> {
242        self.triple_stats.get(pattern)
243    }
244
245    /// Get all triple stats
246    pub fn all_triple_stats(&self) -> &HashMap<TriplePattern, TripleStats> {
247        &self.triple_stats
248    }
249
250    /// Estimate the number of rows from a label scan
251    pub fn estimate_label_scan(&self, label: &Label) -> f64 {
252        self.label_counts.get(label).copied().unwrap_or(0) as f64
253    }
254
255    /// Estimate cardinality after an outgoing expand from source_label via edge_type
256    pub fn estimate_expand_out(&self, source_label: &Label, edge_type: &EdgeType) -> f64 {
257        // Sum avg_out_degree across all target labels for this (source, edge_type)
258        let mut total_degree = 0.0;
259        let mut found = false;
260        for (pattern, stats) in &self.triple_stats {
261            if &pattern.source_label == source_label && &pattern.edge_type == edge_type {
262                total_degree += stats.avg_out_degree;
263                found = true;
264            }
265        }
266        if found {
267            total_degree
268        } else {
269            1.0
270        } // default: assume 1 edge per node
271    }
272
273    /// Estimate cardinality after an incoming expand to target_label via edge_type
274    pub fn estimate_expand_in(&self, target_label: &Label, edge_type: &EdgeType) -> f64 {
275        let mut total_degree = 0.0;
276        let mut found = false;
277        for (pattern, stats) in &self.triple_stats {
278            if &pattern.target_label == target_label && &pattern.edge_type == edge_type {
279                total_degree += stats.avg_in_degree;
280                found = true;
281            }
282        }
283        if found {
284            total_degree
285        } else {
286            1.0
287        }
288    }
289
290    /// Estimate the probability that an edge exists between a specific source and target
291    pub fn estimate_edge_existence(
292        &self,
293        source_label: &Label,
294        edge_type: &EdgeType,
295        target_label: &Label,
296    ) -> f64 {
297        let pattern = TriplePattern::new(
298            source_label.clone(),
299            edge_type.clone(),
300            target_label.clone(),
301        );
302        match self.triple_stats.get(&pattern) {
303            Some(stats) => {
304                let source_count = self.label_counts.get(source_label).copied().unwrap_or(1) as f64;
305                let target_count = self.label_counts.get(target_label).copied().unwrap_or(1) as f64;
306                let possible_pairs = source_count * target_count;
307                if possible_pairs > 0.0 {
308                    (stats.count as f64 / possible_pairs).min(1.0)
309                } else {
310                    0.0
311                }
312            }
313            None => 0.0,
314        }
315    }
316
317    /// Recompute all catalog statistics from scratch using the graph store.
318    /// Used for consistency checks — result should match incrementally maintained stats.
319    pub fn recompute_full(store: &super::store::GraphStore) -> Self {
320        let mut catalog = GraphCatalog::new();
321
322        // Recompute label counts
323        for node in store.all_nodes() {
324            for label in &node.labels {
325                *catalog.label_counts.entry(label.clone()).or_insert(0) += 1;
326            }
327        }
328
329        // Recompute triple stats by iterating all edges
330        for node in store.all_nodes() {
331            let outgoing = store.get_outgoing_edge_targets(node.id);
332            for (_, _, target_id, edge_type) in &outgoing {
333                if let Some(target_node) = store.get_node(*target_id) {
334                    let src_labels: Vec<Label> = node.labels.iter().cloned().collect();
335                    let tgt_labels: Vec<Label> = target_node.labels.iter().cloned().collect();
336                    catalog.on_edge_created(
337                        node.id,
338                        &src_labels,
339                        edge_type,
340                        *target_id,
341                        &tgt_labels,
342                    );
343                    // Undo the generation bump from on_edge_created (we'll set it at the end)
344                    catalog.generation -= 1;
345                }
346            }
347        }
348
349        catalog.generation = 1;
350        catalog
351    }
352
353    /// Format catalog as human-readable text for EXPLAIN output
354    pub fn format(&self) -> String {
355        let mut result = String::new();
356        result.push_str("Triple Statistics:\n");
357
358        let mut patterns: Vec<_> = self.triple_stats.iter().collect();
359        patterns.sort_by(|a, b| b.1.count.cmp(&a.1.count));
360
361        for (pattern, stats) in patterns {
362            result.push_str(&format!(
363                "  (:{})--[:{}]-->(:{}) count={}, avg_out={:.1}, avg_in={:.1}, max_out={}, sources={}, targets={}\n",
364                pattern.source_label.as_str(),
365                pattern.edge_type.as_str(),
366                pattern.target_label.as_str(),
367                stats.count,
368                stats.avg_out_degree,
369                stats.avg_in_degree,
370                stats.max_out_degree,
371                stats.distinct_sources,
372                stats.distinct_targets,
373            ));
374        }
375
376        result
377    }
378
379    /// Reset catalog to empty state
380    pub fn clear(&mut self) {
381        self.label_counts.clear();
382        self.triple_stats.clear();
383        self.source_degrees.clear();
384        self.target_degrees.clear();
385        self.generation = 0;
386    }
387}
388
389#[cfg(test)]
390mod tests {
391    use super::*;
392
393    // ---- TDD: Tests written first, then implementation verified ----
394
395    #[test]
396    fn test_empty_catalog() {
397        let catalog = GraphCatalog::new();
398        assert!(catalog.triple_stats.is_empty());
399        assert!(catalog.label_counts.is_empty());
400        assert_eq!(catalog.generation, 0);
401        assert_eq!(catalog.estimate_label_scan(&Label::new("Person")), 0.0);
402    }
403
404    #[test]
405    fn test_label_tracking() {
406        let mut catalog = GraphCatalog::new();
407        catalog.on_label_added(&Label::new("Person"));
408        catalog.on_label_added(&Label::new("Person"));
409        catalog.on_label_added(&Label::new("Company"));
410
411        assert_eq!(catalog.estimate_label_scan(&Label::new("Person")), 2.0);
412        assert_eq!(catalog.estimate_label_scan(&Label::new("Company")), 1.0);
413        assert_eq!(catalog.estimate_label_scan(&Label::new("Unknown")), 0.0);
414
415        catalog.on_label_removed(&Label::new("Person"));
416        assert_eq!(catalog.estimate_label_scan(&Label::new("Person")), 1.0);
417    }
418
419    #[test]
420    fn test_single_triple() {
421        let mut catalog = GraphCatalog::new();
422
423        let src = NodeId::new(1);
424        let tgt = NodeId::new(2);
425        catalog.on_edge_created(
426            src,
427            &[Label::new("Person")],
428            &EdgeType::new("KNOWS"),
429            tgt,
430            &[Label::new("Person")],
431        );
432
433        let pattern = TriplePattern::new("Person", "KNOWS", "Person");
434        let stats = catalog.get_triple_stats(&pattern).unwrap();
435
436        assert_eq!(stats.count, 1);
437        assert_eq!(stats.distinct_sources, 1);
438        assert_eq!(stats.distinct_targets, 1);
439        assert_eq!(stats.avg_out_degree, 1.0);
440        assert_eq!(stats.avg_in_degree, 1.0);
441        assert_eq!(stats.max_out_degree, 1);
442    }
443
444    #[test]
445    fn test_incremental_edge_created() {
446        let mut catalog = GraphCatalog::new();
447
448        // Person 1 knows Person 2 and Person 3
449        let p1 = NodeId::new(1);
450        let p2 = NodeId::new(2);
451        let p3 = NodeId::new(3);
452
453        catalog.on_edge_created(
454            p1,
455            &[Label::new("Person")],
456            &EdgeType::new("KNOWS"),
457            p2,
458            &[Label::new("Person")],
459        );
460        catalog.on_edge_created(
461            p1,
462            &[Label::new("Person")],
463            &EdgeType::new("KNOWS"),
464            p3,
465            &[Label::new("Person")],
466        );
467
468        let pattern = TriplePattern::new("Person", "KNOWS", "Person");
469        let stats = catalog.get_triple_stats(&pattern).unwrap();
470
471        assert_eq!(stats.count, 2);
472        assert_eq!(stats.distinct_sources, 1); // only p1 is source
473        assert_eq!(stats.distinct_targets, 2); // p2 and p3 are targets
474        assert_eq!(stats.avg_out_degree, 2.0); // 2 edges / 1 source
475        assert_eq!(stats.avg_in_degree, 1.0); // 2 edges / 2 targets
476        assert_eq!(stats.max_out_degree, 2);
477    }
478
479    #[test]
480    fn test_incremental_edge_deleted() {
481        let mut catalog = GraphCatalog::new();
482
483        let p1 = NodeId::new(1);
484        let p2 = NodeId::new(2);
485        let p3 = NodeId::new(3);
486
487        catalog.on_edge_created(
488            p1,
489            &[Label::new("Person")],
490            &EdgeType::new("KNOWS"),
491            p2,
492            &[Label::new("Person")],
493        );
494        catalog.on_edge_created(
495            p1,
496            &[Label::new("Person")],
497            &EdgeType::new("KNOWS"),
498            p3,
499            &[Label::new("Person")],
500        );
501
502        // Delete one edge
503        catalog.on_edge_deleted(
504            p1,
505            &[Label::new("Person")],
506            &EdgeType::new("KNOWS"),
507            p3,
508            &[Label::new("Person")],
509        );
510
511        let pattern = TriplePattern::new("Person", "KNOWS", "Person");
512        let stats = catalog.get_triple_stats(&pattern).unwrap();
513
514        assert_eq!(stats.count, 1);
515        assert_eq!(stats.distinct_sources, 1);
516        assert_eq!(stats.distinct_targets, 1); // p3 removed
517        assert_eq!(stats.avg_out_degree, 1.0);
518        assert_eq!(stats.max_out_degree, 1);
519    }
520
521    #[test]
522    fn test_edge_deleted_removes_empty_pattern() {
523        let mut catalog = GraphCatalog::new();
524
525        let p1 = NodeId::new(1);
526        let p2 = NodeId::new(2);
527
528        catalog.on_edge_created(
529            p1,
530            &[Label::new("Person")],
531            &EdgeType::new("KNOWS"),
532            p2,
533            &[Label::new("Person")],
534        );
535        catalog.on_edge_deleted(
536            p1,
537            &[Label::new("Person")],
538            &EdgeType::new("KNOWS"),
539            p2,
540            &[Label::new("Person")],
541        );
542
543        let pattern = TriplePattern::new("Person", "KNOWS", "Person");
544        assert!(catalog.get_triple_stats(&pattern).is_none());
545    }
546
547    #[test]
548    fn test_multi_label_nodes() {
549        let mut catalog = GraphCatalog::new();
550
551        // Source has labels [Person, Employee], target has labels [Person, Manager]
552        let p1 = NodeId::new(1);
553        let p2 = NodeId::new(2);
554
555        catalog.on_edge_created(
556            p1,
557            &[Label::new("Person"), Label::new("Employee")],
558            &EdgeType::new("REPORTS_TO"),
559            p2,
560            &[Label::new("Person"), Label::new("Manager")],
561        );
562
563        // Should create 4 triple entries: Person->Person, Person->Manager, Employee->Person, Employee->Manager
564        assert_eq!(catalog.triple_stats.len(), 4);
565
566        let pp = TriplePattern::new("Person", "REPORTS_TO", "Person");
567        assert!(catalog.get_triple_stats(&pp).is_some());
568
569        let pm = TriplePattern::new("Person", "REPORTS_TO", "Manager");
570        assert!(catalog.get_triple_stats(&pm).is_some());
571
572        let ep = TriplePattern::new("Employee", "REPORTS_TO", "Person");
573        assert!(catalog.get_triple_stats(&ep).is_some());
574
575        let em = TriplePattern::new("Employee", "REPORTS_TO", "Manager");
576        assert!(catalog.get_triple_stats(&em).is_some());
577    }
578
579    #[test]
580    fn test_max_out_degree_tracking() {
581        let mut catalog = GraphCatalog::new();
582
583        let p1 = NodeId::new(1);
584        let p2 = NodeId::new(2);
585        let p3 = NodeId::new(3);
586        let p4 = NodeId::new(4);
587
588        // p1 knows p2, p3, p4 (degree 3)
589        catalog.on_edge_created(
590            p1,
591            &[Label::new("Person")],
592            &EdgeType::new("KNOWS"),
593            p2,
594            &[Label::new("Person")],
595        );
596        catalog.on_edge_created(
597            p1,
598            &[Label::new("Person")],
599            &EdgeType::new("KNOWS"),
600            p3,
601            &[Label::new("Person")],
602        );
603        catalog.on_edge_created(
604            p1,
605            &[Label::new("Person")],
606            &EdgeType::new("KNOWS"),
607            p4,
608            &[Label::new("Person")],
609        );
610
611        // p2 knows p3 (degree 1)
612        catalog.on_edge_created(
613            p2,
614            &[Label::new("Person")],
615            &EdgeType::new("KNOWS"),
616            p3,
617            &[Label::new("Person")],
618        );
619
620        let pattern = TriplePattern::new("Person", "KNOWS", "Person");
621        let stats = catalog.get_triple_stats(&pattern).unwrap();
622
623        assert_eq!(stats.max_out_degree, 3);
624        assert_eq!(stats.count, 4);
625        assert_eq!(stats.distinct_sources, 2); // p1 and p2
626    }
627
628    #[test]
629    fn test_estimate_expand_out() {
630        let mut catalog = GraphCatalog::new();
631
632        let p1 = NodeId::new(1);
633        let p2 = NodeId::new(2);
634        let c1 = NodeId::new(3);
635
636        // Each Person knows 2 Persons
637        catalog.on_edge_created(
638            p1,
639            &[Label::new("Person")],
640            &EdgeType::new("KNOWS"),
641            p2,
642            &[Label::new("Person")],
643        );
644        catalog.on_edge_created(
645            p2,
646            &[Label::new("Person")],
647            &EdgeType::new("KNOWS"),
648            p1,
649            &[Label::new("Person")],
650        );
651
652        // Each Person works at 1 Company
653        catalog.on_edge_created(
654            p1,
655            &[Label::new("Person")],
656            &EdgeType::new("WORKS_AT"),
657            c1,
658            &[Label::new("Company")],
659        );
660
661        let knows_degree =
662            catalog.estimate_expand_out(&Label::new("Person"), &EdgeType::new("KNOWS"));
663        assert_eq!(knows_degree, 1.0); // 2 edges / 2 sources = 1.0
664
665        let works_degree =
666            catalog.estimate_expand_out(&Label::new("Person"), &EdgeType::new("WORKS_AT"));
667        assert_eq!(works_degree, 1.0); // 1 edge / 1 source = 1.0
668    }
669
670    #[test]
671    fn test_estimate_expand_in() {
672        let mut catalog = GraphCatalog::new();
673
674        let p1 = NodeId::new(1);
675        let p2 = NodeId::new(2);
676        let p3 = NodeId::new(3);
677        let c1 = NodeId::new(4);
678
679        // 3 Person nodes WORKS_AT 1 Company
680        catalog.on_edge_created(
681            p1,
682            &[Label::new("Person")],
683            &EdgeType::new("WORKS_AT"),
684            c1,
685            &[Label::new("Company")],
686        );
687        catalog.on_edge_created(
688            p2,
689            &[Label::new("Person")],
690            &EdgeType::new("WORKS_AT"),
691            c1,
692            &[Label::new("Company")],
693        );
694        catalog.on_edge_created(
695            p3,
696            &[Label::new("Person")],
697            &EdgeType::new("WORKS_AT"),
698            c1,
699            &[Label::new("Company")],
700        );
701
702        let in_degree =
703            catalog.estimate_expand_in(&Label::new("Company"), &EdgeType::new("WORKS_AT"));
704        assert_eq!(in_degree, 3.0); // 3 edges / 1 target = 3.0
705    }
706
707    #[test]
708    fn test_estimate_edge_existence() {
709        let mut catalog = GraphCatalog::new();
710
711        // 2 Person nodes, 1 Company
712        catalog.on_label_added(&Label::new("Person"));
713        catalog.on_label_added(&Label::new("Person"));
714        catalog.on_label_added(&Label::new("Company"));
715
716        let p1 = NodeId::new(1);
717        let c1 = NodeId::new(3);
718
719        // 1 of 2 persons works at the company
720        catalog.on_edge_created(
721            p1,
722            &[Label::new("Person")],
723            &EdgeType::new("WORKS_AT"),
724            c1,
725            &[Label::new("Company")],
726        );
727
728        let prob = catalog.estimate_edge_existence(
729            &Label::new("Person"),
730            &EdgeType::new("WORKS_AT"),
731            &Label::new("Company"),
732        );
733        // 1 edge / (2 persons * 1 company) = 0.5
734        assert!((prob - 0.5).abs() < 1e-10);
735
736        // Non-existent pattern
737        let prob = catalog.estimate_edge_existence(
738            &Label::new("Company"),
739            &EdgeType::new("KNOWS"),
740            &Label::new("Person"),
741        );
742        assert_eq!(prob, 0.0);
743    }
744
745    #[test]
746    fn test_generation_increments() {
747        let mut catalog = GraphCatalog::new();
748        assert_eq!(catalog.generation, 0);
749
750        catalog.on_label_added(&Label::new("Person"));
751        assert_eq!(catalog.generation, 1);
752
753        let p1 = NodeId::new(1);
754        let p2 = NodeId::new(2);
755        catalog.on_edge_created(
756            p1,
757            &[Label::new("Person")],
758            &EdgeType::new("KNOWS"),
759            p2,
760            &[Label::new("Person")],
761        );
762        assert_eq!(catalog.generation, 2);
763
764        catalog.on_edge_deleted(
765            p1,
766            &[Label::new("Person")],
767            &EdgeType::new("KNOWS"),
768            p2,
769            &[Label::new("Person")],
770        );
771        assert_eq!(catalog.generation, 3);
772    }
773
774    #[test]
775    fn test_format_output() {
776        let mut catalog = GraphCatalog::new();
777        let p1 = NodeId::new(1);
778        let p2 = NodeId::new(2);
779
780        catalog.on_edge_created(
781            p1,
782            &[Label::new("Person")],
783            &EdgeType::new("KNOWS"),
784            p2,
785            &[Label::new("Person")],
786        );
787
788        let output = catalog.format();
789        assert!(output.contains("Triple Statistics:"));
790        assert!(output.contains("Person"));
791        assert!(output.contains("KNOWS"));
792        assert!(output.contains("count=1"));
793    }
794
795    #[test]
796    fn test_clear() {
797        let mut catalog = GraphCatalog::new();
798        catalog.on_label_added(&Label::new("Person"));
799        let p1 = NodeId::new(1);
800        let p2 = NodeId::new(2);
801        catalog.on_edge_created(
802            p1,
803            &[Label::new("Person")],
804            &EdgeType::new("KNOWS"),
805            p2,
806            &[Label::new("Person")],
807        );
808
809        catalog.clear();
810
811        assert!(catalog.label_counts.is_empty());
812        assert!(catalog.triple_stats.is_empty());
813        assert_eq!(catalog.generation, 0);
814    }
815
816    #[test]
817    fn test_recompute_full_matches_incremental() {
818        use crate::graph::store::GraphStore;
819
820        let mut store = GraphStore::new();
821        let p1 = store.create_node("Person");
822        let p2 = store.create_node("Person");
823        let p3 = store.create_node("Person");
824        let c1 = store.create_node("Company");
825
826        store.create_edge(p1, p2, "KNOWS").unwrap();
827        store.create_edge(p2, p3, "KNOWS").unwrap();
828        store.create_edge(p1, c1, "WORKS_AT").unwrap();
829
830        let recomputed = GraphCatalog::recompute_full(&store);
831
832        // Should have 2 triple patterns: Person-KNOWS->Person, Person-WORKS_AT->Company
833        assert_eq!(recomputed.all_triple_stats().len(), 2);
834
835        let knows = TriplePattern::new("Person", "KNOWS", "Person");
836        let knows_stats = recomputed.get_triple_stats(&knows).unwrap();
837        assert_eq!(knows_stats.count, 2);
838
839        let works = TriplePattern::new("Person", "WORKS_AT", "Company");
840        let works_stats = recomputed.get_triple_stats(&works).unwrap();
841        assert_eq!(works_stats.count, 1);
842
843        // Verify label counts
844        assert_eq!(
845            recomputed
846                .label_counts
847                .get(&Label::new("Person"))
848                .copied()
849                .unwrap_or(0),
850            3
851        );
852        assert_eq!(
853            recomputed
854                .label_counts
855                .get(&Label::new("Company"))
856                .copied()
857                .unwrap_or(0),
858            1
859        );
860    }
861
862    #[test]
863    fn test_catalog_maintained_on_create_delete_edge() {
864        use crate::graph::store::GraphStore;
865
866        let mut store = GraphStore::new();
867        let p1 = store.create_node("Person");
868        let p2 = store.create_node("Person");
869
870        let edge_id = store.create_edge(p1, p2, "KNOWS").unwrap();
871
872        // Catalog should reflect the edge
873        let pattern = TriplePattern::new("Person", "KNOWS", "Person");
874        let stats = store.catalog().get_triple_stats(&pattern).unwrap();
875        assert_eq!(stats.count, 1);
876
877        // Delete the edge
878        store.delete_edge(edge_id).unwrap();
879
880        // Catalog should be empty
881        assert!(store.catalog().get_triple_stats(&pattern).is_none());
882    }
883
884    #[test]
885    fn test_estimate_expand_unknown_returns_default() {
886        let catalog = GraphCatalog::new();
887
888        // Unknown patterns should return 1.0 (default assumption)
889        let out = catalog.estimate_expand_out(&Label::new("Unknown"), &EdgeType::new("UNKNOWN"));
890        assert_eq!(out, 1.0);
891
892        let in_d = catalog.estimate_expand_in(&Label::new("Unknown"), &EdgeType::new("UNKNOWN"));
893        assert_eq!(in_d, 1.0);
894    }
895
896    #[test]
897    fn test_asymmetric_graph_statistics() {
898        // Model the "100 Companies vs 1M Persons" scenario from ADR-015
899        let mut catalog = GraphCatalog::new();
900
901        // Simulate: 10 persons each working at 1 company
902        for i in 0..10 {
903            catalog.on_edge_created(
904                NodeId::new(i),
905                &[Label::new("Person")],
906                &EdgeType::new("WORKS_AT"),
907                NodeId::new(100), // all work at same company
908                &[Label::new("Company")],
909            );
910        }
911
912        // Outgoing from Person via WORKS_AT: avg_out = 1.0 (each person -> 1 company)
913        let out = catalog.estimate_expand_out(&Label::new("Person"), &EdgeType::new("WORKS_AT"));
914        assert_eq!(out, 1.0);
915
916        // Incoming to Company via WORKS_AT: avg_in = 10.0 (one company <- 10 persons)
917        let in_d = catalog.estimate_expand_in(&Label::new("Company"), &EdgeType::new("WORKS_AT"));
918        assert_eq!(in_d, 10.0);
919
920        // This tells the planner: starting from Company and expanding incoming is 10x more expensive
921        // than starting from Person and expanding outgoing
922    }
923}