Skip to main content

agentic_codebase/workspace/
compare.rs

1//! Multi-Codebase Compare Enhancement — Invention 10.
2//!
3//! Goes beyond symbol-level comparison to structural, conceptual, and
4//! pattern-level differences between codebases loaded in a workspace.
5
6use std::collections::HashMap;
7
8use serde::{Deserialize, Serialize};
9
10use crate::graph::CodeGraph;
11use crate::types::CodeUnitType;
12
13// ── Types ────────────────────────────────────────────────────────────────────
14
15/// Structural diff between two codebases.
16#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct StructuralDiff {
18    /// Modules/directories in A but not B.
19    pub only_in_a: Vec<String>,
20    /// Modules/directories in B but not A.
21    pub only_in_b: Vec<String>,
22    /// Modules present in both but with different structure.
23    pub modified: Vec<ModuleDiff>,
24}
25
26/// Diff of a single module between two codebases.
27#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct ModuleDiff {
29    /// Module name or path.
30    pub module: String,
31    /// Symbols in A but not B.
32    pub symbols_only_a: Vec<String>,
33    /// Symbols in B but not A.
34    pub symbols_only_b: Vec<String>,
35    /// Symbols present in both (possibly with different types/signatures).
36    pub common_symbols: Vec<String>,
37}
38
39/// Conceptual diff — how high-level concepts differ.
40#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct ConceptualDiff {
42    /// Concept name (e.g., "authentication", "error_handling").
43    pub concept: String,
44    /// How it appears in codebase A.
45    pub in_a: Vec<String>,
46    /// How it appears in codebase B.
47    pub in_b: Vec<String>,
48    /// Key differences.
49    pub differences: Vec<String>,
50}
51
52/// Pattern diff — how design patterns differ.
53#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct PatternDiff {
55    /// Pattern name.
56    pub pattern: String,
57    /// Instances in A.
58    pub instances_a: usize,
59    /// Instances in B.
60    pub instances_b: usize,
61    /// Notable variations between A and B.
62    pub variations: Vec<PatternVariation>,
63}
64
65/// A variation in how a pattern is applied.
66#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct PatternVariation {
68    /// Description of the variation.
69    pub description: String,
70    /// Which codebase (A or B).
71    pub source: String,
72}
73
74/// Full comparison result between two codebases.
75#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct CodebaseComparison {
77    /// Label for codebase A.
78    pub label_a: String,
79    /// Label for codebase B.
80    pub label_b: String,
81    /// Structural differences.
82    pub structural: StructuralDiff,
83    /// Conceptual differences.
84    pub conceptual: Vec<ConceptualDiff>,
85    /// Pattern differences.
86    pub patterns: Vec<PatternDiff>,
87    /// Summary statistics.
88    pub summary: ComparisonSummary,
89}
90
91/// Summary statistics of a comparison.
92#[derive(Debug, Clone, Serialize, Deserialize)]
93pub struct ComparisonSummary {
94    /// Total units in A.
95    pub units_a: usize,
96    /// Total units in B.
97    pub units_b: usize,
98    /// Number of common symbols.
99    pub common_symbols: usize,
100    /// Symbols unique to A.
101    pub unique_to_a: usize,
102    /// Symbols unique to B.
103    pub unique_to_b: usize,
104    /// Similarity score 0.0–1.0.
105    pub similarity: f64,
106}
107
108/// A migration step for porting from A to B.
109#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct MigrationStep {
111    /// Order of this step.
112    pub order: usize,
113    /// What to migrate.
114    pub description: String,
115    /// Source symbols involved.
116    pub source_symbols: Vec<String>,
117    /// Estimated effort (low/medium/high).
118    pub effort: String,
119    /// Dependencies (other steps that must come first).
120    pub dependencies: Vec<usize>,
121}
122
123// ── CodebaseComparer ─────────────────────────────────────────────────────────
124
125/// Compares two codebases at multiple levels.
126pub struct CodebaseComparer<'a, 'b> {
127    graph_a: &'a CodeGraph,
128    graph_b: &'b CodeGraph,
129    label_a: String,
130    label_b: String,
131}
132
133impl<'a, 'b> CodebaseComparer<'a, 'b> {
134    pub fn new(
135        graph_a: &'a CodeGraph,
136        label_a: &str,
137        graph_b: &'b CodeGraph,
138        label_b: &str,
139    ) -> Self {
140        Self {
141            graph_a,
142            graph_b,
143            label_a: label_a.to_string(),
144            label_b: label_b.to_string(),
145        }
146    }
147
148    /// Full structural + conceptual + pattern comparison.
149    pub fn compare(&self) -> CodebaseComparison {
150        let structural = self.compare_structural();
151        let conceptual = self.compare_conceptual();
152        let patterns = self.compare_patterns();
153
154        // Compute summary
155        let names_a: std::collections::HashSet<String> = self
156            .graph_a
157            .units()
158            .iter()
159            .map(|u| u.name.to_lowercase())
160            .collect();
161        let names_b: std::collections::HashSet<String> = self
162            .graph_b
163            .units()
164            .iter()
165            .map(|u| u.name.to_lowercase())
166            .collect();
167
168        let common: std::collections::HashSet<&String> = names_a.intersection(&names_b).collect();
169        let unique_a = names_a.len() - common.len();
170        let unique_b = names_b.len() - common.len();
171
172        let total = names_a.len() + names_b.len();
173        let similarity = if total > 0 {
174            (common.len() * 2) as f64 / total as f64
175        } else {
176            0.0
177        };
178
179        CodebaseComparison {
180            label_a: self.label_a.clone(),
181            label_b: self.label_b.clone(),
182            structural,
183            conceptual,
184            patterns,
185            summary: ComparisonSummary {
186                units_a: self.graph_a.unit_count(),
187                units_b: self.graph_b.unit_count(),
188                common_symbols: common.len(),
189                unique_to_a: unique_a,
190                unique_to_b: unique_b,
191                similarity,
192            },
193        }
194    }
195
196    /// Compare how a specific concept is implemented across both codebases.
197    pub fn compare_concept(&self, concept: &str) -> ConceptualDiff {
198        let keywords: Vec<&str> = concept.split_whitespace().collect();
199
200        let find_matches = |graph: &CodeGraph| -> Vec<String> {
201            graph
202                .units()
203                .iter()
204                .filter(|u| {
205                    let name_lower = u.name.to_lowercase();
206                    keywords
207                        .iter()
208                        .any(|kw| name_lower.contains(&kw.to_lowercase()))
209                })
210                .map(|u| format!("{} ({})", u.name, u.unit_type.label()))
211                .collect()
212        };
213
214        let in_a = find_matches(self.graph_a);
215        let in_b = find_matches(self.graph_b);
216
217        let mut differences = Vec::new();
218        if in_a.is_empty() && !in_b.is_empty() {
219            differences.push(format!("'{}' not found in {}", concept, self.label_a));
220        } else if !in_a.is_empty() && in_b.is_empty() {
221            differences.push(format!("'{}' not found in {}", concept, self.label_b));
222        } else if in_a.len() != in_b.len() {
223            differences.push(format!(
224                "Different number of implementations: {} in {}, {} in {}",
225                in_a.len(),
226                self.label_a,
227                in_b.len(),
228                self.label_b
229            ));
230        }
231
232        ConceptualDiff {
233            concept: concept.to_string(),
234            in_a,
235            in_b,
236            differences,
237        }
238    }
239
240    /// Generate an ordered migration plan from A to B.
241    pub fn migration_plan(&self) -> Vec<MigrationStep> {
242        let names_a: std::collections::HashSet<String> = self
243            .graph_a
244            .units()
245            .iter()
246            .map(|u| u.name.clone())
247            .collect();
248        let names_b: std::collections::HashSet<String> = self
249            .graph_b
250            .units()
251            .iter()
252            .map(|u| u.name.clone())
253            .collect();
254
255        let mut steps = Vec::new();
256        let mut order = 1;
257
258        // Step 1: Types first (they're dependencies for functions)
259        let types_to_port: Vec<String> = self
260            .graph_a
261            .units()
262            .iter()
263            .filter(|u| u.unit_type == CodeUnitType::Type && !names_b.contains(&u.name))
264            .map(|u| u.name.clone())
265            .collect();
266
267        if !types_to_port.is_empty() {
268            steps.push(MigrationStep {
269                order,
270                description: format!("Port {} type definitions", types_to_port.len()),
271                source_symbols: types_to_port,
272                effort: "medium".to_string(),
273                dependencies: Vec::new(),
274            });
275            order += 1;
276        }
277
278        // Step 2: Functions
279        let fns_to_port: Vec<String> = self
280            .graph_a
281            .units()
282            .iter()
283            .filter(|u| u.unit_type == CodeUnitType::Function && !names_b.contains(&u.name))
284            .map(|u| u.name.clone())
285            .collect();
286
287        if !fns_to_port.is_empty() {
288            let dep = if order > 1 { vec![1] } else { Vec::new() };
289            steps.push(MigrationStep {
290                order,
291                description: format!("Port {} functions", fns_to_port.len()),
292                source_symbols: fns_to_port,
293                effort: "high".to_string(),
294                dependencies: dep,
295            });
296            order += 1;
297        }
298
299        // Step 3: Tests
300        let tests_to_port: Vec<String> = self
301            .graph_a
302            .units()
303            .iter()
304            .filter(|u| u.unit_type == CodeUnitType::Test && !names_b.contains(&u.name))
305            .map(|u| u.name.clone())
306            .collect();
307
308        if !tests_to_port.is_empty() {
309            let dep = if order > 1 {
310                vec![order - 1]
311            } else {
312                Vec::new()
313            };
314            steps.push(MigrationStep {
315                order,
316                description: format!("Port {} tests", tests_to_port.len()),
317                source_symbols: tests_to_port,
318                effort: "medium".to_string(),
319                dependencies: dep,
320            });
321        }
322
323        // Step 4: Remaining symbols not already covered
324        let covered: std::collections::HashSet<String> = steps
325            .iter()
326            .flat_map(|s| s.source_symbols.iter().cloned())
327            .collect();
328
329        let remaining: Vec<String> = names_a
330            .difference(&names_b)
331            .filter(|n| !covered.contains(*n))
332            .cloned()
333            .collect();
334
335        if !remaining.is_empty() {
336            let prev_order = steps.last().map(|s| s.order).unwrap_or(0);
337            steps.push(MigrationStep {
338                order: prev_order + 1,
339                description: format!("Port {} remaining symbols", remaining.len()),
340                source_symbols: remaining,
341                effort: "low".to_string(),
342                dependencies: if prev_order > 0 {
343                    vec![prev_order]
344                } else {
345                    Vec::new()
346                },
347            });
348        }
349
350        steps
351    }
352
353    // ── Internal ─────────────────────────────────────────────────────────
354
355    fn compare_structural(&self) -> StructuralDiff {
356        let dirs_a = self.extract_directories(self.graph_a);
357        let dirs_b = self.extract_directories(self.graph_b);
358
359        let only_in_a: Vec<String> = dirs_a
360            .keys()
361            .filter(|d| !dirs_b.contains_key(*d))
362            .cloned()
363            .collect();
364        let only_in_b: Vec<String> = dirs_b
365            .keys()
366            .filter(|d| !dirs_a.contains_key(*d))
367            .cloned()
368            .collect();
369
370        let mut modified = Vec::new();
371        for (dir, syms_a) in &dirs_a {
372            if let Some(syms_b) = dirs_b.get(dir) {
373                let set_a: std::collections::HashSet<&String> = syms_a.iter().collect();
374                let set_b: std::collections::HashSet<&String> = syms_b.iter().collect();
375
376                let only_a: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
377                let only_b_list: Vec<String> =
378                    set_b.difference(&set_a).map(|s| (*s).clone()).collect();
379                let common: Vec<String> =
380                    set_a.intersection(&set_b).map(|s| (*s).clone()).collect();
381
382                if !only_a.is_empty() || !only_b_list.is_empty() {
383                    modified.push(ModuleDiff {
384                        module: dir.clone(),
385                        symbols_only_a: only_a,
386                        symbols_only_b: only_b_list,
387                        common_symbols: common,
388                    });
389                }
390            }
391        }
392
393        StructuralDiff {
394            only_in_a,
395            only_in_b,
396            modified,
397        }
398    }
399
400    fn compare_conceptual(&self) -> Vec<ConceptualDiff> {
401        let concepts = [
402            "auth", "payment", "user", "database", "api", "error", "config", "cache", "log",
403        ];
404
405        concepts
406            .iter()
407            .map(|c| self.compare_concept(c))
408            .filter(|d| !d.in_a.is_empty() || !d.in_b.is_empty())
409            .collect()
410    }
411
412    fn compare_patterns(&self) -> Vec<PatternDiff> {
413        let suffixes = [
414            "handler",
415            "service",
416            "controller",
417            "repository",
418            "factory",
419            "manager",
420        ];
421        let mut diffs = Vec::new();
422
423        for suffix in &suffixes {
424            let count_a = self
425                .graph_a
426                .units()
427                .iter()
428                .filter(|u| u.name.to_lowercase().ends_with(suffix))
429                .count();
430            let count_b = self
431                .graph_b
432                .units()
433                .iter()
434                .filter(|u| u.name.to_lowercase().ends_with(suffix))
435                .count();
436
437            if count_a > 0 || count_b > 0 {
438                let mut variations = Vec::new();
439                if count_a > 0 && count_b == 0 {
440                    variations.push(PatternVariation {
441                        description: format!("*_{} pattern only used in {}", suffix, self.label_a),
442                        source: self.label_a.clone(),
443                    });
444                } else if count_b > 0 && count_a == 0 {
445                    variations.push(PatternVariation {
446                        description: format!("*_{} pattern only used in {}", suffix, self.label_b),
447                        source: self.label_b.clone(),
448                    });
449                }
450
451                diffs.push(PatternDiff {
452                    pattern: format!("*_{}", suffix),
453                    instances_a: count_a,
454                    instances_b: count_b,
455                    variations,
456                });
457            }
458        }
459
460        diffs
461    }
462
463    fn extract_directories(&self, graph: &CodeGraph) -> HashMap<String, Vec<String>> {
464        let mut dirs: HashMap<String, Vec<String>> = HashMap::new();
465        for unit in graph.units() {
466            let dir = unit
467                .file_path
468                .parent()
469                .map(|p| p.display().to_string())
470                .unwrap_or_default();
471            dirs.entry(dir).or_default().push(unit.name.clone());
472        }
473        dirs
474    }
475}
476
477// ── Tests ────────────────────────────────────────────────────────────────────
478
#[cfg(test)]
mod tests {
    use super::*;
    use crate::types::{CodeUnit, CodeUnitType, Language, Span};
    use std::path::PathBuf;

    /// Build a Rust-language code unit for the fixture graphs below.
    fn rust_unit(
        kind: CodeUnitType,
        name: &str,
        qualified: &str,
        file: &str,
        span: Span,
    ) -> CodeUnit {
        CodeUnit::new(
            kind,
            Language::Rust,
            name.to_string(),
            qualified.to_string(),
            PathBuf::from(file),
            span,
        )
    }

    /// Fixture A: two billing units (a function and a type) plus one auth function.
    fn graph_a() -> CodeGraph {
        let mut g = CodeGraph::with_default_dimension();
        g.add_unit(rust_unit(
            CodeUnitType::Function,
            "process_payment",
            "billing::process_payment",
            "src/billing.rs",
            Span::new(1, 0, 20, 0),
        ));
        g.add_unit(rust_unit(
            CodeUnitType::Type,
            "PaymentResult",
            "billing::PaymentResult",
            "src/billing.rs",
            Span::new(21, 0, 30, 0),
        ));
        g.add_unit(rust_unit(
            CodeUnitType::Function,
            "auth_user",
            "auth::auth_user",
            "src/auth.rs",
            Span::new(1, 0, 15, 0),
        ));
        g
    }

    /// Fixture B: two billing functions, one of which shares its name with A.
    fn graph_b() -> CodeGraph {
        let mut g = CodeGraph::with_default_dimension();
        g.add_unit(rust_unit(
            CodeUnitType::Function,
            "process_payment",
            "billing::process_payment",
            "src/billing.rs",
            Span::new(1, 0, 25, 0),
        ));
        g.add_unit(rust_unit(
            CodeUnitType::Function,
            "validate_payment",
            "billing::validate_payment",
            "src/billing.rs",
            Span::new(26, 0, 40, 0),
        ));
        g
    }

    #[test]
    fn compare_finds_differences() {
        let (a, b) = (graph_a(), graph_b());
        let summary = CodebaseComparer::new(&a, "legacy", &b, "new")
            .compare()
            .summary;

        assert_eq!(summary.units_a, 3);
        assert_eq!(summary.units_b, 2);
        assert!(summary.common_symbols >= 1); // process_payment
    }

    #[test]
    fn compare_concept() {
        let (a, b) = (graph_a(), graph_b());
        let diff = CodebaseComparer::new(&a, "legacy", &b, "new").compare_concept("payment");

        // Both fixtures contain at least one unit with "payment" in its name.
        assert!(!diff.in_a.is_empty());
        assert!(!diff.in_b.is_empty());
    }

    #[test]
    fn migration_plan_orders_types_first() {
        let (a, b) = (graph_a(), graph_b());
        let plan = CodebaseComparer::new(&a, "legacy", &b, "new").migration_plan();

        assert!(!plan.is_empty());
        // Types should come before functions
        if plan.len() >= 2 {
            let first = &plan[0];
            assert!(first.description.contains("type"));
        }
    }
}