1use std::collections::HashMap;
7
8use serde::{Deserialize, Serialize};
9
10use crate::graph::CodeGraph;
11use crate::types::CodeUnitType;
12
13#[derive(Debug, Clone, Serialize, Deserialize)]
17pub struct StructuralDiff {
18 pub only_in_a: Vec<String>,
20 pub only_in_b: Vec<String>,
22 pub modified: Vec<ModuleDiff>,
24}
25
26#[derive(Debug, Clone, Serialize, Deserialize)]
28pub struct ModuleDiff {
29 pub module: String,
31 pub symbols_only_a: Vec<String>,
33 pub symbols_only_b: Vec<String>,
35 pub common_symbols: Vec<String>,
37}
38
39#[derive(Debug, Clone, Serialize, Deserialize)]
41pub struct ConceptualDiff {
42 pub concept: String,
44 pub in_a: Vec<String>,
46 pub in_b: Vec<String>,
48 pub differences: Vec<String>,
50}
51
52#[derive(Debug, Clone, Serialize, Deserialize)]
54pub struct PatternDiff {
55 pub pattern: String,
57 pub instances_a: usize,
59 pub instances_b: usize,
61 pub variations: Vec<PatternVariation>,
63}
64
65#[derive(Debug, Clone, Serialize, Deserialize)]
67pub struct PatternVariation {
68 pub description: String,
70 pub source: String,
72}
73
74#[derive(Debug, Clone, Serialize, Deserialize)]
76pub struct CodebaseComparison {
77 pub label_a: String,
79 pub label_b: String,
81 pub structural: StructuralDiff,
83 pub conceptual: Vec<ConceptualDiff>,
85 pub patterns: Vec<PatternDiff>,
87 pub summary: ComparisonSummary,
89}
90
91#[derive(Debug, Clone, Serialize, Deserialize)]
93pub struct ComparisonSummary {
94 pub units_a: usize,
96 pub units_b: usize,
98 pub common_symbols: usize,
100 pub unique_to_a: usize,
102 pub unique_to_b: usize,
104 pub similarity: f64,
106}
107
108#[derive(Debug, Clone, Serialize, Deserialize)]
110pub struct MigrationStep {
111 pub order: usize,
113 pub description: String,
115 pub source_symbols: Vec<String>,
117 pub effort: String,
119 pub dependencies: Vec<usize>,
121}
122
123pub struct CodebaseComparer<'a, 'b> {
127 graph_a: &'a CodeGraph,
128 graph_b: &'b CodeGraph,
129 label_a: String,
130 label_b: String,
131}
132
133impl<'a, 'b> CodebaseComparer<'a, 'b> {
134 pub fn new(
135 graph_a: &'a CodeGraph,
136 label_a: &str,
137 graph_b: &'b CodeGraph,
138 label_b: &str,
139 ) -> Self {
140 Self {
141 graph_a,
142 graph_b,
143 label_a: label_a.to_string(),
144 label_b: label_b.to_string(),
145 }
146 }
147
148 pub fn compare(&self) -> CodebaseComparison {
150 let structural = self.compare_structural();
151 let conceptual = self.compare_conceptual();
152 let patterns = self.compare_patterns();
153
154 let names_a: std::collections::HashSet<String> = self
156 .graph_a
157 .units()
158 .iter()
159 .map(|u| u.name.to_lowercase())
160 .collect();
161 let names_b: std::collections::HashSet<String> = self
162 .graph_b
163 .units()
164 .iter()
165 .map(|u| u.name.to_lowercase())
166 .collect();
167
168 let common: std::collections::HashSet<&String> = names_a.intersection(&names_b).collect();
169 let unique_a = names_a.len() - common.len();
170 let unique_b = names_b.len() - common.len();
171
172 let total = names_a.len() + names_b.len();
173 let similarity = if total > 0 {
174 (common.len() * 2) as f64 / total as f64
175 } else {
176 0.0
177 };
178
179 CodebaseComparison {
180 label_a: self.label_a.clone(),
181 label_b: self.label_b.clone(),
182 structural,
183 conceptual,
184 patterns,
185 summary: ComparisonSummary {
186 units_a: self.graph_a.unit_count(),
187 units_b: self.graph_b.unit_count(),
188 common_symbols: common.len(),
189 unique_to_a: unique_a,
190 unique_to_b: unique_b,
191 similarity,
192 },
193 }
194 }
195
196 pub fn compare_concept(&self, concept: &str) -> ConceptualDiff {
198 let keywords: Vec<&str> = concept.split_whitespace().collect();
199
200 let find_matches = |graph: &CodeGraph| -> Vec<String> {
201 graph
202 .units()
203 .iter()
204 .filter(|u| {
205 let name_lower = u.name.to_lowercase();
206 keywords
207 .iter()
208 .any(|kw| name_lower.contains(&kw.to_lowercase()))
209 })
210 .map(|u| format!("{} ({})", u.name, u.unit_type.label()))
211 .collect()
212 };
213
214 let in_a = find_matches(self.graph_a);
215 let in_b = find_matches(self.graph_b);
216
217 let mut differences = Vec::new();
218 if in_a.is_empty() && !in_b.is_empty() {
219 differences.push(format!("'{}' not found in {}", concept, self.label_a));
220 } else if !in_a.is_empty() && in_b.is_empty() {
221 differences.push(format!("'{}' not found in {}", concept, self.label_b));
222 } else if in_a.len() != in_b.len() {
223 differences.push(format!(
224 "Different number of implementations: {} in {}, {} in {}",
225 in_a.len(),
226 self.label_a,
227 in_b.len(),
228 self.label_b
229 ));
230 }
231
232 ConceptualDiff {
233 concept: concept.to_string(),
234 in_a,
235 in_b,
236 differences,
237 }
238 }
239
240 pub fn migration_plan(&self) -> Vec<MigrationStep> {
242 let names_a: std::collections::HashSet<String> = self
243 .graph_a
244 .units()
245 .iter()
246 .map(|u| u.name.clone())
247 .collect();
248 let names_b: std::collections::HashSet<String> = self
249 .graph_b
250 .units()
251 .iter()
252 .map(|u| u.name.clone())
253 .collect();
254
255 let mut steps = Vec::new();
256 let mut order = 1;
257
258 let types_to_port: Vec<String> = self
260 .graph_a
261 .units()
262 .iter()
263 .filter(|u| u.unit_type == CodeUnitType::Type && !names_b.contains(&u.name))
264 .map(|u| u.name.clone())
265 .collect();
266
267 if !types_to_port.is_empty() {
268 steps.push(MigrationStep {
269 order,
270 description: format!("Port {} type definitions", types_to_port.len()),
271 source_symbols: types_to_port,
272 effort: "medium".to_string(),
273 dependencies: Vec::new(),
274 });
275 order += 1;
276 }
277
278 let fns_to_port: Vec<String> = self
280 .graph_a
281 .units()
282 .iter()
283 .filter(|u| u.unit_type == CodeUnitType::Function && !names_b.contains(&u.name))
284 .map(|u| u.name.clone())
285 .collect();
286
287 if !fns_to_port.is_empty() {
288 let dep = if order > 1 { vec![1] } else { Vec::new() };
289 steps.push(MigrationStep {
290 order,
291 description: format!("Port {} functions", fns_to_port.len()),
292 source_symbols: fns_to_port,
293 effort: "high".to_string(),
294 dependencies: dep,
295 });
296 order += 1;
297 }
298
299 let tests_to_port: Vec<String> = self
301 .graph_a
302 .units()
303 .iter()
304 .filter(|u| u.unit_type == CodeUnitType::Test && !names_b.contains(&u.name))
305 .map(|u| u.name.clone())
306 .collect();
307
308 if !tests_to_port.is_empty() {
309 let dep = if order > 1 {
310 vec![order - 1]
311 } else {
312 Vec::new()
313 };
314 steps.push(MigrationStep {
315 order,
316 description: format!("Port {} tests", tests_to_port.len()),
317 source_symbols: tests_to_port,
318 effort: "medium".to_string(),
319 dependencies: dep,
320 });
321 }
322
323 let covered: std::collections::HashSet<String> = steps
325 .iter()
326 .flat_map(|s| s.source_symbols.iter().cloned())
327 .collect();
328
329 let remaining: Vec<String> = names_a
330 .difference(&names_b)
331 .filter(|n| !covered.contains(*n))
332 .cloned()
333 .collect();
334
335 if !remaining.is_empty() {
336 let prev_order = steps.last().map(|s| s.order).unwrap_or(0);
337 steps.push(MigrationStep {
338 order: prev_order + 1,
339 description: format!("Port {} remaining symbols", remaining.len()),
340 source_symbols: remaining,
341 effort: "low".to_string(),
342 dependencies: if prev_order > 0 {
343 vec![prev_order]
344 } else {
345 Vec::new()
346 },
347 });
348 }
349
350 steps
351 }
352
353 fn compare_structural(&self) -> StructuralDiff {
356 let dirs_a = self.extract_directories(self.graph_a);
357 let dirs_b = self.extract_directories(self.graph_b);
358
359 let only_in_a: Vec<String> = dirs_a
360 .keys()
361 .filter(|d| !dirs_b.contains_key(*d))
362 .cloned()
363 .collect();
364 let only_in_b: Vec<String> = dirs_b
365 .keys()
366 .filter(|d| !dirs_a.contains_key(*d))
367 .cloned()
368 .collect();
369
370 let mut modified = Vec::new();
371 for (dir, syms_a) in &dirs_a {
372 if let Some(syms_b) = dirs_b.get(dir) {
373 let set_a: std::collections::HashSet<&String> = syms_a.iter().collect();
374 let set_b: std::collections::HashSet<&String> = syms_b.iter().collect();
375
376 let only_a: Vec<String> = set_a.difference(&set_b).map(|s| (*s).clone()).collect();
377 let only_b_list: Vec<String> =
378 set_b.difference(&set_a).map(|s| (*s).clone()).collect();
379 let common: Vec<String> =
380 set_a.intersection(&set_b).map(|s| (*s).clone()).collect();
381
382 if !only_a.is_empty() || !only_b_list.is_empty() {
383 modified.push(ModuleDiff {
384 module: dir.clone(),
385 symbols_only_a: only_a,
386 symbols_only_b: only_b_list,
387 common_symbols: common,
388 });
389 }
390 }
391 }
392
393 StructuralDiff {
394 only_in_a,
395 only_in_b,
396 modified,
397 }
398 }
399
400 fn compare_conceptual(&self) -> Vec<ConceptualDiff> {
401 let concepts = [
402 "auth", "payment", "user", "database", "api", "error", "config", "cache", "log",
403 ];
404
405 concepts
406 .iter()
407 .map(|c| self.compare_concept(c))
408 .filter(|d| !d.in_a.is_empty() || !d.in_b.is_empty())
409 .collect()
410 }
411
412 fn compare_patterns(&self) -> Vec<PatternDiff> {
413 let suffixes = [
414 "handler",
415 "service",
416 "controller",
417 "repository",
418 "factory",
419 "manager",
420 ];
421 let mut diffs = Vec::new();
422
423 for suffix in &suffixes {
424 let count_a = self
425 .graph_a
426 .units()
427 .iter()
428 .filter(|u| u.name.to_lowercase().ends_with(suffix))
429 .count();
430 let count_b = self
431 .graph_b
432 .units()
433 .iter()
434 .filter(|u| u.name.to_lowercase().ends_with(suffix))
435 .count();
436
437 if count_a > 0 || count_b > 0 {
438 let mut variations = Vec::new();
439 if count_a > 0 && count_b == 0 {
440 variations.push(PatternVariation {
441 description: format!("*_{} pattern only used in {}", suffix, self.label_a),
442 source: self.label_a.clone(),
443 });
444 } else if count_b > 0 && count_a == 0 {
445 variations.push(PatternVariation {
446 description: format!("*_{} pattern only used in {}", suffix, self.label_b),
447 source: self.label_b.clone(),
448 });
449 }
450
451 diffs.push(PatternDiff {
452 pattern: format!("*_{}", suffix),
453 instances_a: count_a,
454 instances_b: count_b,
455 variations,
456 });
457 }
458 }
459
460 diffs
461 }
462
463 fn extract_directories(&self, graph: &CodeGraph) -> HashMap<String, Vec<String>> {
464 let mut dirs: HashMap<String, Vec<String>> = HashMap::new();
465 for unit in graph.units() {
466 let dir = unit
467 .file_path
468 .parent()
469 .map(|p| p.display().to_string())
470 .unwrap_or_default();
471 dirs.entry(dir).or_default().push(unit.name.clone());
472 }
473 dirs
474 }
475}
476
477#[cfg(test)]
480mod tests {
481 use super::*;
482 use crate::types::{CodeUnit, CodeUnitType, Language, Span};
483 use std::path::PathBuf;
484
485 fn graph_a() -> CodeGraph {
486 let mut g = CodeGraph::with_default_dimension();
487 g.add_unit(CodeUnit::new(
488 CodeUnitType::Function,
489 Language::Rust,
490 "process_payment".to_string(),
491 "billing::process_payment".to_string(),
492 PathBuf::from("src/billing.rs"),
493 Span::new(1, 0, 20, 0),
494 ));
495 g.add_unit(CodeUnit::new(
496 CodeUnitType::Type,
497 Language::Rust,
498 "PaymentResult".to_string(),
499 "billing::PaymentResult".to_string(),
500 PathBuf::from("src/billing.rs"),
501 Span::new(21, 0, 30, 0),
502 ));
503 g.add_unit(CodeUnit::new(
504 CodeUnitType::Function,
505 Language::Rust,
506 "auth_user".to_string(),
507 "auth::auth_user".to_string(),
508 PathBuf::from("src/auth.rs"),
509 Span::new(1, 0, 15, 0),
510 ));
511 g
512 }
513
514 fn graph_b() -> CodeGraph {
515 let mut g = CodeGraph::with_default_dimension();
516 g.add_unit(CodeUnit::new(
517 CodeUnitType::Function,
518 Language::Rust,
519 "process_payment".to_string(),
520 "billing::process_payment".to_string(),
521 PathBuf::from("src/billing.rs"),
522 Span::new(1, 0, 25, 0),
523 ));
524 g.add_unit(CodeUnit::new(
525 CodeUnitType::Function,
526 Language::Rust,
527 "validate_payment".to_string(),
528 "billing::validate_payment".to_string(),
529 PathBuf::from("src/billing.rs"),
530 Span::new(26, 0, 40, 0),
531 ));
532 g
533 }
534
535 #[test]
536 fn compare_finds_differences() {
537 let a = graph_a();
538 let b = graph_b();
539 let comparer = CodebaseComparer::new(&a, "legacy", &b, "new");
540 let result = comparer.compare();
541
542 assert_eq!(result.summary.units_a, 3);
543 assert_eq!(result.summary.units_b, 2);
544 assert!(result.summary.common_symbols >= 1); }
546
547 #[test]
548 fn compare_concept() {
549 let a = graph_a();
550 let b = graph_b();
551 let comparer = CodebaseComparer::new(&a, "legacy", &b, "new");
552 let diff = comparer.compare_concept("payment");
553
554 assert!(!diff.in_a.is_empty());
555 assert!(!diff.in_b.is_empty());
556 }
557
558 #[test]
559 fn migration_plan_orders_types_first() {
560 let a = graph_a();
561 let b = graph_b();
562 let comparer = CodebaseComparer::new(&a, "legacy", &b, "new");
563 let plan = comparer.migration_plan();
564
565 assert!(!plan.is_empty());
566 if plan.len() >= 2 {
568 assert!(plan[0].description.contains("type"));
569 }
570 }
571}