1use crate::graph::body_hash::BodyHash128;
7use crate::graph::unified::concurrent::CodeGraph;
8use crate::graph::unified::node::{NodeId, NodeKind};
9use std::collections::HashMap;
10use std::hash::{Hash, Hasher};
11use std::str::FromStr;
12
13#[derive(Debug, Clone, Copy, PartialEq, Eq)]
15pub enum DuplicateType {
16 Body,
18 Signature,
20 Struct,
22}
23
24impl DuplicateType {
25 #[must_use]
27 pub fn parse(value: &str) -> Option<Self> {
28 value.parse().ok()
29 }
30}
31
32impl FromStr for DuplicateType {
33 type Err = anyhow::Error;
34
35 fn from_str(value: &str) -> Result<Self, Self::Err> {
36 match value.trim().to_lowercase().as_str() {
37 "body" | "function" => Ok(Self::Body),
38 "signature" | "sig" => Ok(Self::Signature),
39 "struct" | "type" => Ok(Self::Struct),
40 _ => Err(anyhow::anyhow!("Unknown duplicate type: {value}")),
41 }
42 }
43}
44
45#[derive(Debug, Clone)]
47pub struct DuplicateConfig {
48 pub threshold: f64,
50 #[allow(dead_code)]
52 pub max_results: usize,
53 pub is_exact_only: bool,
55 pub max_members_per_group: usize,
64}
65
66impl Default for DuplicateConfig {
67 fn default() -> Self {
68 Self {
69 threshold: 0.8,
70 max_results: 1000,
71 is_exact_only: false,
72 max_members_per_group: 10,
73 }
74 }
75}
76
77#[derive(Debug, Clone)]
79pub struct DuplicateGroup {
80 pub hash: u64,
82 pub body_hash_128: Option<BodyHash128>,
87 pub node_ids: Vec<NodeId>,
93 pub total_members: usize,
97 pub members_truncated: bool,
99}
100
101fn compute_hash(graph: &CodeGraph, node_id: NodeId, dup_type: DuplicateType) -> Option<u64> {
103 let entry = graph.nodes().get(node_id)?;
104 if entry.is_unified_loser() {
108 return None;
109 }
110 let strings = graph.strings();
111
112 match dup_type {
113 DuplicateType::Body => {
114 if let Some(body_hash) = entry.body_hash {
117 return Some(body_hash.high ^ body_hash.low);
120 }
121
122 if let Some(sig_id) = entry.signature
124 && let Some(sig) = strings.resolve(sig_id)
125 {
126 let mut hasher = std::collections::hash_map::DefaultHasher::new();
127 sig.hash(&mut hasher);
128 entry.kind.hash(&mut hasher);
129 return Some(hasher.finish());
130 }
131
132 let name = entry
134 .qualified_name
135 .and_then(|id| strings.resolve(id))
136 .or_else(|| strings.resolve(entry.name))?;
137
138 let mut hasher = std::collections::hash_map::DefaultHasher::new();
139 name.hash(&mut hasher);
140 entry.kind.hash(&mut hasher);
141 let lines = entry.end_line.saturating_sub(entry.start_line);
143 lines.hash(&mut hasher);
144 Some(hasher.finish())
145 }
146 DuplicateType::Signature => {
147 if let Some(sig_id) = entry.signature
149 && let Some(sig) = strings.resolve(sig_id)
150 {
151 let mut hasher = std::collections::hash_map::DefaultHasher::new();
152 sig.hash(&mut hasher);
153 return Some(hasher.finish());
154 }
155
156 let name = strings.resolve(entry.name)?;
158 let mut hasher = std::collections::hash_map::DefaultHasher::new();
159 name.hash(&mut hasher);
160 entry.kind.hash(&mut hasher);
161 Some(hasher.finish())
162 }
163 DuplicateType::Struct => {
164 if !matches!(entry.kind, NodeKind::Struct | NodeKind::Class) {
166 return None;
167 }
168
169 let name = entry
171 .qualified_name
172 .and_then(|id| strings.resolve(id))
173 .or_else(|| strings.resolve(entry.name))?;
174
175 let mut hasher = std::collections::hash_map::DefaultHasher::new();
176 name.hash(&mut hasher);
177 entry.kind.hash(&mut hasher);
178 Some(hasher.finish())
179 }
180 }
181}
182
183#[must_use]
202pub fn build_duplicate_groups_graph(
203 dup_type: DuplicateType,
204 graph: &CodeGraph,
205 config: &DuplicateConfig,
206) -> Vec<DuplicateGroup> {
207 let mut hash_to_nodes: HashMap<u64, Vec<NodeId>> = HashMap::new();
208
209 let relevant_kinds: Vec<NodeKind> = match dup_type {
211 DuplicateType::Body | DuplicateType::Signature => {
212 vec![NodeKind::Function, NodeKind::Method]
213 }
214 DuplicateType::Struct => vec![NodeKind::Struct, NodeKind::Class],
215 };
216
217 for (node_id, entry) in graph.nodes().iter() {
229 if entry.is_unified_loser() {
230 continue;
231 }
232 if !relevant_kinds.contains(&entry.kind) {
233 continue;
234 }
235
236 if let Some(hash) = compute_hash(graph, node_id, dup_type) {
237 hash_to_nodes.entry(hash).or_default().push(node_id);
238 }
239 }
240
241 let mut groups: Vec<DuplicateGroup> = hash_to_nodes
243 .into_iter()
244 .filter(|(_, nodes)| {
245 if config.is_exact_only {
246 nodes.len() > 1
247 } else {
248 nodes.len() > 1
251 }
252 })
253 .map(|(hash, mut node_ids)| {
254 let body_hash_128 = if dup_type == DuplicateType::Body {
260 node_ids.first().and_then(|&id| {
261 graph.nodes().get(id).and_then(|entry| {
262 if entry.is_unified_loser() {
263 None
264 } else {
265 entry.body_hash
266 }
267 })
268 })
269 } else {
270 None
271 };
272
273 let strings = graph.strings();
277 let files = graph.files();
278 node_ids.sort_by(|&a, &b| {
279 let path_a = graph
280 .nodes()
281 .get(a)
282 .and_then(|e| files.resolve(e.file))
283 .map(|p| p.to_string_lossy().into_owned())
284 .unwrap_or_default();
285 let path_b = graph
286 .nodes()
287 .get(b)
288 .and_then(|e| files.resolve(e.file))
289 .map(|p| p.to_string_lossy().into_owned())
290 .unwrap_or_default();
291 let name_a = graph
292 .nodes()
293 .get(a)
294 .and_then(|e| strings.resolve(e.name))
295 .as_deref()
296 .map(str::to_owned)
297 .unwrap_or_default();
298 let name_b = graph
299 .nodes()
300 .get(b)
301 .and_then(|e| strings.resolve(e.name))
302 .as_deref()
303 .map(str::to_owned)
304 .unwrap_or_default();
305 path_a
306 .cmp(&path_b)
307 .then_with(|| name_a.cmp(&name_b))
308 .then_with(|| a.cmp(&b))
309 });
310
311 let total_members = node_ids.len();
313 let members_truncated =
314 config.max_members_per_group > 0 && node_ids.len() > config.max_members_per_group;
315 if members_truncated {
316 node_ids.truncate(config.max_members_per_group);
317 }
318
319 DuplicateGroup {
320 hash,
321 body_hash_128,
322 node_ids,
323 total_members,
324 members_truncated,
325 }
326 })
327 .collect();
328
329 groups.sort_by(|a, b| b.total_members.cmp(&a.total_members));
332
333 groups.truncate(config.max_results);
335
336 groups
337}
338
339#[cfg(test)]
340mod tests {
341 use super::*;
342 use crate::graph::unified::storage::arena::NodeEntry;
343 use std::path::Path;
344
345 fn create_test_graph_with_signatures(
347 nodes: &[(&str, NodeKind, Option<&str>)],
348 ) -> (CodeGraph, Vec<NodeId>) {
349 let mut graph = CodeGraph::new();
350 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
351 let mut node_ids = Vec::new();
352
353 for (name, kind, signature) in nodes {
354 let name_id = graph.strings_mut().intern(name).unwrap();
355 let mut entry = NodeEntry::new(*kind, name_id, file_id).with_qualified_name(name_id);
356
357 if let Some(sig) = signature {
358 let sig_id = graph.strings_mut().intern(sig).unwrap();
359 entry = entry.with_signature(sig_id);
360 }
361
362 let node_id = graph.nodes_mut().alloc(entry).unwrap();
363 node_ids.push(node_id);
364 }
365
366 (graph, node_ids)
367 }
368
369 fn create_test_graph_with_spans(
371 nodes: &[(&str, NodeKind, u32, u32)], ) -> (CodeGraph, Vec<NodeId>) {
373 let mut graph = CodeGraph::new();
374 let file_id = graph.files_mut().register(Path::new("test.rs")).unwrap();
375 let mut node_ids = Vec::new();
376
377 for (name, kind, start_line, end_line) in nodes {
378 let name_id = graph.strings_mut().intern(name).unwrap();
379 let entry = NodeEntry::new(*kind, name_id, file_id)
380 .with_qualified_name(name_id)
381 .with_location(*start_line, 0, *end_line, 0);
382
383 let node_id = graph.nodes_mut().alloc(entry).unwrap();
384 node_ids.push(node_id);
385 }
386
387 (graph, node_ids)
388 }
389
390 #[test]
391 fn test_empty_graph() {
392 let graph = CodeGraph::new();
393 let config = DuplicateConfig::default();
394
395 let groups = build_duplicate_groups_graph(DuplicateType::Body, &graph, &config);
396 assert!(groups.is_empty());
397 }
398
399 #[test]
400 fn test_no_duplicates_unique_signatures() {
401 let nodes = [
403 ("func_a", NodeKind::Function, Some("fn func_a() -> i32")),
404 ("func_b", NodeKind::Function, Some("fn func_b() -> String")),
405 (
406 "func_c",
407 NodeKind::Function,
408 Some("fn func_c(x: i32) -> bool"),
409 ),
410 ];
411 let (graph, _) = create_test_graph_with_signatures(&nodes);
412 let config = DuplicateConfig::default();
413
414 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
415 assert!(groups.is_empty(), "No duplicates should be found");
416 }
417
418 #[test]
419 fn test_signature_duplicates() {
420 let nodes = [
422 (
423 "func_a",
424 NodeKind::Function,
425 Some("fn process(x: i32) -> i32"),
426 ),
427 (
428 "func_b",
429 NodeKind::Function,
430 Some("fn process(x: i32) -> i32"),
431 ),
432 ("func_c", NodeKind::Function, Some("fn other() -> String")),
433 ];
434 let (graph, node_ids) = create_test_graph_with_signatures(&nodes);
435 let config = DuplicateConfig::default();
436
437 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
438 assert_eq!(groups.len(), 1, "Should find one duplicate group");
439 assert_eq!(
440 groups[0].node_ids.len(),
441 2,
442 "Group should have 2 duplicates"
443 );
444 assert!(groups[0].node_ids.contains(&node_ids[0]));
445 assert!(groups[0].node_ids.contains(&node_ids[1]));
446 }
447
448 #[test]
449 fn test_body_duplicates_with_signatures() {
450 let nodes = [
452 (
453 "func_a",
454 NodeKind::Function,
455 Some("fn compute(x: i32) -> i32"),
456 ),
457 (
458 "func_b",
459 NodeKind::Function,
460 Some("fn compute(x: i32) -> i32"),
461 ),
462 (
463 "func_c",
464 NodeKind::Method,
465 Some("fn compute(x: i32) -> i32"),
466 ), ];
468 let (graph, node_ids) = create_test_graph_with_signatures(&nodes);
469 let config = DuplicateConfig::default();
470
471 let groups = build_duplicate_groups_graph(DuplicateType::Body, &graph, &config);
472 assert_eq!(groups.len(), 1, "Should find one duplicate group");
475 assert_eq!(groups[0].node_ids.len(), 2);
476 assert!(groups[0].node_ids.contains(&node_ids[0]));
477 assert!(groups[0].node_ids.contains(&node_ids[1]));
478 }
479
480 #[test]
481 fn test_body_duplicates_fallback_to_name_and_span() {
482 let nodes = [
484 ("helper", NodeKind::Function, 10, 20), ("helper", NodeKind::Function, 30, 40), ("other", NodeKind::Function, 50, 60), ];
488 let (graph, node_ids) = create_test_graph_with_spans(&nodes);
489 let config = DuplicateConfig::default();
490
491 let groups = build_duplicate_groups_graph(DuplicateType::Body, &graph, &config);
492 assert_eq!(groups.len(), 1, "Should find one duplicate group");
494 assert!(groups[0].node_ids.contains(&node_ids[0]));
495 assert!(groups[0].node_ids.contains(&node_ids[1]));
496 }
497
498 #[test]
499 fn test_struct_duplicates() {
500 let nodes = [
502 ("MyStruct", NodeKind::Struct, None),
503 ("MyStruct", NodeKind::Struct, None),
504 ("MyStruct", NodeKind::Function, None), ("OtherStruct", NodeKind::Struct, None),
506 ];
507 let (graph, node_ids) = create_test_graph_with_signatures(&nodes);
508 let config = DuplicateConfig::default();
509
510 let groups = build_duplicate_groups_graph(DuplicateType::Struct, &graph, &config);
511 assert_eq!(groups.len(), 1, "Should find one duplicate group");
513 assert!(groups[0].node_ids.contains(&node_ids[0]));
514 assert!(groups[0].node_ids.contains(&node_ids[1]));
515 assert!(!groups[0].node_ids.contains(&node_ids[2])); }
517
518 #[test]
519 fn test_class_duplicates() {
520 let nodes = [
522 ("MyClass", NodeKind::Class, None),
523 ("MyClass", NodeKind::Class, None),
524 ("OtherClass", NodeKind::Class, None),
525 ];
526 let (graph, node_ids) = create_test_graph_with_signatures(&nodes);
527 let config = DuplicateConfig::default();
528
529 let groups = build_duplicate_groups_graph(DuplicateType::Struct, &graph, &config);
530 assert_eq!(groups.len(), 1);
531 assert!(groups[0].node_ids.contains(&node_ids[0]));
532 assert!(groups[0].node_ids.contains(&node_ids[1]));
533 }
534
535 #[test]
536 fn test_methods_ignored_for_struct_duplicates() {
537 let nodes = [
539 ("process", NodeKind::Method, Some("fn process()")),
540 ("process", NodeKind::Method, Some("fn process()")),
541 ];
542 let (graph, _) = create_test_graph_with_signatures(&nodes);
543 let config = DuplicateConfig::default();
544
545 let groups = build_duplicate_groups_graph(DuplicateType::Struct, &graph, &config);
546 assert!(
547 groups.is_empty(),
548 "Methods should not be considered for struct duplicates"
549 );
550 }
551
552 #[test]
553 fn test_max_results_limit() {
554 let mut nodes = Vec::new();
556 for i in 0..10 {
557 let sig = format!("fn group{i}()");
559 nodes.push((format!("func_{i}_a"), NodeKind::Function, Some(sig.clone())));
560 nodes.push((format!("func_{i}_b"), NodeKind::Function, Some(sig)));
561 }
562
563 let nodes_ref: Vec<(&str, NodeKind, Option<&str>)> = nodes
564 .iter()
565 .map(|(name, kind, sig)| (name.as_str(), *kind, sig.as_deref()))
566 .collect();
567 let (graph, _) = create_test_graph_with_signatures(&nodes_ref);
568 let config = DuplicateConfig {
569 max_results: 3,
570 ..Default::default()
571 };
572
573 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
574 assert_eq!(groups.len(), 3, "Should respect max_results limit");
575 }
576
577 #[test]
578 fn test_groups_sorted_by_size() {
579 let nodes = [
581 ("large_a", NodeKind::Function, Some("fn large()")),
583 ("large_b", NodeKind::Function, Some("fn large()")),
584 ("large_c", NodeKind::Function, Some("fn large()")),
585 ("small_a", NodeKind::Function, Some("fn small()")),
587 ("small_b", NodeKind::Function, Some("fn small()")),
588 ];
589 let (graph, _) = create_test_graph_with_signatures(&nodes);
590 let config = DuplicateConfig::default();
591
592 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
593 assert_eq!(groups.len(), 2);
594 assert_eq!(groups[0].node_ids.len(), 3, "Largest group should be first");
595 assert_eq!(
596 groups[1].node_ids.len(),
597 2,
598 "Smaller group should be second"
599 );
600 }
601
602 #[test]
603 fn test_single_node_not_duplicate() {
604 let nodes = [
606 ("unique_func", NodeKind::Function, Some("fn unique()")),
607 ("other_func", NodeKind::Function, Some("fn different()")),
608 ];
609 let (graph, _) = create_test_graph_with_signatures(&nodes);
610 let config = DuplicateConfig::default();
611
612 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
613 assert!(
614 groups.is_empty(),
615 "Single nodes should not form duplicate groups"
616 );
617 }
618
619 #[test]
620 fn test_mixed_kinds_not_duplicates() {
621 let nodes = [
623 ("process", NodeKind::Function, Some("fn process()")),
624 ("process", NodeKind::Method, Some("fn process()")),
625 ];
626 let (graph, _) = create_test_graph_with_signatures(&nodes);
627 let config = DuplicateConfig::default();
628
629 let groups = build_duplicate_groups_graph(DuplicateType::Body, &graph, &config);
631 assert!(
632 groups.is_empty(),
633 "Different kinds should not be duplicates for body type"
634 );
635
636 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
638 assert_eq!(
639 groups.len(),
640 1,
641 "Same signature should be duplicates regardless of kind"
642 );
643 }
644
645 #[test]
646 fn test_multiple_duplicate_groups() {
647 let nodes = [
649 ("func_a1", NodeKind::Function, Some("fn alpha()")),
650 ("func_a2", NodeKind::Function, Some("fn alpha()")),
651 ("func_b1", NodeKind::Function, Some("fn beta()")),
652 ("func_b2", NodeKind::Function, Some("fn beta()")),
653 ("unique", NodeKind::Function, Some("fn gamma()")),
654 ];
655 let (graph, _) = create_test_graph_with_signatures(&nodes);
656 let config = DuplicateConfig::default();
657
658 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
659 assert_eq!(groups.len(), 2, "Should find two duplicate groups");
660 }
661
662 #[test]
663 fn test_exact_only_config() {
664 let nodes = [
666 ("func_a", NodeKind::Function, Some("fn exact()")),
667 ("func_b", NodeKind::Function, Some("fn exact()")),
668 ];
669 let (graph, _) = create_test_graph_with_signatures(&nodes);
670 let config = DuplicateConfig {
671 is_exact_only: true,
672 ..Default::default()
673 };
674
675 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
676 assert_eq!(groups.len(), 1);
677 }
678
679 #[test]
693 fn duplicates_query_excludes_unified_losers() {
694 use crate::graph::body_hash::BodyHash128;
695 use crate::graph::unified::build::unification::merge_node_into;
696
697 let mut graph = CodeGraph::new();
698
699 let winner_name = graph.strings_mut().intern("shared_fn").unwrap();
700 let winner_qn = graph.strings_mut().intern("mod_a::shared_fn").unwrap();
701 let sig = graph.strings_mut().intern("fn shared_fn() -> ()").unwrap();
702 let body = BodyHash128 {
703 high: 0xDEAD_BEEF,
704 low: 0xCAFE_F00D,
705 };
706
707 let file_a = graph
708 .files_mut()
709 .register(Path::new("src/a.rs"))
710 .expect("register a");
711 let file_b = graph
712 .files_mut()
713 .register(Path::new("src/b.rs"))
714 .expect("register b");
715
716 let (winner_id, loser_id) = {
717 let arena = graph.nodes_mut();
718 let mut w = NodeEntry::new(NodeKind::Function, winner_name, file_a)
719 .with_location(10, 0, 20, 0)
720 .with_signature(sig)
721 .with_body_hash(body);
722 w.qualified_name = Some(winner_qn);
723 let w_id = arena.alloc(w).unwrap();
724
725 let mut l = NodeEntry::new(NodeKind::Function, winner_name, file_b)
726 .with_location(5, 0, 15, 0)
727 .with_signature(sig)
728 .with_body_hash(body);
729 l.qualified_name = Some(winner_qn);
730 let l_id = arena.alloc(l).unwrap();
731
732 (w_id, l_id)
733 };
734 graph.files_mut().record_node(file_a, winner_id);
735 graph.files_mut().record_node(file_b, loser_id);
736
737 merge_node_into(graph.nodes_mut(), loser_id, winner_id).unwrap();
742
743 let loser_entry = graph.nodes().get(loser_id).expect("loser present");
746 assert!(loser_entry.is_unified_loser());
747 assert!(loser_entry.body_hash.is_none(), "body_hash must be cleared");
748 assert!(loser_entry.signature.is_none(), "signature must be cleared");
749
750 graph.rebuild_indices();
751
752 let groups =
755 build_duplicate_groups_graph(DuplicateType::Body, &graph, &DuplicateConfig::default());
756
757 for group in &groups {
761 assert!(
762 !group.node_ids.contains(&loser_id),
763 "Unified loser leaked into `duplicates:body` group: {:?}",
764 group.node_ids
765 );
766 }
767 assert!(
768 groups.is_empty(),
769 "After filtering losers, the winner is alone and no duplicate group should \
770 be returned; got {} groups: {groups:?}",
771 groups.len(),
772 );
773
774 let sig_groups = build_duplicate_groups_graph(
776 DuplicateType::Signature,
777 &graph,
778 &DuplicateConfig::default(),
779 );
780 for group in &sig_groups {
781 assert!(
782 !group.node_ids.contains(&loser_id),
783 "Unified loser leaked into `duplicates:signature` group: {:?}",
784 group.node_ids
785 );
786 }
787 assert!(
788 sig_groups.is_empty(),
789 "After filtering losers, the winner is alone and no signature duplicate \
790 group should be returned; got {} groups",
791 sig_groups.len(),
792 );
793 }
794
795 fn create_large_group_graph(member_count: usize) -> (CodeGraph, Vec<NodeId>) {
803 let mut graph = CodeGraph::new();
804 let file_a = graph.files_mut().register(Path::new("src/a.rs")).unwrap();
805 let file_b = graph.files_mut().register(Path::new("src/b.rs")).unwrap();
806 let shared_sig = graph
807 .strings_mut()
808 .intern("fn duplicate_fn() -> i32")
809 .unwrap();
810 let mut node_ids = Vec::new();
811
812 for i in 0..member_count {
813 let name = format!("dup_fn_{i}");
814 let name_id = graph.strings_mut().intern(&name).unwrap();
815 let file_id = if i % 2 == 0 { file_a } else { file_b };
817 let entry = NodeEntry::new(NodeKind::Function, name_id, file_id)
818 .with_qualified_name(name_id)
819 .with_signature(shared_sig);
820 let node_id = graph.nodes_mut().alloc(entry).unwrap();
821 node_ids.push(node_id);
822 }
823
824 (graph, node_ids)
825 }
826
827 #[test]
831 fn test_per_group_cap_truncation() {
832 let (graph, _) = create_large_group_graph(10);
833 let config = DuplicateConfig {
834 max_members_per_group: 3,
835 ..Default::default()
836 };
837
838 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
839 assert_eq!(groups.len(), 1, "Expected exactly one duplicate group");
840
841 let group = &groups[0];
842 assert_eq!(
843 group.total_members, 10,
844 "total_members must be pre-truncation count"
845 );
846 assert!(group.members_truncated, "members_truncated must be true");
847 assert_eq!(
848 group.node_ids.len(),
849 3,
850 "displayed node_ids must be capped at max_members_per_group"
851 );
852 }
853
854 #[test]
857 fn test_per_group_cap_zero_means_unlimited() {
858 let (graph, _) = create_large_group_graph(10);
859 let config = DuplicateConfig {
860 max_members_per_group: 0,
861 ..Default::default()
862 };
863
864 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
865 assert_eq!(groups.len(), 1);
866
867 let group = &groups[0];
868 assert_eq!(group.total_members, 10);
869 assert!(
870 !group.members_truncated,
871 "members_truncated must be false when unlimited"
872 );
873 assert_eq!(
874 group.node_ids.len(),
875 10,
876 "All members must be returned when cap is 0"
877 );
878 }
879
880 #[test]
883 fn test_per_group_cap_no_truncation_when_below_cap() {
884 let (graph, _) = create_large_group_graph(5);
885 let config = DuplicateConfig {
886 max_members_per_group: 10,
887 ..Default::default()
888 };
889
890 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
891 assert_eq!(groups.len(), 1);
892
893 let group = &groups[0];
894 assert_eq!(group.total_members, 5);
895 assert!(
896 !group.members_truncated,
897 "members_truncated must be false when count <= cap"
898 );
899 assert_eq!(
900 group.node_ids.len(),
901 5,
902 "All members must be present when below cap"
903 );
904 }
905
906 #[test]
910 fn test_per_group_cap_one_member_displayed() {
911 let (graph, _) = create_large_group_graph(5);
912 let config = DuplicateConfig {
913 max_members_per_group: 1,
914 ..Default::default()
915 };
916
917 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
918 assert_eq!(
919 groups.len(),
920 1,
921 "Group must not be filtered just because only 1 member is displayed"
922 );
923
924 let group = &groups[0];
925 assert_eq!(group.total_members, 5);
926 assert!(group.members_truncated);
927 assert_eq!(group.node_ids.len(), 1);
928 }
929
930 #[test]
934 fn test_per_group_deterministic_ordering() {
935 let mut graph = CodeGraph::new();
936 let file_a = graph.files_mut().register(Path::new("src/a.rs")).unwrap();
937 let file_b = graph.files_mut().register(Path::new("src/b.rs")).unwrap();
938 let shared_sig = graph.strings_mut().intern("fn stable_fn() -> i32").unwrap();
939
940 let names_and_files: &[(&str, _)] = &[
943 ("z_func", file_b),
944 ("m_func", file_b),
945 ("a_func", file_b),
946 ("z_func", file_a),
947 ("a_func", file_a),
948 ];
949
950 let mut inserted: Vec<(String, String, NodeId)> = Vec::new();
951 for (name, file_id) in names_and_files {
952 let name_id = graph.strings_mut().intern(name).unwrap();
953 let entry = NodeEntry::new(NodeKind::Function, name_id, *file_id)
954 .with_qualified_name(name_id)
955 .with_signature(shared_sig);
956 let node_id = graph.nodes_mut().alloc(entry).unwrap();
957 let path = if *file_id == file_a {
958 "src/a.rs".to_owned()
959 } else {
960 "src/b.rs".to_owned()
961 };
962 inserted.push((path, (*name).to_owned(), node_id));
963 }
964
965 let mut expected = inserted.clone();
967 expected.sort_by(|(pa, na, ia), (pb, nb, ib)| {
968 pa.cmp(pb).then_with(|| na.cmp(nb)).then_with(|| ia.cmp(ib))
969 });
970 let expected_ids: Vec<NodeId> = expected.iter().map(|(_, _, id)| *id).collect();
971
972 let config = DuplicateConfig {
973 max_members_per_group: 0, ..Default::default()
975 };
976 let groups = build_duplicate_groups_graph(DuplicateType::Signature, &graph, &config);
977 assert_eq!(groups.len(), 1, "Expected one duplicate group");
978 assert_eq!(
979 groups[0].node_ids, expected_ids,
980 "node_ids must be in deterministic (file_path, name, NodeId) order"
981 );
982 assert_eq!(groups[0].total_members, 5);
983 assert!(!groups[0].members_truncated);
984 }
985}