1use rayon::prelude::*;
4use rustc_hash::{FxHashMap, FxHashSet};
5use std::path::Path;
6
7use crate::{
8 hash::{base_pow, hash_window, roll, token_hash},
9 models::{CpdClone, DetectionToken, Fragment, Location, SourceFile, TokenKind},
10};
11
/// Maps a window hash to the one `Occurrence` stored for that hash during the
/// primary detection pass.
type WindowStore = FxHashMap<u64, Occurrence>;
19
/// Position of one token window: the prepared source it lives in and the token
/// index at which it starts.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Occurrence {
    /// Index of the source within the `prepared` slice of the group being scanned.
    source_id: usize,
    /// Index of the window's first token within that source's `hashes`.
    token_start: usize,
}
27
/// Hashable identity of a clone used for de-duplication: the source id and
/// start line of each of its two fragments.
///
/// Keys are built by `CloneDedupKey::from_clone`, which stores the lower
/// `(source id, start line)` pair in the `a_*` fields.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct CloneDedupKey {
    /// Source id of the lower-ordered fragment.
    a_id: String,
    /// Start line of the lower-ordered fragment.
    a_start_line: u32,
    /// Source id of the higher-ordered fragment.
    b_id: String,
    /// Start line of the higher-ordered fragment.
    b_start_line: u32,
}
39
40impl CloneDedupKey {
41 fn from_clone(c: &CpdClone) -> Self {
42 let a_key = (&c.fragment_a.source_id, c.fragment_a.start.line);
44 let b_key = (&c.fragment_b.source_id, c.fragment_b.start.line);
45 if a_key <= b_key {
46 Self {
47 a_id: c.fragment_a.source_id.clone(),
48 a_start_line: c.fragment_a.start.line,
49 b_id: c.fragment_b.source_id.clone(),
50 b_start_line: c.fragment_b.start.line,
51 }
52 } else {
53 Self {
54 a_id: c.fragment_b.source_id.clone(),
55 a_start_line: c.fragment_b.start.line,
56 b_id: c.fragment_a.source_id.clone(),
57 b_start_line: c.fragment_a.start.line,
58 }
59 }
60 }
61}
62
63pub fn detect(files: &[SourceFile], min_tokens: usize) -> Vec<CpdClone> {
72 detect_with_options(files, min_tokens, false, 0)
73}
74
75pub fn detect_with_options(
82 files: &[SourceFile],
83 min_tokens: usize,
84 skip_local: bool,
85 min_lines: usize,
86) -> Vec<CpdClone> {
87 if files.is_empty() || min_tokens == 0 {
88 return vec![];
89 }
90
91 let mut by_format: FxHashMap<&str, Vec<usize>> = FxHashMap::default();
93 for (idx, file) in files.iter().enumerate() {
94 by_format.entry(file.format.as_str()).or_default().push(idx);
95 }
96 let mut format_groups: Vec<(&str, Vec<usize>)> = by_format.into_iter().collect();
97 format_groups.sort_unstable_by_key(|(fmt, _)| *fmt);
98 for (_, group) in &mut format_groups {
99 group.sort_unstable_by_key(|&idx| files[idx].id.as_str());
100 }
101
102 let all_clones: Vec<Vec<CpdClone>> = format_groups
103 .into_par_iter()
104 .map(|(_format, indices)| {
105 let prepared: Vec<PreparedSource> = indices
109 .iter()
110 .map(|&idx| {
111 let file = &files[idx];
112 let mut hashes = Vec::with_capacity(file.tokens.len());
113 let mut spans: Vec<(Location, Location)> =
114 Vec::with_capacity(file.tokens.len());
115 for t in &file.tokens {
116 if t.kind == TokenKind::Ignore {
117 continue;
118 }
119 hashes.push(token_hash(t.kind.discriminant(), &t.value));
120 spans.push((t.start.clone(), t.end.clone()));
121 }
122 PreparedSource {
123 id: file.id.clone(),
124 format: file.format.clone(),
125 hashes,
126 spans,
127 }
128 })
129 .collect();
130 detect_in_group(&prepared, min_tokens, skip_local, min_lines)
131 })
132 .collect();
133
134 let mut clones: Vec<CpdClone> = all_clones.into_iter().flatten().collect();
135 dedup_exact_clones(&mut clones);
136
137 clones.sort_by(|a, b| {
138 (
139 &a.fragment_a.source_id,
140 a.fragment_a.start.line,
141 &a.fragment_b.source_id,
142 a.fragment_b.start.line,
143 )
144 .cmp(&(
145 &b.fragment_a.source_id,
146 b.fragment_a.start.line,
147 &b.fragment_b.source_id,
148 b.fragment_b.start.line,
149 ))
150 });
151
152 clones
153}
154
/// A source reduced to what detection needs: one hash and one location span
/// per token that takes part in matching.
pub struct PreparedSource {
    /// Identifier of the source. It is copied into the `source_id` of emitted
    /// fragments and is read as a path when `skip_local` compares parent
    /// directories.
    pub id: String,
    /// Format label, copied into the `format` of emitted clones.
    pub format: String,
    /// Per-token hashes, in token order.
    pub hashes: Vec<u64>,
    /// `(start, end)` location of each token; the constructors in this file
    /// fill it in step with `hashes`.
    pub spans: Vec<(Location, Location)>,
}
169
170impl PreparedSource {
171 pub fn from_detection_tokens(id: String, format: String, tokens: &[DetectionToken]) -> Self {
173 let mut hashes = Vec::with_capacity(tokens.len());
174 let mut spans = Vec::with_capacity(tokens.len());
175 for t in tokens {
176 hashes.push(t.hash);
177 spans.push((t.start.clone(), t.end.clone()));
178 }
179 Self {
180 id,
181 format,
182 hashes,
183 spans,
184 }
185 }
186}
187
188pub fn detect_prepared(
192 format_groups: Vec<Vec<PreparedSource>>,
193 min_tokens: usize,
194 skip_local: bool,
195 min_lines: usize,
196) -> Vec<CpdClone> {
197 if format_groups.is_empty() || min_tokens == 0 {
198 return vec![];
199 }
200
201 let all_clones: Vec<Vec<CpdClone>> = format_groups
202 .into_par_iter()
203 .map(|group| detect_in_group(&group, min_tokens, skip_local, min_lines))
204 .collect();
205
206 let mut clones: Vec<CpdClone> = all_clones.into_iter().flatten().collect();
207 dedup_exact_clones(&mut clones);
208
209 clones.sort_by(|a, b| {
210 (
211 &a.fragment_a.source_id,
212 a.fragment_a.start.line,
213 &a.fragment_b.source_id,
214 a.fragment_b.start.line,
215 )
216 .cmp(&(
217 &b.fragment_a.source_id,
218 b.fragment_a.start.line,
219 &b.fragment_b.source_id,
220 b.fragment_b.start.line,
221 ))
222 });
223
224 clones
225}
226
227fn detect_in_group(
232 prepared: &[PreparedSource],
233 min_tokens: usize,
234 skip_local: bool,
235 min_lines: usize,
236) -> Vec<CpdClone> {
237 let window_power = base_pow(min_tokens.saturating_sub(1));
240
241 let total_windows: usize = prepared
243 .iter()
244 .map(|p| p.hashes.len().saturating_sub(min_tokens))
245 .sum();
246 let mut store: WindowStore =
247 FxHashMap::with_capacity_and_hasher(total_windows, Default::default());
248
249 let mut clones: Vec<CpdClone> = Vec::new();
250 const SECONDARY_OCCURRENCE_CAP: usize = 2;
254 let mut repeated_windows: FxHashMap<u64, Vec<Occurrence>> = FxHashMap::default();
255
256 for (file_idx, source) in prepared.iter().enumerate() {
257 let hashes = &source.hashes;
258 if hashes.len() < min_tokens {
259 continue;
260 }
261 let windows_len = hashes.len() - min_tokens + 1;
262
263 let mut open_clone: Option<OpenClone> = None;
268
269 let mut window_hash = hash_window(&hashes[..min_tokens]);
270
271 for token_start in 0..windows_len {
272 if token_start > 0 {
273 window_hash = roll(
274 window_hash,
275 hashes[token_start - 1],
276 hashes[token_start + min_tokens - 1],
277 window_power,
278 );
279 }
280
281 let current = Occurrence {
282 source_id: file_idx,
283 token_start,
284 };
285
286 match store.get(&window_hash).copied() {
287 Some(stored) if windows_match(stored, current, prepared, min_tokens) => {
288 if open_clone.is_none() {
289 open_clone = Some(OpenClone {
290 stored_occurrence: stored,
291 current_start: token_start,
292 match_len: min_tokens,
293 });
294 } else if let Some(ref mut oc) = open_clone {
295 oc.match_len += 1;
297 }
298 remember_repeated_window(
299 &mut repeated_windows,
300 window_hash,
301 stored,
302 SECONDARY_OCCURRENCE_CAP,
303 );
304 remember_repeated_window(
305 &mut repeated_windows,
306 window_hash,
307 current,
308 SECONDARY_OCCURRENCE_CAP,
309 );
310 }
313 _ => {
314 flush_clone(
316 open_clone.take(),
317 file_idx,
318 prepared,
319 skip_local,
320 min_lines,
321 &mut clones,
322 );
323 store.insert(window_hash, current);
324 }
325 }
326 }
327
328 flush_clone(
330 open_clone.take(),
331 file_idx,
332 prepared,
333 skip_local,
334 min_lines,
335 &mut clones,
336 );
337 }
338
339 add_secondary_clones(
340 repeated_windows,
341 prepared,
342 min_tokens,
343 skip_local,
344 min_lines,
345 &mut clones,
346 );
347
348 clones
349}
350
/// A primary-pass clone that is still growing while the current source is scanned.
struct OpenClone {
    /// The stored occurrence matched by the first window of this run.
    stored_occurrence: Occurrence,
    /// Token index in the current source at which the run of matching windows began.
    current_start: usize,
    /// Tokens covered so far: `min_tokens` for the first window plus one for
    /// every further consecutive matching window.
    match_len: usize,
}
360
361fn windows_match(
364 stored: Occurrence,
365 current: Occurrence,
366 prepared: &[PreparedSource],
367 min_tokens: usize,
368) -> bool {
369 if stored.source_id == current.source_id && stored.token_start == current.token_start {
370 return false;
371 }
372 let stored_hashes = &prepared[stored.source_id].hashes;
373 let current_hashes = &prepared[current.source_id].hashes;
374 if stored.token_start + min_tokens > stored_hashes.len()
375 || current.token_start + min_tokens > current_hashes.len()
376 {
377 return false;
378 }
379 stored_hashes[stored.token_start..stored.token_start + min_tokens]
380 == current_hashes[current.token_start..current.token_start + min_tokens]
381}
382
383fn flush_clone(
389 open: Option<OpenClone>,
390 current_file_idx: usize,
391 prepared: &[PreparedSource],
392 skip_local: bool,
393 min_lines: usize,
394 clones: &mut Vec<CpdClone>,
395) {
396 let oc = match open {
397 Some(o) => o,
398 None => return,
399 };
400
401 let existing = &oc.stored_occurrence;
402 let cur_start = oc.current_start;
403 let match_len = oc.match_len;
404
405 let existing_file = &prepared[existing.source_id];
406 let current_file = &prepared[current_file_idx];
407
408 let ex_start = existing.token_start;
409 let ex_end = ex_start + match_len - 1;
410 let cur_end = cur_start + match_len - 1;
411
412 if skip_local {
414 let dir_a = Path::new(&existing_file.id).parent();
415 let dir_b = Path::new(¤t_file.id).parent();
416 if dir_a == dir_b {
417 return;
418 }
419 }
420
421 let fragment_a = match make_fragment(&existing_file.id, &existing_file.spans, ex_start, ex_end)
422 {
423 Some(f) => f,
424 None => return,
425 };
426 let fragment_b = match make_fragment(¤t_file.id, ¤t_file.spans, cur_start, cur_end)
427 {
428 Some(f) => f,
429 None => return,
430 };
431
432 if min_lines > 0 {
436 let lines = fragment_a.end.line as usize - fragment_a.start.line as usize;
437 if lines < min_lines {
438 return;
439 }
440 }
441
442 clones.push(CpdClone {
443 format: current_file.format.clone(),
444 fragment_a,
445 fragment_b,
446 token_count: match_len as u32,
447 });
448}
449
450fn make_fragment(
451 source_id: &str,
452 spans: &[(Location, Location)],
453 start_idx: usize,
454 end_idx: usize,
455) -> Option<Fragment> {
456 let (first_start, _) = spans.get(start_idx)?;
457 let (_, last_end) = spans.get(end_idx)?;
458 Some(Fragment {
459 source_id: source_id.to_string(),
460 start: first_start.clone(),
461 end: last_end.clone(),
462 range: [start_idx as u32, end_idx as u32],
463 blame: None,
464 })
465}
466
467fn dedup_exact_clones(clones: &mut Vec<CpdClone>) {
472 for clone in clones.iter_mut() {
474 let a_key = (&clone.fragment_a.source_id, clone.fragment_a.start.line);
475 let b_key = (&clone.fragment_b.source_id, clone.fragment_b.start.line);
476 if a_key > b_key {
477 std::mem::swap(&mut clone.fragment_a, &mut clone.fragment_b);
478 }
479 }
480
481 let mut seen: FxHashSet<CloneDedupKey> = FxHashSet::default();
482 clones.retain(|c| seen.insert(CloneDedupKey::from_clone(c)));
483}
484
485fn remember_repeated_window(
490 repeated_windows: &mut FxHashMap<u64, Vec<Occurrence>>,
491 hash: u64,
492 occurrence: Occurrence,
493 cap: usize,
494) {
495 let bucket = repeated_windows.entry(hash).or_default();
496 if bucket
497 .iter()
498 .any(|s| s.source_id == occurrence.source_id && s.token_start == occurrence.token_start)
499 {
500 return;
501 }
502 if bucket.len() < cap {
503 bucket.push(occurrence);
504 }
505}
506
/// A secondary-pass clone that is still being extended one window at a time.
struct SecondaryOpen {
    /// The clone built so far; its fragment ends and `token_count` are updated
    /// on every extension.
    clone: CpdClone,
    /// Index into `prepared` of the source holding `fragment_a`.
    source_a: usize,
    /// Index into `prepared` of the source holding `fragment_b`.
    source_b: usize,
    /// Start token of the last window merged on the `a` side; a candidate
    /// extends the clone only if it starts exactly one token later.
    last_token_start_a: usize,
    /// Start token of the last window merged on the `b` side; same rule as
    /// for the `a` side.
    last_token_start_b: usize,
}
514
515fn add_secondary_clones(
516 repeated_windows: FxHashMap<u64, Vec<Occurrence>>,
517 prepared: &[PreparedSource],
518 min_tokens: usize,
519 skip_local: bool,
520 min_lines: usize,
521 clones: &mut Vec<CpdClone>,
522) {
523 if repeated_windows.is_empty() {
524 return;
525 }
526
527 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
528 struct Candidate {
529 source_a: usize,
530 source_b: usize,
531 token_a: usize,
532 token_b: usize,
533 }
534
535 let mut candidates: Vec<Candidate> = Vec::new();
536 for occurrences in repeated_windows.values() {
537 if occurrences.len() < 2 {
538 continue;
539 }
540 for li in 0..occurrences.len() {
541 for ri in li + 1..occurrences.len() {
542 let left = &occurrences[li];
543 let right = &occurrences[ri];
544 if left.source_id == right.source_id && left.token_start == right.token_start {
545 continue;
546 }
547 let lh = &prepared[left.source_id].hashes;
548 let rh = &prepared[right.source_id].hashes;
549 let la = left.token_start;
550 let ra = right.token_start;
551 if la + min_tokens > lh.len() || ra + min_tokens > rh.len() {
552 continue;
553 }
554 if lh[la..la + min_tokens] != rh[ra..ra + min_tokens] {
555 continue;
556 }
557 let (sa, ta, sb, tb) =
558 if (left.source_id, left.token_start) <= (right.source_id, right.token_start) {
559 (
560 left.source_id,
561 left.token_start,
562 right.source_id,
563 right.token_start,
564 )
565 } else {
566 (
567 right.source_id,
568 right.token_start,
569 left.source_id,
570 left.token_start,
571 )
572 };
573 candidates.push(Candidate {
574 source_a: sa,
575 source_b: sb,
576 token_a: ta,
577 token_b: tb,
578 });
579 }
580 }
581 }
582 if candidates.is_empty() {
583 return;
584 }
585 candidates.sort_unstable();
586 candidates.dedup();
587
588 let mut coverage = LineCoverage::from_clones(prepared, clones);
590 let mut open: Option<SecondaryOpen> = None;
591
592 for candidate in candidates {
593 if let Some(current) = open.as_mut()
594 && current.source_a == candidate.source_a
595 && current.source_b == candidate.source_b
596 && current.last_token_start_a + 1 == candidate.token_a
597 && current.last_token_start_b + 1 == candidate.token_b
598 {
599 let new_match_len = current.clone.token_count as usize + 1;
601 let end_idx_a = candidate.token_a + min_tokens;
602 let end_idx_b = candidate.token_b + min_tokens;
603 if let Some(frag_a_end) = prepared[current.source_a].spans.get(end_idx_a) {
604 current.clone.fragment_a.end = frag_a_end.1.clone();
605 current.clone.fragment_a.range[1] = end_idx_a as u32;
606 }
607 if let Some(frag_b_end) = prepared[current.source_b].spans.get(end_idx_b) {
608 current.clone.fragment_b.end = frag_b_end.1.clone();
609 current.clone.fragment_b.range[1] = end_idx_b as u32;
610 }
611 current.clone.token_count = new_match_len as u32;
612 current.last_token_start_a = candidate.token_a;
613 current.last_token_start_b = candidate.token_b;
614 continue;
615 }
616
617 flush_secondary_clone(
618 open.take(),
619 prepared,
620 skip_local,
621 min_lines,
622 clones,
623 &mut coverage,
624 );
625
626 let start_a = candidate.token_a;
628 let end_a = start_a + min_tokens - 1;
629 let start_b = candidate.token_b;
630 let end_b = start_b + min_tokens - 1;
631
632 let frag_a = match make_fragment(
633 &prepared[candidate.source_a].id,
634 &prepared[candidate.source_a].spans,
635 start_a,
636 end_a,
637 ) {
638 Some(f) => f,
639 None => continue,
640 };
641 let frag_b = match make_fragment(
642 &prepared[candidate.source_b].id,
643 &prepared[candidate.source_b].spans,
644 start_b,
645 end_b,
646 ) {
647 Some(f) => f,
648 None => continue,
649 };
650
651 open = Some(SecondaryOpen {
652 clone: CpdClone {
653 format: prepared[candidate.source_a].format.clone(),
654 fragment_a: frag_a,
655 fragment_b: frag_b,
656 token_count: min_tokens as u32,
657 },
658 source_a: candidate.source_a,
659 source_b: candidate.source_b,
660 last_token_start_a: candidate.token_a,
661 last_token_start_b: candidate.token_b,
662 });
663 }
664
665 flush_secondary_clone(
666 open.take(),
667 prepared,
668 skip_local,
669 min_lines,
670 clones,
671 &mut coverage,
672 );
673}
674
675fn flush_secondary_clone(
676 open: Option<SecondaryOpen>,
677 prepared: &[PreparedSource],
678 skip_local: bool,
679 min_lines: usize,
680 clones: &mut Vec<CpdClone>,
681 coverage: &mut LineCoverage,
682) {
683 let Some(oc) = open else {
684 return;
685 };
686
687 let range_a = fragment_line_range(&oc.clone.fragment_a);
688 let range_b = fragment_line_range(&oc.clone.fragment_b);
689
690 if skip_local {
692 let dir_a = Path::new(&prepared[oc.source_a].id).parent();
693 let dir_b = Path::new(&prepared[oc.source_b].id).parent();
694 if dir_a == dir_b {
695 return;
696 }
697 }
698
699 if min_lines > 0 {
701 let lines = oc.clone.fragment_a.end.line as usize - oc.clone.fragment_a.start.line as usize;
702 if lines < min_lines {
703 return;
704 }
705 }
706
707 if !coverage.extends(oc.source_a, range_a) || !coverage.extends(oc.source_b, range_b) {
711 return;
712 }
713
714 let before = clones.len();
715 clones.push(oc.clone);
716
717 if clones.len() > before {
719 coverage.insert(oc.source_a, range_a);
720 coverage.insert(oc.source_b, range_b);
721 }
722}
723
724fn fragment_line_range(fragment: &Fragment) -> (usize, usize) {
725 let start = fragment.start.line as usize;
726 let end = fragment.end.line as usize;
727 (start.min(end), start.max(end))
728}
729
/// Line ranges already covered by accepted clones, tracked per prepared source.
struct LineCoverage {
    /// One list per index into the `prepared` slice, holding inclusive
    /// `(start, end)` line ranges. `insert` keeps each list sorted and merges
    /// overlapping or directly adjacent ranges.
    ranges_by_source: Vec<Vec<(usize, usize)>>,
}
737
738impl LineCoverage {
739 fn from_clones(prepared: &[PreparedSource], clones: &[CpdClone]) -> Self {
740 let mut source_lookup: FxHashMap<&str, usize> = FxHashMap::default();
741 for (idx, source) in prepared.iter().enumerate() {
742 source_lookup.insert(source.id.as_str(), idx);
743 }
744 let mut coverage = Self {
745 ranges_by_source: vec![Vec::new(); prepared.len()],
746 };
747 for clone in clones {
748 if let Some(idx) = source_lookup.get(clone.fragment_a.source_id.as_str()) {
749 coverage.insert(*idx, fragment_line_range(&clone.fragment_a));
750 }
751 if let Some(idx) = source_lookup.get(clone.fragment_b.source_id.as_str()) {
752 coverage.insert(*idx, fragment_line_range(&clone.fragment_b));
753 }
754 }
755 coverage
756 }
757
758 fn extends(&self, source_idx: usize, range: (usize, usize)) -> bool {
759 let Some(ranges) = self.ranges_by_source.get(source_idx) else {
760 return true;
761 };
762 let mut next_line = range.0;
763 for &(start, end) in ranges {
764 if end < next_line {
765 continue;
766 }
767 if start > next_line {
768 return true;
769 }
770 next_line = next_line.max(end.saturating_add(1));
771 if next_line > range.1 {
772 return false;
773 }
774 }
775 next_line <= range.1
776 }
777
778 fn insert(&mut self, source_idx: usize, range: (usize, usize)) {
779 let Some(ranges) = self.ranges_by_source.get_mut(source_idx) else {
780 return;
781 };
782 ranges.push(range);
783 ranges.sort_unstable();
784
785 let mut merged: Vec<(usize, usize)> = Vec::with_capacity(ranges.len());
786 for &(start, end) in ranges.iter() {
787 if let Some((_, previous_end)) = merged.last_mut()
788 && start <= previous_end.saturating_add(1)
789 {
790 *previous_end = (*previous_end).max(end);
791 continue;
792 }
793 merged.push((start, end));
794 }
795 *ranges = merged;
796 }
797}
798
/// Unit tests for the detection entry points, driven by small hand-built
/// token streams.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Location, Token, TokenKind};

    /// Shorthand for a `Location`.
    fn loc(line: u32, col: u32, offset: u32) -> Location {
        Location {
            line,
            column: col,
            offset,
        }
    }

    /// Builds a single-line token whose end is derived from the length of `value`.
    fn make_token(kind: TokenKind, value: &str, line: u32, col: u32, offset: u32) -> Token {
        let end_col = col + value.len() as u32;
        let end_off = offset + value.len() as u32;
        Token {
            kind,
            value: value.to_string(),
            start: loc(line, col, offset),
            end: loc(line, end_col, end_off),
        }
    }

    fn make_file(id: &str, format: &str, tokens: Vec<Token>) -> SourceFile {
        SourceFile {
            id: id.to_string(),
            format: format.to_string(),
            tokens,
        }
    }

    /// Nine tokens spanning three lines: `function hello(){` / `return 42;` / `}`.
    fn js_tokens_ab() -> Vec<Token> {
        vec![
            make_token(TokenKind::Keyword, "function", 1, 0, 0),
            make_token(TokenKind::Other, "hello", 1, 9, 9),
            make_token(TokenKind::Operator, "(", 1, 14, 14),
            make_token(TokenKind::Operator, ")", 1, 15, 15),
            make_token(TokenKind::Operator, "{", 1, 16, 16),
            make_token(TokenKind::Keyword, "return", 2, 0, 18),
            make_token(TokenKind::Literal, "42", 2, 7, 25),
            make_token(TokenKind::Operator, ";", 2, 9, 27),
            make_token(TokenKind::Operator, "}", 3, 0, 29),
        ]
    }

    #[test]
    fn empty_input_returns_empty() {
        let result = detect(&[], 10);
        assert!(result.is_empty());
    }

    #[test]
    fn identical_files_detected_as_clone() {
        let tokens = js_tokens_ab();
        let file_a = make_file("a.js", "javascript", tokens.clone());
        let file_b = make_file("b.js", "javascript", tokens);
        let clones = detect(&[file_a, file_b], 5);
        assert!(
            !clones.is_empty(),
            "identical files must produce at least one clone"
        );
    }

    #[test]
    fn min_tokens_threshold_respected() {
        let tokens = js_tokens_ab();
        let file_a = make_file("a.js", "javascript", tokens.clone());
        let file_b = make_file("b.js", "javascript", tokens);
        let clones = detect(&[file_a, file_b], 100);
        assert!(
            clones.is_empty(),
            "no clones when min_tokens exceeds file length"
        );
    }

    #[test]
    fn deduplication_ab_ba_collapse() {
        let tokens = js_tokens_ab();
        let file_a = make_file("a.js", "javascript", tokens.clone());
        let file_b = make_file("b.js", "javascript", tokens);
        let clones = detect(&[file_a, file_b], 5);
        assert_eq!(clones.len(), 1, "symmetric pairs must collapse to 1");
    }

    #[test]
    fn different_formats_not_cross_detected() {
        let tokens = js_tokens_ab();
        let file_js = make_file("a.js", "javascript", tokens.clone());
        let file_py = make_file("a.py", "python", tokens);
        let clones = detect(&[file_js, file_py], 5);
        assert!(
            clones.is_empty(),
            "tokens from different formats must not match"
        );
    }

    #[test]
    fn identical_files_maximal_clone() {
        let tokens = js_tokens_ab();
        let file_a = make_file("a.js", "javascript", tokens.clone());
        let file_b = make_file("b.js", "javascript", tokens);
        let clones = detect(&[file_a, file_b], 5);
        assert_eq!(
            clones.len(),
            1,
            "open_clone SM must produce one maximal clone"
        );
        assert_eq!(
            clones[0].token_count, 9,
            "maximal clone must cover all 9 tokens"
        );
    }

    #[test]
    fn three_identical_files_secondary_pass_adds_missing_pair() {
        let tokens = js_tokens_ab();
        let file_a = make_file("a.js", "javascript", tokens.clone());
        let file_b = make_file("b.js", "javascript", tokens.clone());
        let file_c = make_file("c.js", "javascript", tokens);
        let clones = detect(&[file_a, file_b, file_c], 5);
        assert!(
            clones.len() >= 2,
            "three identical files must yield at least 2 clone pairs, got {}",
            clones.len()
        );
    }

    #[test]
    fn clones_sorted_by_source_and_line() {
        // Two identical files yield a single clone, which would leave nothing
        // to compare. Three files yield several clones, so the ordering check
        // below really inspects neighbouring entries.
        let tokens = js_tokens_ab();
        let file_a = make_file("a.js", "javascript", tokens.clone());
        let file_b = make_file("b.js", "javascript", tokens.clone());
        let file_c = make_file("c.js", "javascript", tokens);
        let clones = detect(&[file_a, file_b, file_c], 5);
        assert!(
            clones.len() >= 2,
            "need at least two clones to check ordering, got {}",
            clones.len()
        );
        for pair in clones.windows(2) {
            let (prev, curr) = (&pair[0], &pair[1]);
            assert!(
                (
                    &prev.fragment_a.source_id,
                    prev.fragment_a.start.line,
                    &prev.fragment_b.source_id,
                    prev.fragment_b.start.line,
                ) <= (
                    &curr.fragment_a.source_id,
                    curr.fragment_a.start.line,
                    &curr.fragment_b.source_id,
                    curr.fragment_b.start.line,
                ),
                "clones must be sorted"
            );
        }
    }
}
959}