1use rayon::prelude::*;
4use rustc_hash::{FxHashMap, FxHashSet};
5use std::path::Path;
6
7use crate::{
8 hash::{base_pow, hash_window, roll, token_hash},
9 models::{CpdClone, DetectionToken, Fragment, Location, SourceFile, TokenKind},
10};
11
/// Lookup table from a window hash to the [`Occurrence`] currently stored for that hash.
type WindowStore = FxHashMap<u64, Occurrence>;
19
/// Position of a token window: the source it belongs to and the token it starts at.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Occurrence {
    /// Index of the source inside the `prepared` slice of the group being scanned.
    source_id: usize,
    /// Index of the window's first token inside that source's `hashes`.
    token_start: usize,
}
27
28#[derive(Debug, Clone, PartialEq, Eq, Hash)]
33struct CloneDedupKey {
34 a_id: String,
35 a_start_line: u32,
36 b_id: String,
37 b_start_line: u32,
38}
39
40impl CloneDedupKey {
41 fn from_clone(c: &CpdClone) -> Self {
42 let a_key = (&c.fragment_a.source_id, c.fragment_a.start.line);
44 let b_key = (&c.fragment_b.source_id, c.fragment_b.start.line);
45 if a_key <= b_key {
46 Self {
47 a_id: c.fragment_a.source_id.clone(),
48 a_start_line: c.fragment_a.start.line,
49 b_id: c.fragment_b.source_id.clone(),
50 b_start_line: c.fragment_b.start.line,
51 }
52 } else {
53 Self {
54 a_id: c.fragment_b.source_id.clone(),
55 a_start_line: c.fragment_b.start.line,
56 b_id: c.fragment_a.source_id.clone(),
57 b_start_line: c.fragment_a.start.line,
58 }
59 }
60 }
61}
62
63pub fn detect(files: &[SourceFile], min_tokens: usize) -> Vec<CpdClone> {
72 detect_with_options(files, min_tokens, false, 0)
73}
74
75pub fn detect_with_options(
82 files: &[SourceFile],
83 min_tokens: usize,
84 skip_local: bool,
85 min_lines: usize,
86) -> Vec<CpdClone> {
87 if files.is_empty() || min_tokens == 0 {
88 return vec![];
89 }
90
91 let mut by_format: FxHashMap<&str, Vec<&SourceFile>> = FxHashMap::default();
93 for file in files {
94 by_format.entry(file.format.as_str()).or_default().push(file);
95 }
96 let mut format_groups: Vec<(&str, Vec<&SourceFile>)> = by_format.into_iter().collect();
97 format_groups.sort_unstable_by_key(|(fmt, _)| *fmt);
98 for (_, group) in &mut format_groups {
99 group.sort_unstable_by_key(|&file| file.id.as_str());
100 }
101
102 let mut clones: Vec<CpdClone> = format_groups
103 .into_par_iter()
104 .flat_map(|(_format, files)| {
105 let prepared: Vec<PreparedSource> = files
109 .into_iter()
110 .map(|file| {
111 let mut hashes = Vec::with_capacity(file.tokens.len());
112 let mut spans: Vec<(Location, Location)> =
113 Vec::with_capacity(file.tokens.len());
114 for t in &file.tokens {
115 if t.kind == TokenKind::Ignore {
116 continue;
117 }
118 hashes.push(token_hash(t.kind.discriminant(), &t.value));
119 spans.push((t.start.clone(), t.end.clone()));
120 }
121 PreparedSource {
122 id: file.id.clone(),
123 format: file.format.clone(),
124 hashes,
125 spans,
126 }
127 })
128 .collect();
129 detect_in_group(&prepared, min_tokens, skip_local, min_lines)
130 })
131 .collect();
132
133 dedup_exact_clones(&mut clones);
134
135 clones.sort_by(|a, b| {
136 (
137 &a.fragment_a.source_id,
138 a.fragment_a.start.line,
139 &a.fragment_b.source_id,
140 a.fragment_b.start.line,
141 )
142 .cmp(&(
143 &b.fragment_a.source_id,
144 b.fragment_a.start.line,
145 &b.fragment_b.source_id,
146 b.fragment_b.start.line,
147 ))
148 });
149
150 clones
151}
152
153pub struct PreparedSource {
162 pub id: String,
163 pub format: String,
164 pub hashes: Vec<u64>,
165 pub spans: Vec<(Location, Location)>,
166}
167
168impl PreparedSource {
169 pub fn from_detection_tokens(id: String, format: String, tokens: &[DetectionToken]) -> Self {
171 let mut hashes = Vec::with_capacity(tokens.len());
172 let mut spans = Vec::with_capacity(tokens.len());
173 for t in tokens {
174 hashes.push(t.hash);
175 spans.push((t.start.clone(), t.end.clone()));
176 }
177 Self {
178 id,
179 format,
180 hashes,
181 spans,
182 }
183 }
184}
185
186pub fn detect_prepared(
190 format_groups: Vec<Vec<PreparedSource>>,
191 min_tokens: usize,
192 skip_local: bool,
193 min_lines: usize,
194) -> Vec<CpdClone> {
195 if format_groups.is_empty() || min_tokens == 0 {
196 return vec![];
197 }
198
199 let mut clones: Vec<CpdClone> = format_groups
200 .into_par_iter()
201 .flat_map(|group| detect_in_group(&group, min_tokens, skip_local, min_lines))
202 .collect();
203
204 dedup_exact_clones(&mut clones);
205
206 clones.sort_by(|a, b| {
207 (
208 &a.fragment_a.source_id,
209 a.fragment_a.start.line,
210 &a.fragment_b.source_id,
211 a.fragment_b.start.line,
212 )
213 .cmp(&(
214 &b.fragment_a.source_id,
215 b.fragment_a.start.line,
216 &b.fragment_b.source_id,
217 b.fragment_b.start.line,
218 ))
219 });
220
221 clones
222}
223
224fn detect_in_group(
229 prepared: &[PreparedSource],
230 min_tokens: usize,
231 skip_local: bool,
232 min_lines: usize,
233) -> Vec<CpdClone> {
234 let window_power = base_pow(min_tokens.saturating_sub(1));
237
238 let total_windows: usize = prepared
240 .iter()
241 .map(|p| p.hashes.len().saturating_sub(min_tokens))
242 .sum();
243 let mut store: WindowStore =
244 FxHashMap::with_capacity_and_hasher(total_windows, Default::default());
245
246 let mut clones: Vec<CpdClone> = Vec::new();
247 const SECONDARY_OCCURRENCE_CAP: usize = 2;
251 let mut repeated_windows: FxHashMap<u64, Vec<Occurrence>> = FxHashMap::default();
252
253 for (file_idx, source) in prepared.iter().enumerate() {
254 let hashes = &source.hashes;
255 if hashes.len() < min_tokens {
256 continue;
257 }
258 let windows_len = hashes.len() - min_tokens + 1;
259
260 let mut open_clone: Option<OpenClone> = None;
265
266 let mut window_hash = hash_window(&hashes[..min_tokens]);
267
268 for token_start in 0..windows_len {
269 if token_start > 0 {
270 window_hash = roll(
271 window_hash,
272 hashes[token_start - 1],
273 hashes[token_start + min_tokens - 1],
274 window_power,
275 );
276 }
277
278 let current = Occurrence {
279 source_id: file_idx,
280 token_start,
281 };
282
283 match store.get(&window_hash).copied() {
284 Some(stored) if windows_match(stored, current, prepared, min_tokens) => {
285 if open_clone.is_none() {
286 open_clone = Some(OpenClone {
287 stored_occurrence: stored,
288 current_start: token_start,
289 match_len: min_tokens,
290 });
291 } else if let Some(ref mut oc) = open_clone {
292 oc.match_len += 1;
294 }
295 remember_repeated_window(
296 &mut repeated_windows,
297 window_hash,
298 stored,
299 SECONDARY_OCCURRENCE_CAP,
300 );
301 remember_repeated_window(
302 &mut repeated_windows,
303 window_hash,
304 current,
305 SECONDARY_OCCURRENCE_CAP,
306 );
307 }
310 _ => {
311 flush_clone(
313 open_clone.take(),
314 file_idx,
315 prepared,
316 skip_local,
317 min_lines,
318 &mut clones,
319 );
320 store.insert(window_hash, current);
321 }
322 }
323 }
324
325 flush_clone(
327 open_clone.take(),
328 file_idx,
329 prepared,
330 skip_local,
331 min_lines,
332 &mut clones,
333 );
334 }
335
336 add_secondary_clones(
337 repeated_windows,
338 prepared,
339 min_tokens,
340 skip_local,
341 min_lines,
342 &mut clones,
343 );
344
345 clones
346}
347
/// A clone that is still being extended while a source is scanned.
struct OpenClone {
    /// Stored occurrence that the first matching window was compared against.
    stored_occurrence: Occurrence,
    /// Token index, in the source being scanned, of the first matching window.
    current_start: usize,
    /// Matched length in tokens: starts at `min_tokens`, grows by one per further matching window.
    match_len: usize,
}
357
358fn windows_match(
361 stored: Occurrence,
362 current: Occurrence,
363 prepared: &[PreparedSource],
364 min_tokens: usize,
365) -> bool {
366 if stored.source_id == current.source_id && stored.token_start == current.token_start {
367 return false;
368 }
369 let stored_hashes = &prepared[stored.source_id].hashes;
370 let current_hashes = &prepared[current.source_id].hashes;
371 if stored.token_start + min_tokens > stored_hashes.len()
372 || current.token_start + min_tokens > current_hashes.len()
373 {
374 return false;
375 }
376 stored_hashes[stored.token_start..stored.token_start + min_tokens]
377 == current_hashes[current.token_start..current.token_start + min_tokens]
378}
379
380fn flush_clone(
386 open: Option<OpenClone>,
387 current_file_idx: usize,
388 prepared: &[PreparedSource],
389 skip_local: bool,
390 min_lines: usize,
391 clones: &mut Vec<CpdClone>,
392) {
393 let oc = match open {
394 Some(o) => o,
395 None => return,
396 };
397
398 let existing = &oc.stored_occurrence;
399 let cur_start = oc.current_start;
400 let match_len = oc.match_len;
401
402 let existing_file = &prepared[existing.source_id];
403 let current_file = &prepared[current_file_idx];
404
405 let ex_start = existing.token_start;
406 let ex_end = ex_start + match_len - 1;
407 let cur_end = cur_start + match_len - 1;
408
409 if skip_local {
411 let dir_a = Path::new(&existing_file.id).parent();
412 let dir_b = Path::new(¤t_file.id).parent();
413 if dir_a == dir_b {
414 return;
415 }
416 }
417
418 let fragment_a = match make_fragment(&existing_file.id, &existing_file.spans, ex_start, ex_end)
419 {
420 Some(f) => f,
421 None => return,
422 };
423 let fragment_b = match make_fragment(¤t_file.id, ¤t_file.spans, cur_start, cur_end)
424 {
425 Some(f) => f,
426 None => return,
427 };
428
429 if min_lines > 0 {
433 let lines = fragment_a.end.line as usize - fragment_a.start.line as usize;
434 if lines < min_lines {
435 return;
436 }
437 }
438
439 clones.push(CpdClone {
440 format: current_file.format.clone(),
441 fragment_a,
442 fragment_b,
443 token_count: match_len as u32,
444 });
445}
446
447fn make_fragment(
448 source_id: &str,
449 spans: &[(Location, Location)],
450 start_idx: usize,
451 end_idx: usize,
452) -> Option<Fragment> {
453 let (first_start, _) = spans.get(start_idx)?;
454 let (_, last_end) = spans.get(end_idx)?;
455 Some(Fragment {
456 source_id: source_id.to_string(),
457 start: first_start.clone(),
458 end: last_end.clone(),
459 range: [start_idx as u32, end_idx as u32],
460 blame: None,
461 })
462}
463
464fn dedup_exact_clones(clones: &mut Vec<CpdClone>) {
469 for clone in clones.iter_mut() {
471 let a_key = (&clone.fragment_a.source_id, clone.fragment_a.start.line);
472 let b_key = (&clone.fragment_b.source_id, clone.fragment_b.start.line);
473 if a_key > b_key {
474 std::mem::swap(&mut clone.fragment_a, &mut clone.fragment_b);
475 }
476 }
477
478 let mut seen: FxHashSet<CloneDedupKey> = FxHashSet::default();
479 clones.retain(|c| seen.insert(CloneDedupKey::from_clone(c)));
480}
481
482fn remember_repeated_window(
487 repeated_windows: &mut FxHashMap<u64, Vec<Occurrence>>,
488 hash: u64,
489 occurrence: Occurrence,
490 cap: usize,
491) {
492 let bucket = repeated_windows.entry(hash).or_default();
493 if bucket
494 .iter()
495 .any(|s| s.source_id == occurrence.source_id && s.token_start == occurrence.token_start)
496 {
497 return;
498 }
499 if bucket.len() < cap {
500 bucket.push(occurrence);
501 }
502}
503
/// A clone being assembled by the secondary pass from consecutive candidate windows.
struct SecondaryOpen {
    /// The clone built so far.
    clone: CpdClone,
    /// Index in `prepared` of the source behind `clone.fragment_a`.
    source_a: usize,
    /// Index in `prepared` of the source behind `clone.fragment_b`.
    source_b: usize,
    /// Start token, on side `a`, of the last window merged into the clone.
    last_token_start_a: usize,
    /// Start token, on side `b`, of the last window merged into the clone.
    last_token_start_b: usize,
}
511
512fn add_secondary_clones(
513 repeated_windows: FxHashMap<u64, Vec<Occurrence>>,
514 prepared: &[PreparedSource],
515 min_tokens: usize,
516 skip_local: bool,
517 min_lines: usize,
518 clones: &mut Vec<CpdClone>,
519) {
520 if repeated_windows.is_empty() {
521 return;
522 }
523
524 #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
525 struct Candidate {
526 source_a: usize,
527 source_b: usize,
528 token_a: usize,
529 token_b: usize,
530 }
531
532 let mut candidates: Vec<Candidate> = Vec::new();
533 for occurrences in repeated_windows.values() {
534 if occurrences.len() < 2 {
535 continue;
536 }
537 for li in 0..occurrences.len() {
538 for ri in li + 1..occurrences.len() {
539 let left = &occurrences[li];
540 let right = &occurrences[ri];
541 if left.source_id == right.source_id && left.token_start == right.token_start {
542 continue;
543 }
544 let lh = &prepared[left.source_id].hashes;
545 let rh = &prepared[right.source_id].hashes;
546 let la = left.token_start;
547 let ra = right.token_start;
548 if la + min_tokens > lh.len() || ra + min_tokens > rh.len() {
549 continue;
550 }
551 if lh[la..la + min_tokens] != rh[ra..ra + min_tokens] {
552 continue;
553 }
554 let (sa, ta, sb, tb) =
555 if (left.source_id, left.token_start) <= (right.source_id, right.token_start) {
556 (
557 left.source_id,
558 left.token_start,
559 right.source_id,
560 right.token_start,
561 )
562 } else {
563 (
564 right.source_id,
565 right.token_start,
566 left.source_id,
567 left.token_start,
568 )
569 };
570 candidates.push(Candidate {
571 source_a: sa,
572 source_b: sb,
573 token_a: ta,
574 token_b: tb,
575 });
576 }
577 }
578 }
579 if candidates.is_empty() {
580 return;
581 }
582 candidates.sort_unstable();
583 candidates.dedup();
584
585 let mut coverage = LineCoverage::from_clones(prepared, clones);
587 let mut open: Option<SecondaryOpen> = None;
588
589 for candidate in candidates {
590 if let Some(current) = open.as_mut()
591 && current.source_a == candidate.source_a
592 && current.source_b == candidate.source_b
593 && current.last_token_start_a + 1 == candidate.token_a
594 && current.last_token_start_b + 1 == candidate.token_b
595 {
596 let new_match_len = current.clone.token_count as usize + 1;
598 let end_idx_a = candidate.token_a + min_tokens;
599 let end_idx_b = candidate.token_b + min_tokens;
600 if let Some(frag_a_end) = prepared[current.source_a].spans.get(end_idx_a) {
601 current.clone.fragment_a.end = frag_a_end.1.clone();
602 current.clone.fragment_a.range[1] = end_idx_a as u32;
603 }
604 if let Some(frag_b_end) = prepared[current.source_b].spans.get(end_idx_b) {
605 current.clone.fragment_b.end = frag_b_end.1.clone();
606 current.clone.fragment_b.range[1] = end_idx_b as u32;
607 }
608 current.clone.token_count = new_match_len as u32;
609 current.last_token_start_a = candidate.token_a;
610 current.last_token_start_b = candidate.token_b;
611 continue;
612 }
613
614 flush_secondary_clone(
615 open.take(),
616 prepared,
617 skip_local,
618 min_lines,
619 clones,
620 &mut coverage,
621 );
622
623 let start_a = candidate.token_a;
625 let end_a = start_a + min_tokens - 1;
626 let start_b = candidate.token_b;
627 let end_b = start_b + min_tokens - 1;
628
629 let frag_a = match make_fragment(
630 &prepared[candidate.source_a].id,
631 &prepared[candidate.source_a].spans,
632 start_a,
633 end_a,
634 ) {
635 Some(f) => f,
636 None => continue,
637 };
638 let frag_b = match make_fragment(
639 &prepared[candidate.source_b].id,
640 &prepared[candidate.source_b].spans,
641 start_b,
642 end_b,
643 ) {
644 Some(f) => f,
645 None => continue,
646 };
647
648 open = Some(SecondaryOpen {
649 clone: CpdClone {
650 format: prepared[candidate.source_a].format.clone(),
651 fragment_a: frag_a,
652 fragment_b: frag_b,
653 token_count: min_tokens as u32,
654 },
655 source_a: candidate.source_a,
656 source_b: candidate.source_b,
657 last_token_start_a: candidate.token_a,
658 last_token_start_b: candidate.token_b,
659 });
660 }
661
662 flush_secondary_clone(
663 open.take(),
664 prepared,
665 skip_local,
666 min_lines,
667 clones,
668 &mut coverage,
669 );
670}
671
672fn flush_secondary_clone(
673 open: Option<SecondaryOpen>,
674 prepared: &[PreparedSource],
675 skip_local: bool,
676 min_lines: usize,
677 clones: &mut Vec<CpdClone>,
678 coverage: &mut LineCoverage,
679) {
680 let Some(oc) = open else {
681 return;
682 };
683
684 let range_a = fragment_line_range(&oc.clone.fragment_a);
685 let range_b = fragment_line_range(&oc.clone.fragment_b);
686
687 if skip_local {
689 let dir_a = Path::new(&prepared[oc.source_a].id).parent();
690 let dir_b = Path::new(&prepared[oc.source_b].id).parent();
691 if dir_a == dir_b {
692 return;
693 }
694 }
695
696 if min_lines > 0 {
698 let lines = oc.clone.fragment_a.end.line as usize - oc.clone.fragment_a.start.line as usize;
699 if lines < min_lines {
700 return;
701 }
702 }
703
704 if !coverage.extends(oc.source_a, range_a) || !coverage.extends(oc.source_b, range_b) {
708 return;
709 }
710
711 let before = clones.len();
712 clones.push(oc.clone);
713
714 if clones.len() > before {
716 coverage.insert(oc.source_a, range_a);
717 coverage.insert(oc.source_b, range_b);
718 }
719}
720
721fn fragment_line_range(fragment: &Fragment) -> (usize, usize) {
722 let start = fragment.start.line as usize;
723 let end = fragment.end.line as usize;
724 (start.min(end), start.max(end))
725}
726
727struct LineCoverage {
732 ranges_by_source: Vec<Vec<(usize, usize)>>,
733}
734
735impl LineCoverage {
736 fn from_clones(prepared: &[PreparedSource], clones: &[CpdClone]) -> Self {
737 let mut source_lookup: FxHashMap<&str, usize> = FxHashMap::default();
738 for (idx, source) in prepared.iter().enumerate() {
739 source_lookup.insert(source.id.as_str(), idx);
740 }
741 let mut coverage = Self {
742 ranges_by_source: vec![Vec::new(); prepared.len()],
743 };
744 for clone in clones {
745 if let Some(idx) = source_lookup.get(clone.fragment_a.source_id.as_str()) {
746 coverage.insert(*idx, fragment_line_range(&clone.fragment_a));
747 }
748 if let Some(idx) = source_lookup.get(clone.fragment_b.source_id.as_str()) {
749 coverage.insert(*idx, fragment_line_range(&clone.fragment_b));
750 }
751 }
752 coverage
753 }
754
755 fn extends(&self, source_idx: usize, range: (usize, usize)) -> bool {
756 let Some(ranges) = self.ranges_by_source.get(source_idx) else {
757 return true;
758 };
759 let mut next_line = range.0;
760 for &(start, end) in ranges {
761 if end < next_line {
762 continue;
763 }
764 if start > next_line {
765 return true;
766 }
767 next_line = next_line.max(end.saturating_add(1));
768 if next_line > range.1 {
769 return false;
770 }
771 }
772 next_line <= range.1
773 }
774
775 fn insert(&mut self, source_idx: usize, range: (usize, usize)) {
776 let Some(ranges) = self.ranges_by_source.get_mut(source_idx) else {
777 return;
778 };
779 ranges.push(range);
780 ranges.sort_unstable();
781
782 let mut merged: Vec<(usize, usize)> = Vec::with_capacity(ranges.len());
783 for &(start, end) in ranges.iter() {
784 if let Some((_, previous_end)) = merged.last_mut()
785 && start <= previous_end.saturating_add(1)
786 {
787 *previous_end = (*previous_end).max(end);
788 continue;
789 }
790 merged.push((start, end));
791 }
792 *ranges = merged;
793 }
794}
795
#[cfg(test)]
mod tests {
    use super::*;
    use crate::models::{Location, Token, TokenKind};

    /// Shorthand for building a `Location`.
    fn loc(line: u32, col: u32, offset: u32) -> Location {
        Location {
            line,
            column: col,
            offset,
        }
    }

    /// Builds a single-line token whose end is its start advanced by the value's length.
    fn make_token(kind: TokenKind, value: &str, line: u32, col: u32, offset: u32) -> Token {
        let width = value.len() as u32;
        Token {
            kind,
            value: value.to_string(),
            start: loc(line, col, offset),
            end: loc(line, col + width, offset + width),
        }
    }

    fn make_file(id: &str, format: &str, tokens: Vec<Token>) -> SourceFile {
        SourceFile {
            id: id.to_string(),
            format: format.to_string(),
            tokens,
        }
    }

    /// Nine tokens spelling `function hello(){ return 42; }` over three lines.
    fn js_tokens_ab() -> Vec<Token> {
        [
            (TokenKind::Keyword, "function", 1, 0, 0),
            (TokenKind::Other, "hello", 1, 9, 9),
            (TokenKind::Operator, "(", 1, 14, 14),
            (TokenKind::Operator, ")", 1, 15, 15),
            (TokenKind::Operator, "{", 1, 16, 16),
            (TokenKind::Keyword, "return", 2, 0, 18),
            (TokenKind::Literal, "42", 2, 7, 25),
            (TokenKind::Operator, ";", 2, 9, 27),
            (TokenKind::Operator, "}", 3, 0, 29),
        ]
        .into_iter()
        .map(|(kind, value, line, col, offset)| make_token(kind, value, line, col, offset))
        .collect()
    }

    /// Two JavaScript files, `a.js` and `b.js`, with identical tokens.
    fn identical_js_pair() -> [SourceFile; 2] {
        let tokens = js_tokens_ab();
        [
            make_file("a.js", "javascript", tokens.clone()),
            make_file("b.js", "javascript", tokens),
        ]
    }

    #[test]
    fn empty_input_returns_empty() {
        let result = detect(&[], 10);
        assert!(result.is_empty());
    }

    #[test]
    fn identical_files_detected_as_clone() {
        let clones = detect(&identical_js_pair(), 5);
        assert!(
            !clones.is_empty(),
            "identical files must produce at least one clone"
        );
    }

    #[test]
    fn min_tokens_threshold_respected() {
        let clones = detect(&identical_js_pair(), 100);
        assert!(
            clones.is_empty(),
            "no clones when min_tokens exceeds file length"
        );
    }

    #[test]
    fn deduplication_ab_ba_collapse() {
        let clones = detect(&identical_js_pair(), 5);
        assert_eq!(clones.len(), 1, "symmetric pairs must collapse to 1");
    }

    #[test]
    fn different_formats_not_cross_detected() {
        let tokens = js_tokens_ab();
        let sources = [
            make_file("a.js", "javascript", tokens.clone()),
            make_file("a.py", "python", tokens),
        ];
        let clones = detect(&sources, 5);
        assert!(
            clones.is_empty(),
            "tokens from different formats must not match"
        );
    }

    #[test]
    fn identical_files_maximal_clone() {
        let clones = detect(&identical_js_pair(), 5);
        assert_eq!(
            clones.len(),
            1,
            "open_clone SM must produce one maximal clone"
        );
        assert_eq!(
            clones[0].token_count, 9,
            "maximal clone must cover all 9 tokens"
        );
    }

    #[test]
    fn three_identical_files_secondary_pass_adds_missing_pair() {
        let tokens = js_tokens_ab();
        let sources = [
            make_file("a.js", "javascript", tokens.clone()),
            make_file("b.js", "javascript", tokens.clone()),
            make_file("c.js", "javascript", tokens),
        ];
        let clones = detect(&sources, 5);
        assert!(
            clones.len() >= 2,
            "three identical files must yield at least 2 clone pairs, got {}",
            clones.len()
        );
    }

    #[test]
    fn clones_sorted_by_source_and_line() {
        /// The key the result is expected to be ordered by.
        fn key(clone: &CpdClone) -> (&str, u32, &str, u32) {
            (
                clone.fragment_a.source_id.as_str(),
                clone.fragment_a.start.line,
                clone.fragment_b.source_id.as_str(),
                clone.fragment_b.start.line,
            )
        }

        let clones = detect(&identical_js_pair(), 5);
        for pair in clones.windows(2) {
            assert!(key(&pair[0]) <= key(&pair[1]), "clones must be sorted");
        }
    }
}