// scute_core/code_similarity/check.rs
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};

use serde::Deserialize;

use super::language::{self, LanguageConfig};
use super::{CloneGroup, Occurrence, SourceEntry, TreeSitterParser, find_clones};
use crate::files;
use crate::parser::AstParser;
use crate::{Evaluation, Evidence, ExecutionError, Thresholds};
11
/// Name under which this check is registered and reported.
pub const CHECK_NAME: &str = "code-similarity";

/// Default minimum token count for a sequence to be considered a clone.
const DEFAULT_MIN_TOKENS: usize = 50;
/// Default warn/fail thresholds (duplicated token count) for production code.
const DEFAULT_WARN: u64 = 70;
const DEFAULT_FAIL: u64 = 100;
/// More lenient defaults applied when every occurrence in a clone group
/// lives in test code.
const DEFAULT_TEST_WARN: u64 = 100;
const DEFAULT_TEST_FAIL: u64 = 130;
19
/// Configuration for the code similarity check.
///
/// All fields are optional and fall back to sensible defaults when absent.
///
/// ```
/// use scute_core::code_similarity::Definition;
///
/// // Zero-config: uses default min_tokens (50) and thresholds (warn: 70, fail: 100)
/// let default = Definition::default();
///
/// // Custom: catch smaller clones, tighter thresholds
/// let strict = Definition {
///     min_tokens: Some(10),
///     thresholds: Some(scute_core::Thresholds { warn: Some(15), fail: Some(30) }),
///     ..Definition::default()
/// };
/// ```
#[derive(Debug, Default, Deserialize)]
#[serde(deny_unknown_fields)]
pub struct Definition {
    /// Minimum token count for a sequence to be considered a clone.
    /// Defaults to 50.
    #[serde(alias = "min-tokens")]
    pub min_tokens: Option<usize>,
    /// Warn/fail thresholds on the duplicated token count of a clone group.
    /// Defaults to warn: 70, fail: 100.
    pub thresholds: Option<Thresholds>,
    /// Skip files matching `.gitignore`, `.ignore`, and hidden paths.
    /// Defaults to `true`.
    #[serde(alias = "skip-ignored-files")]
    pub skip_ignored_files: Option<bool>,
    /// Separate thresholds for clone groups where every occurrence lives
    /// in test code. Defaults to warn: 100, fail: 130.
    #[serde(alias = "test-thresholds")]
    pub test_thresholds: Option<Thresholds>,
    /// Glob patterns for files to exclude from similarity analysis.
    pub exclude: Option<Vec<String>>,
}
56
57/// Check a directory for code duplication.
58///
59/// Discovers supported source files (Rust, JavaScript, TypeScript), runs
60/// clone detection, and returns one [`Evaluation`] per clone group found.
61/// When no clones are detected, returns a single passing evaluation.
62///
63/// When `focus_files` is non-empty, only clone groups involving at least
64/// one focus file are reported. An empty slice means full-project scan.
65/// Focus files with unsupported extensions or that can't be read produce
66/// errored evaluations.
67///
68/// ```no_run
69/// use std::path::Path;
70/// use scute_core::code_similarity::{Definition, check};
71///
72/// let evals = check(Path::new("."), &[], &Definition::default()).unwrap();
73/// for eval in &evals {
74///     if eval.is_fail() {
75///         eprintln!("duplication found: {}", eval.target);
76///     }
77/// }
78/// ```
79///
80/// # Errors
81///
82/// Returns `ExecutionError` if `source_dir` is not a valid directory.
83pub fn check(
84    source_dir: &Path,
85    focus_files: &[PathBuf],
86    definition: &Definition,
87) -> Result<Vec<Evaluation>, ExecutionError> {
88    let min_tokens = definition.min_tokens.unwrap_or(DEFAULT_MIN_TOKENS);
89    let thresholds = definition.thresholds.clone().unwrap_or(Thresholds {
90        warn: Some(DEFAULT_WARN),
91        fail: Some(DEFAULT_FAIL),
92    });
93
94    let canonical_dir = files::validate_source_dir(source_dir).map_err(|e| ExecutionError {
95        code: "invalid_target".into(),
96        message: e.to_string(),
97        recovery: "check that the path exists and is a directory".into(),
98    })?;
99    let focus_files = match files::validate_focus_files(
100        focus_files,
101        &["rs", "js", "jsx", "mjs", "cjs", "ts", "tsx"],
102        "only Rust, JavaScript, and TypeScript files are supported",
103    ) {
104        Ok(files) => files,
105        Err(errors) => return Ok(errors),
106    };
107
108    let skip_ignored = definition.skip_ignored_files.unwrap_or(true);
109    let exclude = definition.exclude.as_deref().unwrap_or_default();
110    let sources = read_sources(&canonical_dir, skip_ignored, exclude);
111    let clone_groups = detect_clones(&sources, min_tokens)?;
112    let relevant = filter_by_focus(&clone_groups, &focus_files);
113
114    if relevant.is_empty() {
115        return Ok(vec![Evaluation::completed(
116            source_dir.display().to_string(),
117            0,
118            thresholds,
119            vec![],
120        )]);
121    }
122
123    let test_thresholds = definition.test_thresholds.clone().unwrap_or(Thresholds {
124        warn: Some(DEFAULT_TEST_WARN),
125        fail: Some(DEFAULT_TEST_FAIL),
126    });
127    Ok(build_evaluations(
128        &relevant,
129        &sources,
130        &thresholds,
131        &test_thresholds,
132    ))
133}
134
135fn filter_by_focus<'a>(
136    clone_groups: &'a [CloneGroup],
137    focus_files: &[PathBuf],
138) -> Vec<&'a CloneGroup> {
139    let focus_strings: Vec<String> = focus_files
140        .iter()
141        .map(|p| p.display().to_string())
142        .collect();
143
144    clone_groups
145        .iter()
146        .filter(|group| {
147            focus_strings.is_empty()
148                || group
149                    .occurrences
150                    .iter()
151                    .any(|occ| focus_strings.contains(&occ.source_id))
152        })
153        .collect()
154}
155
156fn read_sources(
157    dir: &Path,
158    skip_ignored: bool,
159    exclude: &[String],
160) -> Vec<(String, String, &'static LanguageConfig)> {
161    discover_files(dir, skip_ignored, exclude)
162        .into_iter()
163        .filter_map(|(path, lang)| {
164            let content = std::fs::read_to_string(&path).ok()?;
165            Some((path.display().to_string(), content, lang))
166        })
167        .collect()
168}
169
170fn detect_clones(
171    sources: &[(String, String, &'static LanguageConfig)],
172    min_tokens: usize,
173) -> Result<Vec<CloneGroup>, ExecutionError> {
174    let entries: Vec<SourceEntry<'_>> = sources
175        .iter()
176        .map(|(path, content, lang)| SourceEntry::new(content, path, lang))
177        .collect();
178    find_clones(&entries, min_tokens).map_err(|e| ExecutionError {
179        code: "detection_failed".into(),
180        message: e.to_string(),
181        recovery: "check that source files are valid".into(),
182    })
183}
184
185fn build_evaluations(
186    groups: &[&CloneGroup],
187    sources: &[(String, String, &'static LanguageConfig)],
188    thresholds: &Thresholds,
189    test_thresholds: &Thresholds,
190) -> Vec<Evaluation> {
191    let mut parser = TreeSitterParser::new();
192    let source_by_path: HashMap<&str, (&str, &'static LanguageConfig)> = sources
193        .iter()
194        .map(|(path, content, lang)| (path.as_str(), (content.as_str(), *lang)))
195        .collect();
196    groups
197        .iter()
198        .map(|group| {
199            let effective = if is_test_only_group(&mut parser, group, &source_by_path) {
200                test_thresholds
201            } else {
202                thresholds
203            };
204            to_evaluation(group, effective, &source_by_path)
205        })
206        .collect()
207}
208
209fn is_test_only_group(
210    parser: &mut dyn AstParser,
211    group: &CloneGroup,
212    sources: &HashMap<&str, (&str, &'static LanguageConfig)>,
213) -> bool {
214    group.occurrences.iter().all(|occ| {
215        sources
216            .get(occ.source_id.as_str())
217            .is_some_and(|(content, lang)| {
218                lang.is_test_context(
219                    parser,
220                    Path::new(&occ.source_id),
221                    content,
222                    occ.start_line,
223                    occ.end_line,
224                )
225            })
226    })
227}
228
229fn discover_files(
230    dir: &Path,
231    skip_ignored: bool,
232    exclude: &[String],
233) -> Vec<(PathBuf, &'static LanguageConfig)> {
234    let mut result: Vec<_> = files::walk_source_files(dir, skip_ignored, exclude)
235        .filter_map(|e| {
236            let lang = language_for_path(e.path())?;
237            Some((e.into_path(), lang))
238        })
239        .collect();
240    result.sort_by(|(a, _), (b, _)| a.cmp(b));
241    result
242}
243
244fn language_for_path(path: &Path) -> Option<&'static LanguageConfig> {
245    static RUST: std::sync::LazyLock<LanguageConfig> = std::sync::LazyLock::new(language::rust);
246    static JAVASCRIPT: std::sync::LazyLock<LanguageConfig> =
247        std::sync::LazyLock::new(language::javascript);
248    static TYPESCRIPT: std::sync::LazyLock<LanguageConfig> =
249        std::sync::LazyLock::new(language::typescript);
250    static TYPESCRIPT_TSX: std::sync::LazyLock<LanguageConfig> =
251        std::sync::LazyLock::new(language::typescript_tsx);
252
253    match path.extension()?.to_str()? {
254        "rs" => Some(&RUST),
255        "js" | "jsx" | "mjs" | "cjs" => Some(&JAVASCRIPT),
256        "ts" => Some(&TYPESCRIPT),
257        "tsx" => Some(&TYPESCRIPT_TSX),
258        _ => None,
259    }
260}
261
/// A line is "trivial" when, after trimming, it contains nothing but ASCII
/// punctuation (closing braces, semicolons, etc.). We skip these when picking
/// a representative snippet. Empty and whitespace-only lines are trivial too:
/// `all` over an empty iterator is vacuously true.
fn is_trivial_line(line: &str) -> bool {
    line.trim().chars().all(|c| c.is_ascii_punctuation())
}
268
269fn occurrence_evidence(
270    occ: &Occurrence,
271    token_count: usize,
272    sources: &HashMap<&str, (&str, &'static LanguageConfig)>,
273) -> Evidence {
274    let line_count = occ.end_line.saturating_sub(occ.start_line) + 1;
275    let snippet = sources
276        .get(occ.source_id.as_str())
277        .and_then(|(content, _)| {
278            content
279                .lines()
280                .skip(occ.start_line.saturating_sub(1))
281                .take(line_count)
282                .map(str::trim)
283                .find(|line| !is_trivial_line(line))
284        });
285
286    let found = match snippet {
287        Some(line) => format!("{token_count} duplicated tokens, e.g. `{line}`"),
288        None => format!("{token_count} duplicated tokens"),
289    };
290
291    Evidence {
292        rule: None,
293        location: Some(format!(
294            "{}:{}-{}",
295            occ.source_id, occ.start_line, occ.end_line
296        )),
297        found,
298        expected: None,
299    }
300}
301
302fn to_evaluation(
303    group: &CloneGroup,
304    thresholds: &Thresholds,
305    sources: &HashMap<&str, (&str, &'static LanguageConfig)>,
306) -> Evaluation {
307    let evidence = group
308        .occurrences
309        .iter()
310        .map(|occ| occurrence_evidence(occ, group.token_count, sources))
311        .collect();
312
313    let observed = u64::try_from(group.token_count).unwrap_or(u64::MAX);
314
315    Evaluation::completed(
316        group
317            .occurrences
318            .first()
319            .map(|occ| format!("{}:{}", occ.source_id, occ.start_line))
320            .unwrap_or_default(),
321        observed,
322        thresholds.clone(),
323        evidence,
324    )
325}
326
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Outcome;
    use googletest::prelude::*;
    use tempfile::TempDir;

    /// Definition with thresholds low enough that the tiny fixtures in this
    /// module register as clones (min 5 tokens; warn 5 / fail 10, and test
    /// thresholds warn 10 / fail 30).
    fn low_threshold() -> Definition {
        Definition {
            min_tokens: Some(5),
            thresholds: Some(Thresholds {
                warn: Some(5),
                fail: Some(10),
            }),
            test_thresholds: Some(Thresholds {
                warn: Some(10),
                fail: Some(30),
            }),
            ..Definition::default()
        }
    }

    /// Full-project check over `dir` with the low thresholds above.
    fn check_dir(dir: &Path) -> Vec<Evaluation> {
        check(dir, &[], &low_threshold()).unwrap()
    }

    /// Focused check over `dir` with the low thresholds above.
    fn check_focused(dir: &Path, focus_files: &[PathBuf]) -> Vec<Evaluation> {
        check(dir, focus_files, &low_threshold()).unwrap()
    }

    /// Create a temp directory with the given files and run a similarity check.
    /// Returns `(TempDir, Vec<Evaluation>)` — caller keeps `TempDir` alive for
    /// any assertions that reference paths.
    fn check_files(files: &[(&str, &str)]) -> (TempDir, Vec<Evaluation>) {
        let dir = make_dir(files);
        let evals = check_dir(dir.path());
        (dir, evals)
    }

    /// Write each `(name, content)` pair into a fresh temp directory.
    fn make_dir(files: &[(&str, &str)]) -> TempDir {
        let dir = TempDir::new().unwrap();
        for (name, content) in files {
            write_file(dir.path(), name, content);
        }
        dir
    }

    fn check_clone_pair() -> Vec<Evaluation> {
        check_files(CLONE_PAIR).1
    }

    /// Like `check_clone_pair`, but with caller-supplied warn/fail thresholds
    /// (and no test thresholds, so the defaults apply there).
    fn check_clone_pair_with_thresholds(warn: u64, fail: u64) -> Vec<Evaluation> {
        let dir = make_dir(CLONE_PAIR);
        check(
            dir.path(),
            &[],
            &Definition {
                min_tokens: Some(5),
                thresholds: Some(Thresholds {
                    warn: Some(warn),
                    fail: Some(fail),
                }),
                ..Definition::default()
            },
        )
        .unwrap()
    }

    // Two structurally identical functions (14 tokens each) in separate files.
    const CLONE_PAIR: &[(&str, &str)] = &[
        ("a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
        ("b.rs", "fn bar(y: i32) -> i32 { y + 1 }"),
    ];

    /// CLONE_PAIR plus a second, unrelated clone pair (c.rs / d.rs), giving
    /// two distinct clone groups in one directory.
    fn two_clone_pairs_dir() -> TempDir {
        let mut files = CLONE_PAIR.to_vec();
        files.extend_from_slice(&[
            ("c.rs", "const A: [i32; 5] = [10, 20, 30, 40, 50];"),
            ("d.rs", "const B: [u32; 5] = [60, 70, 80, 90, 100];"),
        ]);
        make_dir(&files)
    }

    /// Write `content` to `dir/name`, creating intermediate directories.
    fn write_file(dir: &Path, name: &str, content: &str) {
        if let Some(parent) = Path::new(name).parent() {
            std::fs::create_dir_all(dir.join(parent)).unwrap();
        }
        std::fs::write(dir.join(name), content).unwrap();
    }

    /// Extract the evidence list from a completed evaluation, panicking on
    /// any other outcome.
    fn unwrap_evidence(eval: &Evaluation) -> &Vec<Evidence> {
        let Outcome::Completed { evidence, .. } = &eval.outcome else {
            panic!("expected completed evaluation")
        };
        evidence
    }

    /// Assert that some evidence entry's location contains `substring`.
    fn assert_location_contains(evidence: &[Evidence], substring: &str) {
        assert_that!(
            evidence,
            contains(matches_pattern!(Evidence {
                location: some(contains_substring(substring)),
                ..
            }))
        );
    }

    #[test]
    fn empty_directory_passes_with_zero_observed() {
        let dir = TempDir::new().unwrap();

        let evals = check_dir(dir.path());

        assert_that!(evals, len(eq(1)));
        assert!(evals[0].is_pass());
    }

    #[test]
    fn clone_exceeding_fail_threshold_produces_fail_status() {
        let evals = check_clone_pair_with_thresholds(10, 12);

        assert!(evals[0].is_fail()); // 14 tokens > fail threshold of 12
    }

    #[test]
    fn clone_below_thresholds_produces_pass_status() {
        let evals = check_clone_pair_with_thresholds(20, 30);

        assert!(evals[0].is_pass()); // 14 tokens < warn threshold of 20
    }

    #[test]
    fn observed_value_is_token_count_of_the_clone() {
        let evals = check_clone_pair();

        let Outcome::Completed { observed, .. } = &evals[0].outcome else {
            panic!("expected completed evaluation")
        };
        assert_that!(*observed, eq(14)); // fn $ID ( $ID : $ID ) -> $ID { $ID + $LIT } = 14 tokens
    }

    #[test]
    fn directory_with_only_unsupported_files_passes() {
        let (_, evals) = check_files(&[("readme.md", "# Hello"), ("data.json", "{}")]);

        assert_that!(evals, len(eq(1)));
        assert!(evals[0].is_pass());
    }

    #[test]
    fn discovers_files_in_subdirectories() {
        let (_, evals) = check_files(&[
            ("src/a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
            ("lib/b.rs", "fn bar(y: i32) -> i32 { y + 1 }"),
        ]);

        assert_that!(evals, len(eq(1)));
        let evidence = unwrap_evidence(&evals[0]);
        assert_location_contains(evidence, "src");
        assert_location_contains(evidence, "lib");
    }

    /// Repo-like fixture: a `.git` marker directory so the ignore walker
    /// treats the temp dir as a repository root and honors `.gitignore`.
    fn gitignore_dir() -> TempDir {
        let dir = make_dir(&[
            (".gitignore", "vendor/\n"),
            ("src/a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
            ("vendor/lib/b.rs", "fn bar(y: i32) -> i32 { y + 1 }"),
        ]);
        std::fs::create_dir(dir.path().join(".git")).unwrap();
        dir
    }

    #[test]
    fn skips_gitignored_directories() {
        let dir = gitignore_dir();

        let evals = check_dir(dir.path());

        // vendor/ is gitignored → only src/a.rs discovered → no clone pair
        assert!(
            evals.iter().all(Evaluation::is_pass),
            "vendor/ should be excluded, got: {evals:?}"
        );
    }

    #[test]
    fn skip_ignored_files_false_scans_gitignored_directories() {
        let dir = gitignore_dir();

        let evals = check(
            dir.path(),
            &[],
            &Definition {
                skip_ignored_files: Some(false),
                ..low_threshold()
            },
        )
        .unwrap();

        // With skip_ignored_files disabled, vendor/ is scanned → clone pair found
        assert!(
            evals.iter().any(|e| !e.is_pass()),
            "vendor/ should be scanned when skip_ignored_files is false, got: {evals:?}"
        );
    }

    #[test]
    fn evidence_contains_all_occurrence_locations() {
        let evals = check_clone_pair();

        let evidence = unwrap_evidence(&evals[0]);
        assert_that!(evidence, len(eq(2)));
        assert_location_contains(evidence, "a.rs");
        assert_location_contains(evidence, "b.rs");
    }

    #[test]
    fn evidence_snippets_reflect_each_occurrence() {
        let evals = check_clone_pair();

        let evidence = unwrap_evidence(&evals[0]);
        assert_that!(evidence[0].found, contains_substring("fn foo"));
        assert_that!(evidence[1].found, contains_substring("fn bar"));
    }

    #[test]
    fn duplicated_code_returns_one_evaluation_per_clone_group() {
        let evals = check_clone_pair();

        assert_that!(evals, len(eq(1)));
    }

    #[test]
    fn nonexistent_target_returns_error() {
        let result = check(Path::new("/nonexistent/path"), &[], &Definition::default());

        let err = result.unwrap_err();
        assert_that!(err.code, eq("invalid_target"));
    }

    #[test]
    fn distinct_code_passes() {
        let (_, evals) = check_files(&[
            ("a.rs", "let x = 1 + 2;"),
            ("b.rs", "if true { return false; }"),
        ]);

        assert_that!(evals, len(eq(1)));
        assert!(evals[0].is_pass());
    }

    // One case per supported language / extension combination; each pair
    // should be detected as a failing clone group.
    #[test_case::test_case(
        &[("a.ts", "function foo(x: number): number { return x + 1; }"),
          ("b.ts", "function bar(y: number): number { return y + 1; }")]
        ; "typescript"
    )]
    #[test_case::test_case(
        &[("a.js", "function foo(x) { return x + 1; }"),
          ("b.js", "function bar(y) { return y + 1; }")]
        ; "javascript"
    )]
    #[test_case::test_case(
        &[("a.jsx", "function Greeting({ name }) { return <div>Hello {name}</div>; }"),
          ("b.jsx", "function Welcome({ name }) { return <div>Hello {name}</div>; }")]
        ; "jsx"
    )]
    #[test_case::test_case(
        &[("a.js", "function foo(x) { return x + 1; }"),
          ("b.mjs", "function bar(y) { return y + 1; }")]
        ; "across js and mjs"
    )]
    #[test_case::test_case(
        &[("a.js", "function foo(x) { return x + 1; }"),
          ("b.cjs", "function bar(y) { return y + 1; }")]
        ; "across js and cjs"
    )]
    #[test_case::test_case(
        &[("a.tsx", "function Greeting({ name }: { name: string }) { return <div>Hello {name}</div>; }"),
          ("b.tsx", "function Welcome({ name }: { name: string }) { return <div>Hello {name}</div>; }")]
        ; "tsx"
    )]
    #[test_case::test_case(
        &[("a.ts", "function foo(x: number): number { return x + 1; }"),
          ("b.tsx", "function bar(y: number): number { return y + 1; }")]
        ; "across ts and tsx"
    )]
    fn detects_duplications(files: &[(&str, &str)]) {
        let (_, evals) = check_files(files);
        assert_that!(evals, len(eq(1)));
        assert!(evals[0].is_fail(), "expected fail, got: {evals:?}");
    }

    #[test]
    fn focus_file_only_reports_clone_groups_involving_that_file() {
        let dir = two_clone_pairs_dir();

        let evals = check_focused(dir.path(), &[dir.path().join("a.rs")]);

        assert_that!(evals, len(eq(1)));
        let evidence = unwrap_evidence(&evals[0]);
        assert_location_contains(evidence, "a.rs");
        assert_location_contains(evidence, "b.rs");
    }

    #[test]
    fn focus_file_without_clones_passes() {
        let dir = make_dir(&[
            ("clean.rs", "fn unique_stuff() -> bool { true }"),
            ("a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
            ("b.rs", "fn bar(y: i32) -> i32 { y + 1 }"),
        ]);

        let evals = check_focused(dir.path(), &[dir.path().join("clean.rs")]);

        assert_that!(evals, len(eq(1)));
        assert!(evals[0].is_pass());
    }

    #[test]
    fn multiple_focus_files_report_clones_involving_any_of_them() {
        let dir = two_clone_pairs_dir();

        let evals = check_focused(
            dir.path(),
            &[dir.path().join("a.rs"), dir.path().join("c.rs")],
        );

        assert_that!(evals, len(eq(2)));
    }

    // Each case is a clone pair living entirely in test code; at 14 tokens it
    // sits between the test warn (10) and fail (30) thresholds → warn.
    #[test_case::test_case(
        &[("tests/a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
          ("tests/b.rs", "fn bar(y: i32) -> i32 { y + 1 }")]
        ; "test directory clones"
    )]
    #[test_case::test_case(
        &[("a.test.ts", "function foo(x: number): number { return x + 1; }"),
          ("b.test.ts", "function bar(y: number): number { return y + 1; }")]
        ; "typescript test files"
    )]
    #[test_case::test_case(
        &[("a.test.js", "function foo(x) { return x + 1; }"),
          ("b.test.js", "function bar(y) { return y + 1; }")]
        ; "javascript test files"
    )]
    #[test_case::test_case(
        &[("__tests__/a.js", "function foo(x) { return x + 1; }"),
          ("__tests__/b.js", "function bar(y) { return y + 1; }")]
        ; "js files in __tests__ directory"
    )]
    #[test_case::test_case(
        &[("a.spec.ts", "function foo(x: number): number { return x + 1; }"),
          ("b.spec.ts", "function bar(y: number): number { return y + 1; }")]
        ; "spec ts files"
    )]
    #[test_case::test_case(
        &[("a.test.tsx", "function Greeting({ name }: { name: string }) { return <div>Hello {name}</div>; }"),
          ("b.test.tsx", "function Welcome({ name }: { name: string }) { return <div>Hello {name}</div>; }")]
        ; "tsx test files"
    )]
    #[test_case::test_case(
        &[("src/a.rs", "#[test]\nfn test_a(x: i32) -> i32 { x + 1 }"),
          ("src/b.rs", "#[test]\nfn test_b(y: i32) -> i32 { y + 1 }")]
        ; "naked test fns"
    )]
    #[test_case::test_case(
        &[("src/a.rs", "fn serve() -> String { String::from(\"hello\") }\n\
                         #[cfg(test)]\nmod tests {\n    fn helper_a(x: i32) -> i32 { x + 1 }\n}"),
          ("src/b.rs", "use std::collections::HashMap;\n\
                         #[cfg(test)]\nmod tests {\n    fn helper_b(y: i32) -> i32 { y + 1 }\n}")]
        ; "inline rust test modules"
    )]
    fn applies_test_thresholds(files: &[(&str, &str)]) {
        let (_, evals) = check_files(files);
        assert!(
            evals[0].is_warn(),
            "expected warn (test thresholds), got: {evals:?}"
        );
    }

    #[test]
    fn uses_production_thresholds_for_mixed_test_and_production_clones() {
        let (_, evals) = check_files(&[
            ("src/a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
            ("tests/b.rs", "fn bar(y: i32) -> i32 { y + 1 }"),
        ]);

        assert!(
            evals[0].is_fail(),
            "mixed groups should use production thresholds, got: {evals:?}"
        );
    }

    #[test]
    fn single_file_without_duplication_passes() {
        let (_, evals) = check_files(&[("a.rs", "fn foo(x: i32) -> i32 { x + 1 }")]);

        assert_that!(evals, len(eq(1)));
        assert!(evals[0].is_pass());
    }

    #[test]
    fn excludes_files_matching_a_glob_pattern() {
        let dir = make_dir(CLONE_PAIR);

        let evals = check(
            dir.path(),
            &[],
            &Definition {
                exclude: Some(vec!["b.rs".to_string()]),
                ..low_threshold()
            },
        )
        .unwrap();

        assert!(
            evals.iter().all(Evaluation::is_pass),
            "b.rs should be excluded, got: {evals:?}"
        );
    }

    #[test]
    fn excludes_files_matching_multiple_glob_patterns() {
        let dir = make_dir(&[
            ("a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
            ("b.rs", "fn bar(y: i32) -> i32 { y + 1 }"),
            ("c.ts", "function baz(z: number): number { return z + 1; }"),
        ]);

        let evals = check(
            dir.path(),
            &[],
            &Definition {
                exclude: Some(vec!["b.rs".to_string(), "*.ts".to_string()]),
                ..low_threshold()
            },
        )
        .unwrap();

        assert!(
            evals.iter().all(Evaluation::is_pass),
            "b.rs and *.ts should be excluded, got: {evals:?}"
        );
    }

    #[test]
    fn excludes_files_in_subdirectory_matching_glob_pattern() {
        let dir = make_dir(&[
            ("src/a.rs", "fn foo(x: i32) -> i32 { x + 1 }"),
            ("generated/b.rs", "fn bar(y: i32) -> i32 { y + 1 }"),
        ]);

        let evals = check(
            dir.path(),
            &[],
            &Definition {
                exclude: Some(vec!["generated/**".to_string()]),
                ..low_threshold()
            },
        )
        .unwrap();

        assert!(
            evals.iter().all(Evaluation::is_pass),
            "generated/** should be excluded, got: {evals:?}"
        );
    }

    #[test]
    fn default_definition_uses_sensible_defaults() {
        let dir = make_dir(CLONE_PAIR);

        // 14 tokens < default min_tokens of 50 → no clones detected → pass
        let evals = check(dir.path(), &[], &Definition::default()).unwrap();

        assert_that!(evals, len(eq(1)));
        assert!(evals[0].is_pass());
    }
}