Skip to main content

big_code_analysis/
tools.rs

1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::wildcard_imports, clippy::enum_glob_use)]
8// Metric counts (token, function, branch, argument, etc.) are stored as
9// `usize` and crossed with `f64` averages, ratios, and Halstead scores
10// across the cyclomatic / MI / Halstead computations. The `usize as f64`
11// and `f64 as usize` casts are intentional and snapshot-anchored — every
12// site is bounded by the count it came from. Allowing the lints at the
13// module level keeps the metric arithmetic legible.
14#![allow(
15    clippy::cast_precision_loss,
16    clippy::cast_possible_truncation,
17    clippy::cast_sign_loss
18)]
19
20use std::cmp::Ordering;
21use std::collections::HashMap;
22use std::fs::{self, File};
23use std::io::{Read, Write};
24use std::path::{Component, Path, PathBuf};
25use std::sync::OnceLock;
26
27use regex::bytes::Regex;
28use termcolor::{Color, ColorSpec, StandardStreamLock, WriteColor};
29
30use crate::langs::fake;
31use crate::langs::*;
32
33/// Reads a file, normalising all CR-only and CRLF line endings to LF.
34///
35/// **Note for downstream consumers**: the returned buffer never contains `\r`
36/// bytes. Callers that previously observed raw `\r\n` sequences will see plain
37/// `\n` after this call. This is intentional — the metric engine requires LF-
38/// only input — but it is a behavioural difference from a plain `fs::read`.
39///
40/// # Errors
41///
42/// Returns any [`std::io::Error`] surfaced by [`File::open`] (the
43/// path is missing, lacks read permission, is a directory, …) or by
44/// [`File::read_to_end`] while reading the file contents.
45///
46/// # Examples
47///
48/// ```
49/// use std::path::Path;
50///
51/// use big_code_analysis::read_file;
52///
53/// let path = Path::new("Cargo.toml");
54/// read_file(&path).unwrap();
55/// ```
56pub fn read_file(path: &Path) -> std::io::Result<Vec<u8>> {
57    let mut file = File::open(path)?;
58    let mut data = Vec::new();
59    file.read_to_end(&mut data)?;
60
61    normalize_line_endings(&mut data);
62
63    Ok(data)
64}
65
66/// Reads a file, normalising all CR-only and CRLF line endings to LF, and ensures
67/// the buffer ends with exactly one `\n`. Returns `None` for files ≤ 3 bytes or
68/// files that appear to be non-UTF-8.
69///
70/// # Errors
71///
72/// Returns any [`std::io::Error`] surfaced by [`File::open`] (the
73/// path is missing, lacks read permission, is a directory, …) or by
74/// the subsequent reads from the open file handle. A non-UTF-8 head
75/// or a too-small file is reported via `Ok(None)`, not an error.
76///
77/// # Examples
78///
79/// ```
80/// use std::path::Path;
81///
82/// use big_code_analysis::read_file_with_eol;
83///
84/// let path = Path::new("Cargo.toml");
85/// read_file_with_eol(&path).unwrap();
86/// ```
87pub fn read_file_with_eol(path: &Path) -> std::io::Result<Option<Vec<u8>>> {
88    let file_size = fs::metadata(path).map_or(1024 * 1024, |m| m.len() as usize);
89    if file_size <= 3 {
90        // this file is very likely almost empty... so nothing to do on it
91        return Ok(None);
92    }
93
94    let mut file = File::open(path)?;
95
96    let mut start = vec![0; 64.min(file_size)];
97    let start = if file.read_exact(&mut start).is_ok() {
98        // Skip the bom if one
99        if start[..2] == [b'\xFE', b'\xFF'] || start[..2] == [b'\xFF', b'\xFE'] {
100            &start[2..]
101        } else if start[..3] == [b'\xEF', b'\xBB', b'\xBF'] {
102            &start[3..]
103        } else {
104            &start
105        }
106    } else {
107        return Ok(None);
108    };
109
110    // so start contains more or less 64 chars
111    let mut head = String::from_utf8_lossy(start).into_owned();
112    // The last char could be wrong because we were in the middle of an utf-8 sequence
113    head.pop();
114    // now check if there is an invalid char
115    if head.contains('\u{FFFD}') {
116        return Ok(None);
117    }
118
119    let mut data = Vec::with_capacity(file_size + 2);
120    data.extend_from_slice(start);
121
122    file.read_to_end(&mut data)?;
123
124    normalize_line_endings(&mut data);
125
126    Ok(Some(data))
127}
128
/// Creates (or truncates) the file at `path` and writes `data` to it.
///
/// # Errors
///
/// Returns any [`std::io::Error`] surfaced by [`File::create`]
/// (parent directory missing, lacks write permission, target is a
/// directory, …) or by [`File::write_all`] while writing the buffer.
///
/// # Examples
///
/// ```no_run
/// use std::path::Path;
///
/// use big_code_analysis::write_file;
///
/// let path = Path::new("foo.txt");
/// let data: [u8; 4] = [0; 4];
/// write_file(&path, &data).unwrap();
/// ```
pub fn write_file(path: &Path, data: &[u8]) -> std::io::Result<()> {
    File::create(path).and_then(|mut out| out.write_all(data))
}
154
155/// Detects the language of a code using
156/// the extension of a file.
157///
158/// # Examples
159///
160/// ```
161/// use std::path::Path;
162///
163/// use big_code_analysis::get_language_for_file;
164///
165/// let path = Path::new("build.rs");
166/// get_language_for_file(&path).unwrap();
167/// ```
168#[must_use]
169pub fn get_language_for_file(path: &Path) -> Option<LANG> {
170    if let Some(ext) = path.extension() {
171        let ext = ext.to_str()?.to_lowercase();
172        get_from_ext(&ext)
173    } else {
174        None
175    }
176}
177
/// Converts a captured mode name to a lowercase `String`, or `None` when the
/// bytes are not valid UTF-8.
fn mode_to_str(mode: &[u8]) -> Option<String> {
    let text = std::str::from_utf8(mode).ok()?;
    Some(text.to_lowercase())
}
181
// Lazily compiled regexes, one cell per pattern. The first three extract an
// editor mode / filetype name from Emacs and Vim modelines (see
// `get_emacs_mode`); `RE_GENERATED` backs `is_generated`.
static RE1_EMACS: OnceLock<Regex> = OnceLock::new();
static RE2_EMACS: OnceLock<Regex> = OnceLock::new();
static RE1_VIM: OnceLock<Regex> = OnceLock::new();
static RE_GENERATED: OnceLock<Regex> = OnceLock::new();

// Editor modeline patterns. Each one captures the mode / filetype name in
// group 1.
//
// Emacs file-variable form, `-*- ...; mode: NAME; ... -*-` (case-insensitive;
// `mode` must be preceded by a character that is neither `-` nor a word
// character).
const FIRST_EMACS_EXPRESSION: &str = r"(?i)-\*-.*[^-\w]mode\s*:\s*([^:;\s]+)";
// Emacs shorthand form, `-*- NAME -*-`.
const SECOND_EMACS_EXPRESSION: &str = r"-\*-\s*([^:;\s]+)\s*-\*-";
// Vim modeline form, `vim: ... ft=NAME` (case-insensitive; `ft` must be
// preceded by a non-word character).
const VIM_EXPRESSION: &str = r"(?i)vim\s*:.*[^\w]ft\s*=\s*([^:\s]+)";
192
// Generated-code marker patterns. Matched against the leading window of the
// file (see `is_generated`) so a marker phrase deep in the body does not
// trigger a skip. Each alternative covers a widely-used convention:
//
// - `@generated`      — Facebook / Meta convention, also used by buck2,
//                       rustfmt, prettier, and many code generators.
// - `DO NOT EDIT`     — Go's `Code generated ... DO NOT EDIT.` line is
//                       canonical, but the bare phrase appears in Bazel,
//                       protoc, OpenAPI clients, etc. — match either.
// - `GENERATED CODE`  — Lizard's marker; preserved for compatibility with
//                       projects that already tag generated files this way.
//
// The leading `(?i)` makes every alternative case-insensitive; only
// `@generated` carries a trailing word boundary (`\b`).
const GENERATED_EXPRESSION: &str = r"(?i)@generated\b|DO NOT EDIT|GENERATED CODE";

/// Bytes from the start of the file scanned for a generated-code marker.
/// 5 KiB is enough to cover any reasonable file header (license + autogen
/// preamble) without paying a meaningful read cost.
const GENERATED_SCAN_BYTES: usize = 5 * 1024;
/// Maximum lines scanned for a generated-code marker, applied within the
/// `GENERATED_SCAN_BYTES` window. Keeps a marker phrase that appears after
/// the file header from being treated as a generated-code tag.
const GENERATED_SCAN_LINES: usize = 50;
213
214/// Returns `true` when `buf` looks like generated code: its leading window
215/// (first ~50 lines or first 5 KiB, whichever is smaller) contains a known
216/// marker phrase. Matching is case-insensitive for the marker and never
217/// allocates on the negative path.
218///
219/// Recognized markers:
220///
221/// - `@generated` — Facebook / Meta convention, also used by buck2,
222///   rustfmt, and prettier.
223/// - `DO NOT EDIT` — Go's `Code generated by ... DO NOT EDIT.` is the
224///   canonical form; the bare phrase is also widely copied.
225/// - `GENERATED CODE` — Lizard's marker, preserved for compatibility.
226///
227/// Detection runs against raw bytes before parsing, so callers can discard
228/// generated files without paying tree-sitter parse cost. Non-UTF-8 input
229/// will not panic — `regex::bytes::Regex` operates on the raw byte slice.
230///
231/// # Examples
232///
233/// ```
234/// use big_code_analysis::is_generated;
235///
236/// assert!(is_generated(b"// @generated\nfn x() {}\n"));
237/// assert!(is_generated(
238///     b"// Code generated by protoc. DO NOT EDIT.\npackage x\n",
239/// ));
240/// assert!(!is_generated(b"fn main() { /* not generated */ }\n"));
241/// ```
242///
243/// # Panics
244///
245/// Panics if the embedded marker regex set fails to build; the marker
246/// list is a static literal so this represents a compile-time bug, not
247/// a runtime input that can be handled.
248pub fn is_generated(buf: &[u8]) -> bool {
249    // Strip a leading UTF-8 BOM so a marker on the first line of a
250    // BOM-prefixed file still matches against the line start. UTF-16 BOMs
251    // are not handled: the byte-pattern regex cannot match the
252    // interleaved-zero encoding (`@\x00g\x00...`) that follows a UTF-16
253    // BOM, so a strip would not enable detection — it would only obscure
254    // the fact that UTF-16 source files are unsupported here.
255    let buf = buf.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(buf);
256
257    // Bound the search window: at most GENERATED_SCAN_BYTES bytes, and
258    // among those, stop after GENERATED_SCAN_LINES newlines. Scanning fewer
259    // lines avoids matching a marker phrase deep in the file body (the
260    // negative case in the issue's acceptance criteria).
261    let cap = buf.len().min(GENERATED_SCAN_BYTES);
262    let end = buf[..cap]
263        .iter()
264        .enumerate()
265        .filter_map(|(i, &b)| (b == b'\n').then_some(i + 1))
266        .nth(GENERATED_SCAN_LINES - 1)
267        .unwrap_or(cap);
268    let window = &buf[..end];
269
270    RE_GENERATED
271        .get_or_init(|| {
272            Regex::new(GENERATED_EXPRESSION).expect("GENERATED_EXPRESSION is a constant regex")
273        })
274        .is_match(window)
275}
276
277#[inline]
278fn get_regex<'a>(
279    once_lock: &OnceLock<Regex>,
280    line: &'a [u8],
281    regex: &'a str,
282) -> Option<regex::bytes::Captures<'a>> {
283    once_lock
284        .get_or_init(|| Regex::new(regex).expect("constant regex pattern must compile"))
285        .captures_iter(line)
286        .next()
287}
288
289/// Resolves a language from a script's shebang line.
290///
291/// Returns `None` unless `buf` starts with `#!`. Reads up to the first `\n`,
292/// strips an optional trailing `\r`, splits on whitespace, and takes the
293/// basename of either the first token or — when that basename is `env` — the
294/// next non-flag token. Trailing version digits and dots (`python3`,
295/// `lua5.1`, `perl5.36`) are stripped before lookup. Non-UTF-8 bytes on the
296/// shebang line yield `None` (no panic).
297fn get_shebang_lang(buf: &[u8]) -> Option<LANG> {
298    // Early-out for the common case (any non-shebang buffer): no allocation,
299    // no UTF-8 decoding.
300    let rest = buf.strip_prefix(b"#!")?;
301    let line_end = rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
302    let line = &rest[..line_end];
303    // Trim a trailing CR even though normalize_line_endings should have removed
304    // it — guess_language is on the public API and may be called with raw input.
305    let line = line.strip_suffix(b"\r").unwrap_or(line);
306    let line = std::str::from_utf8(line).ok()?;
307
308    let mut tokens = line.split_ascii_whitespace();
309    let first_base = basename(tokens.next()?);
310
311    let interpreter = if first_base == "env" {
312        skip_env_args(&mut tokens)?
313    } else {
314        first_base
315    };
316
317    get_from_interpreter(strip_version_suffix(interpreter))
318}
319
320// Walk past leading `env` arguments (`-FLAG`, `-u VAR`, `NAME=value`) and
321// return the basename of the actual interpreter token. Per `env(1)`, only
322// `-u` consumes a following argument; other short flags (`-i`, `-S`, …)
323// stand alone or carry their argument inline (e.g. `-S "node --foo"`).
324fn skip_env_args<'a>(tokens: &mut std::str::SplitAsciiWhitespace<'a>) -> Option<&'a str> {
325    loop {
326        let tok = tokens.next()?;
327        if let Some(flag) = tok.strip_prefix('-') {
328            if flag == "u" {
329                tokens.next()?;
330            }
331            continue;
332        }
333        if tok.contains('=') {
334            continue;
335        }
336        return Some(basename(tok));
337    }
338}
339
/// Returns the part of `path` after its last `/`, or all of `path` when it
/// contains no `/`.
fn basename(path: &str) -> &str {
    // `rsplit` always yields at least one piece, so the fallback is never
    // taken.
    path.rsplit('/').next().unwrap_or(path)
}
343
/// Removes the trailing run of ASCII digits and dots that encodes an
/// interpreter version (`python3` → `python`, `lua5.1` → `lua`,
/// `perl5.36` → `perl`). A name made up entirely of digits and dots is
/// returned unchanged.
fn strip_version_suffix(name: &str) -> &str {
    match name.trim_end_matches(|c: char| c == '.' || c.is_ascii_digit()) {
        "" => name,
        stem => stem,
    }
}
350
351fn get_from_interpreter(name: &str) -> Option<LANG> {
352    match name {
353        "sh" | "bash" | "dash" | "ksh" | "zsh" => Some(LANG::Bash),
354        "python" => Some(LANG::Python),
355        "perl" => Some(LANG::Perl),
356        "lua" | "luajit" => Some(LANG::Lua),
357        "php" | "php-cgi" => Some(LANG::Php),
358        "node" | "nodejs" => Some(LANG::Javascript),
359        "tclsh" | "wish" => Some(LANG::Tcl),
360        "ruby" => Some(LANG::Ruby),
361        "elixir" | "iex" => Some(LANG::Elixir),
362        _ => None,
363    }
364}
365
366fn get_emacs_mode(buf: &[u8]) -> Option<String> {
367    // we just try to use the emacs info (if there)
368    for (i, line) in buf.splitn(5, |c| *c == b'\n').enumerate() {
369        if let Some(cap) = get_regex(&RE1_EMACS, line, FIRST_EMACS_EXPRESSION) {
370            return mode_to_str(&cap[1]);
371        } else if let Some(cap) = get_regex(&RE2_EMACS, line, SECOND_EMACS_EXPRESSION) {
372            return mode_to_str(&cap[1]);
373        } else if let Some(cap) = get_regex(&RE1_VIM, line, VIM_EXPRESSION) {
374            return mode_to_str(&cap[1]);
375        }
376        if i == 3 {
377            break;
378        }
379    }
380
381    for (i, line) in buf.rsplitn(5, |c| *c == b'\n').enumerate() {
382        if let Some(cap) = get_regex(&RE1_VIM, line, VIM_EXPRESSION) {
383            return mode_to_str(&cap[1]);
384        }
385        if i == 3 {
386            break;
387        }
388    }
389
390    None
391}
392
393/// Guesses the language of a code.
394///
395/// Returns a tuple containing a [`LANG`] as first argument
396/// and the language name as a second one.
397///
398/// # Examples
399///
400/// ```
401/// use std::path::PathBuf;
402///
403/// use big_code_analysis::guess_language;
404///
405/// let source_code = "int a = 42;";
406///
407/// // The path to a dummy file used to contain the source code
408/// let path = PathBuf::from("foo.c");
409/// let source_slice = source_code.as_bytes();
410///
411/// // Guess the language of a code
412/// guess_language(&source_slice, &path);
413/// ```
414///
415/// [`LANG`]: enum.LANG.html
416pub fn guess_language<'a, P: AsRef<Path>>(buf: &[u8], path: P) -> (Option<LANG>, &'a str) {
417    let ext = path
418        .as_ref()
419        .extension()
420        .and_then(|e| e.to_str())
421        .map(str::to_lowercase)
422        .unwrap_or_default();
423    let from_ext = get_from_ext(&ext);
424
425    let mode = get_emacs_mode(buf).unwrap_or_default();
426
427    let from_mode = get_from_emacs_mode(&mode);
428
429    if let Some(lang_ext) = from_ext {
430        if let Some(lang_mode) = from_mode {
431            if lang_ext == lang_mode {
432                (
433                    Some(lang_mode),
434                    fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name()),
435                )
436            } else {
437                // we should probably rely on extension here
438                (Some(lang_ext), lang_ext.get_name())
439            }
440        } else {
441            (
442                Some(lang_ext),
443                fake::get_true(&ext, &mode).unwrap_or_else(|| lang_ext.get_name()),
444            )
445        }
446    } else if let Some(lang_mode) = from_mode {
447        (
448            Some(lang_mode),
449            fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name()),
450        )
451    } else if let Some(lang_shebang) = get_shebang_lang(buf) {
452        (
453            Some(lang_shebang),
454            fake::get_true(&ext, &mode).unwrap_or_else(|| lang_shebang.get_name()),
455        )
456    } else {
457        (None, fake::get_true(&ext, &mode).unwrap_or_default())
458    }
459}
460
/// Rewrites `data` in place so that it uses LF line endings only and ends
/// with exactly one `\n`.
///
/// Each CRLF pair and each lone CR becomes a single LF. Afterwards all
/// trailing newlines are removed and one `\n` is appended, so an empty
/// buffer becomes `b"\n"`.
pub(crate) fn normalize_line_endings(data: &mut Vec<u8>) {
    // Single in-place pass: a CR is overwritten with LF, and an LF that
    // directly follows a CR (the second half of a CRLF pair) is dropped.
    let mut after_cr = false;
    data.retain_mut(|byte| {
        let second_half_of_crlf = after_cr && *byte == b'\n';
        after_cr = *byte == b'\r';
        if after_cr {
            *byte = b'\n';
        }
        !second_half_of_crlf
    });

    // Collapse the tail to exactly one newline.
    while data.last() == Some(&b'\n') {
        data.pop();
    }
    data.push(b'\n');
}
487
/// Lexically normalises a path without touching the filesystem: `.`
/// components are dropped and each `..` removes the component before it.
/// A `..` with nothing left to remove is discarded.
pub(crate) fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    // Copied from Cargo sources: https://github.com/rust-lang/cargo/blob/master/src/cargo/util/paths.rs#L65
    let mut parts = path.as_ref().components().peekable();
    // A prefix (e.g. a Windows drive) can only be the first component; seed
    // the result with it when present.
    let mut normalized = parts
        .next_if(|part| matches!(part, Component::Prefix(..)))
        .map_or_else(PathBuf::new, |prefix| PathBuf::from(prefix.as_os_str()));

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => normalized.push(part.as_os_str()),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::Normal(name) => normalized.push(name),
        }
    }
    normalized
}
515
/// Measures how far apart two paths are: the number of components each path
/// has below their deepest common non-empty ancestor, added together.
/// Returns `None` when the paths share no non-empty ancestor.
pub(crate) fn get_paths_dist(path1: &Path, path2: &Path) -> Option<usize> {
    // `ancestors()` walks from `path1` itself up towards the root, so the
    // first hit is the deepest shared ancestor.
    let shared = path1
        .ancestors()
        .find(|ancestor| !ancestor.as_os_str().is_empty() && path2.starts_with(ancestor))?;

    // `shared` comes from `path1.ancestors()`, so it is a prefix of `path1`
    // by construction, and `find` only accepted it because `path2` starts
    // with it. Neither `strip_prefix` call can fail.
    let below1 = path1
        .strip_prefix(shared)
        .expect("ancestor is by construction a prefix of path1");
    let below2 = path2
        .strip_prefix(shared)
        .expect("ancestor verified by starts_with above");

    Some(below1.components().count() + below2.components().count())
}
534
535pub(crate) fn guess_file<S: ::std::hash::BuildHasher>(
536    current_path: &Path,
537    include_path: &str,
538    all_files: &HashMap<String, Vec<PathBuf>, S>,
539) -> Vec<PathBuf> {
540    let include_path = include_path
541        .strip_prefix("mozilla/")
542        .unwrap_or(include_path);
543
544    // Resolve the include relative to the including file's parent
545    // before normalizing. This preserves leading `..` traversal so
546    // `#include "../foo.h"` from `src/lib/file.c` targets
547    // `src/foo.h`, not the lexically-popped `foo.h` (issue #297).
548    // Lexical-only normalization is required because `current_path`
549    // and the entries in `all_files` are typically not canonicalized
550    // and the included header need not exist on disk yet.
551    let resolved_path = current_path
552        .parent()
553        .map(|parent| normalize_path(parent.join(include_path)));
554
555    let include_path = normalize_path(include_path);
556    let Some(file_name) = include_path.file_name().and_then(|n| n.to_str()) else {
557        return vec![];
558    };
559    let Some(possibilities) = all_files.get(file_name) else {
560        return vec![];
561    };
562    if possibilities.len() == 1 {
563        return possibilities.clone();
564    }
565
566    // Strategy chain: each step looks for a UNIQUE candidate that
567    // matches a progressively weaker signal (full resolved target →
568    // suffix on the normalized include → siblings of the including
569    // file). When no step yields a unique match, fall back to the
570    // closest by path distance, which may return zero or many.
571    resolve_against_resolved(possibilities, current_path, resolved_path.as_deref())
572        .or_else(|| unique_filter(possibilities, current_path, |p| p.ends_with(&include_path)))
573        .or_else(|| resolve_against_parent(possibilities, current_path))
574        .unwrap_or_else(|| min_distance_candidates(possibilities, current_path))
575}
576
/// Filter `possibilities` to those satisfying `pred` and distinct
/// from `current_path`, returning `Some(matched)` only when exactly
/// one survives. The cascading caller treats `None` as "this strategy
/// did not yield a unique resolution — try the next one."
///
/// The scan stops at the second match and only the unique survivor is
/// cloned, so an ambiguous or empty result allocates nothing.
fn unique_filter<F>(possibilities: &[PathBuf], current_path: &Path, pred: F) -> Option<Vec<PathBuf>>
where
    F: Fn(&PathBuf) -> bool,
{
    let mut matches = possibilities
        .iter()
        .filter(|p| current_path != p.as_path() && pred(p));
    let only = matches.next()?;
    // A second hit makes the signal ambiguous; give up before cloning.
    if matches.next().is_some() {
        return None;
    }
    Some(vec![only.clone()])
}
592
593/// Strongest signal: a candidate matches the fully resolved relative
594/// target. Prefer exact equality, then suffix match (so absolute
595/// `all_files` entries still match a relative resolved target like
596/// `src/foo.h`).
597fn resolve_against_resolved(
598    possibilities: &[PathBuf],
599    current_path: &Path,
600    resolved: Option<&Path>,
601) -> Option<Vec<PathBuf>> {
602    let resolved = resolved?;
603    unique_filter(possibilities, current_path, |p| p == resolved)
604        .or_else(|| unique_filter(possibilities, current_path, |p| p.ends_with(resolved)))
605}
606
607/// Candidate-in-same-directory heuristic: keep entries whose path
608/// starts with the including file's parent directory.
609fn resolve_against_parent(possibilities: &[PathBuf], current_path: &Path) -> Option<Vec<PathBuf>> {
610    let parent = current_path.parent()?;
611    unique_filter(possibilities, current_path, |p| p.starts_with(parent))
612}
613
614/// Last-chance fallback in the `guess_file` strategy chain: returns
615/// every candidate whose `get_paths_dist` from `current_path` ties
616/// the minimum, or an empty `Vec` when no candidate has a defined
617/// distance. Unlike the unique-match strategies, this may
618/// legitimately return zero or many entries — its result is the
619/// function's final answer, not a "try the next strategy" signal.
620fn min_distance_candidates(possibilities: &[PathBuf], current_path: &Path) -> Vec<PathBuf> {
621    // Hold survivors as borrows during the walk: `Less` arms clear the
622    // prior set without dropping owned `PathBuf`s, and the trailing
623    // `cloned()` runs exactly once per final survivor — never on
624    // entries that were tentatively kept and later evicted.
625    let mut dist_min = usize::MAX;
626    let mut path_min: Vec<&PathBuf> = Vec::new();
627    for p in possibilities {
628        if current_path == p {
629            continue;
630        }
631        let Some(dist) = get_paths_dist(current_path, p) else {
632            continue;
633        };
634        match dist.cmp(&dist_min) {
635            Ordering::Less => {
636                dist_min = dist;
637                path_min.clear();
638                path_min.push(p);
639            }
640            Ordering::Equal => path_min.push(p),
641            Ordering::Greater => {}
642        }
643    }
644    path_min.into_iter().cloned().collect()
645}
646
647#[inline]
648pub(crate) fn color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
649    stdout.set_color(ColorSpec::new().set_fg(Some(color)))
650}
651
652#[inline]
653pub(crate) fn intense_color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
654    stdout.set_color(ColorSpec::new().set_fg(Some(color)).set_intense(true))
655}
656
/// Test helper: parses `source` with parser `T` as though it were the file
/// `filename`, computes its metrics, and hands the resulting `FuncSpace` to
/// `check`. Panics if metric computation yields nothing.
#[cfg(test)]
pub(crate) fn check_func_space<T: crate::ParserTrait, F: Fn(crate::FuncSpace)>(
    source: &str,
    filename: &str,
    check: F,
) {
    let path = std::path::PathBuf::from(filename);
    // Mirror the CRLF/CR normalisation that read_file_with_eol applies via
    // normalize_line_endings. Trailing whitespace and surrounding newlines
    // are trimmed as well, then a single `\n` is appended.
    let unified = source.replace("\r\n", "\n").replace('\r', "\n");
    let body = unified.trim_end().trim_matches('\n');
    let mut code = Vec::with_capacity(body.len() + 1);
    code.extend_from_slice(body.as_bytes());
    code.push(b'\n');

    let parser = T::new(code, &path, None);
    #[allow(deprecated)]
    let func_space = crate::metrics(&parser, &path).unwrap();

    check(func_space);
}
674
/// Test helper: like `check_func_space`, but passes only the top-level
/// space's `metrics` to `check`.
#[cfg(test)]
pub(crate) fn check_metrics<T: crate::ParserTrait>(
    source: &str,
    filename: &str,
    check: fn(crate::CodeMetrics) -> (),
) {
    let on_space = |space: crate::FuncSpace| check(space.metrics);
    check_func_space::<T, _>(source, filename, on_space);
}
683
/// Asserts that `func_space` has a direct child space named `name` whose
/// `kind` equals `expected`; panics when the child is missing or its kind
/// differs.
///
/// Used by annotation-type / class / interface tests that need to verify
/// the structural FuncSpace tree (not just metric values), since vacuous
/// metric assertions can pass even when `is_func_space` has been reverted
/// for the node kind under test.
#[cfg(test)]
pub(crate) fn assert_child_space_kind(
    func_space: &crate::FuncSpace,
    name: &str,
    expected: crate::SpaceKind,
) {
    let Some(child) = func_space
        .spaces
        .iter()
        .find(|space| space.name.as_deref() == Some(name))
    else {
        panic!("expected a child FuncSpace named {name:?}");
    };
    assert_eq!(
        child.kind, expected,
        "child FuncSpace {name:?} kind: got {:?}, expected {:?}",
        child.kind, expected,
    );
}
708
709#[cfg(test)]
710#[path = "tools_tests.rs"]
711mod tests;