big_code_analysis/tools.rs
1// Per-language metric and AST modules deliberately consume the macro-
2// generated tree-sitter token enums via `use crate::*` and `use Foo::*`
3// inside match expressions — explicit imports would list dozens of
4// variants per arm and obscure the per-language token sets that are the
5// point of these files. Allowed at the module level rather than per
6// function so the per-language impl blocks stay readable.
7#![allow(clippy::wildcard_imports, clippy::enum_glob_use)]
8// Metric counts (token, function, branch, argument, etc.) are stored as
9// `usize` and crossed with `f64` averages, ratios, and Halstead scores
10// across the cyclomatic / MI / Halstead computations. The `usize as f64`
11// and `f64 as usize` casts are intentional and snapshot-anchored — every
12// site is bounded by the count it came from. Allowing the lints at the
13// module level keeps the metric arithmetic legible.
14#![allow(
15 clippy::cast_precision_loss,
16 clippy::cast_possible_truncation,
17 clippy::cast_sign_loss
18)]
19
20use std::cmp::Ordering;
21use std::collections::HashMap;
22use std::fs::{self, File};
23use std::io::{Read, Write};
24use std::path::{Component, Path, PathBuf};
25use std::sync::OnceLock;
26
27use regex::bytes::Regex;
28use termcolor::{Color, ColorSpec, StandardStreamLock, WriteColor};
29
30use crate::langs::fake;
31use crate::langs::*;
32
33/// Reads a file, normalising all CR-only and CRLF line endings to LF.
34///
35/// **Note for downstream consumers**: the returned buffer never contains `\r`
36/// bytes. Callers that previously observed raw `\r\n` sequences will see plain
37/// `\n` after this call. This is intentional — the metric engine requires LF-
38/// only input — but it is a behavioural difference from a plain `fs::read`.
39///
40/// # Errors
41///
42/// Returns any [`std::io::Error`] surfaced by [`File::open`] (the
43/// path is missing, lacks read permission, is a directory, …) or by
44/// [`File::read_to_end`] while reading the file contents.
45///
46/// # Examples
47///
48/// ```
49/// use std::path::Path;
50///
51/// use big_code_analysis::read_file;
52///
53/// let path = Path::new("Cargo.toml");
54/// read_file(&path).unwrap();
55/// ```
56pub fn read_file(path: &Path) -> std::io::Result<Vec<u8>> {
57 let mut file = File::open(path)?;
58 let mut data = Vec::new();
59 file.read_to_end(&mut data)?;
60
61 normalize_line_endings(&mut data);
62
63 Ok(data)
64}
65
66/// Reads a file, normalising all CR-only and CRLF line endings to LF, and ensures
67/// the buffer ends with exactly one `\n`. Returns `None` for files ≤ 3 bytes or
68/// files that appear to be non-UTF-8.
69///
70/// # Errors
71///
72/// Returns any [`std::io::Error`] surfaced by [`File::open`] (the
73/// path is missing, lacks read permission, is a directory, …) or by
74/// the subsequent reads from the open file handle. A non-UTF-8 head
75/// or a too-small file is reported via `Ok(None)`, not an error.
76///
77/// # Examples
78///
79/// ```
80/// use std::path::Path;
81///
82/// use big_code_analysis::read_file_with_eol;
83///
84/// let path = Path::new("Cargo.toml");
85/// read_file_with_eol(&path).unwrap();
86/// ```
87pub fn read_file_with_eol(path: &Path) -> std::io::Result<Option<Vec<u8>>> {
88 let file_size = fs::metadata(path).map_or(1024 * 1024, |m| m.len() as usize);
89 if file_size <= 3 {
90 // this file is very likely almost empty... so nothing to do on it
91 return Ok(None);
92 }
93
94 let mut file = File::open(path)?;
95
96 let mut start = vec![0; 64.min(file_size)];
97 let start = if file.read_exact(&mut start).is_ok() {
98 // Skip the bom if one
99 if start[..2] == [b'\xFE', b'\xFF'] || start[..2] == [b'\xFF', b'\xFE'] {
100 &start[2..]
101 } else if start[..3] == [b'\xEF', b'\xBB', b'\xBF'] {
102 &start[3..]
103 } else {
104 &start
105 }
106 } else {
107 return Ok(None);
108 };
109
110 // so start contains more or less 64 chars
111 let mut head = String::from_utf8_lossy(start).into_owned();
112 // The last char could be wrong because we were in the middle of an utf-8 sequence
113 head.pop();
114 // now check if there is an invalid char
115 if head.contains('\u{FFFD}') {
116 return Ok(None);
117 }
118
119 let mut data = Vec::with_capacity(file_size + 2);
120 data.extend_from_slice(start);
121
122 file.read_to_end(&mut data)?;
123
124 normalize_line_endings(&mut data);
125
126 Ok(Some(data))
127}
128
/// Stores `data` in the file at `path`, creating the file when it does not
/// exist and replacing its previous contents when it does.
///
/// # Errors
///
/// Returns any [`std::io::Error`] raised while creating the file
/// (parent directory missing, lacks write permission, target is a
/// directory, …) or while writing the buffer to it.
///
/// # Examples
///
/// ```no_run
/// use std::path::Path;
///
/// use big_code_analysis::write_file;
///
/// let path = Path::new("foo.txt");
/// let data: [u8; 4] = [0; 4];
/// write_file(&path, &data).unwrap();
/// ```
pub fn write_file(path: &Path, data: &[u8]) -> std::io::Result<()> {
    fs::write(path, data)
}
154
155/// Detects the language of a code using
156/// the extension of a file.
157///
158/// # Examples
159///
160/// ```
161/// use std::path::Path;
162///
163/// use big_code_analysis::get_language_for_file;
164///
165/// let path = Path::new("build.rs");
166/// get_language_for_file(&path).unwrap();
167/// ```
168#[must_use]
169pub fn get_language_for_file(path: &Path) -> Option<LANG> {
170 if let Some(ext) = path.extension() {
171 let ext = ext.to_str()?.to_lowercase();
172 get_from_ext(&ext)
173 } else {
174 None
175 }
176}
177
/// Decodes a captured mode name as UTF-8 and lower-cases it; yields `None`
/// when the bytes are not valid UTF-8.
fn mode_to_str(mode: &[u8]) -> Option<String> {
    match std::str::from_utf8(mode) {
        Ok(text) => Some(text.to_lowercase()),
        Err(_) => None,
    }
}
181
// Comments that carry editor mode / filetype information are a useful
// language hint. The cells below cache the compiled regexes; each is
// filled on first use (the modeline ones through `get_regex`, the
// generated-code one inside `is_generated`).
static RE1_EMACS: OnceLock<Regex> = OnceLock::new();
static RE2_EMACS: OnceLock<Regex> = OnceLock::new();
static RE1_VIM: OnceLock<Regex> = OnceLock::new();
static RE_GENERATED: OnceLock<Regex> = OnceLock::new();

// Modeline patterns. In each one, capture group 1 is the mode / filetype
// name that `get_emacs_mode` extracts.
//
// - FIRST_EMACS_EXPRESSION:  `-*- ... mode: NAME ...` (case-insensitive).
// - SECOND_EMACS_EXPRESSION: the short form `-*- NAME -*-`.
// - VIM_EXPRESSION:          `vim: ... ft=NAME` (case-insensitive).
const FIRST_EMACS_EXPRESSION: &str = r"(?i)-\*-.*[^-\w]mode\s*:\s*([^:;\s]+)";
const SECOND_EMACS_EXPRESSION: &str = r"-\*-\s*([^:;\s]+)\s*-\*-";
const VIM_EXPRESSION: &str = r"(?i)vim\s*:.*[^\w]ft\s*=\s*([^:\s]+)";
192
// Generated-code marker patterns. Matched against the leading window of the
// file (see `is_generated`) so a marker phrase deep in the body does not
// trigger a skip. Each alternative covers a widely-used convention:
//
// - `@generated`     — Facebook / Meta convention, also used by buck2,
//                      rustfmt, prettier, and many code generators.
// - `DO NOT EDIT`    — Go's `Code generated ... DO NOT EDIT.` line is
//                      canonical, but the bare phrase appears in Bazel,
//                      protoc, OpenAPI clients, etc. — match either.
// - `GENERATED CODE` — Lizard's marker; preserved for compatibility with
//                      projects that already tag generated files this way.
//
// The leading `(?i)` flag covers the whole alternation, so all three markers
// match case-insensitively. The `\b` after `@generated` requires a word
// boundary, so the marker does not match as the prefix of a longer word.
const GENERATED_EXPRESSION: &str = r"(?i)@generated\b|DO NOT EDIT|GENERATED CODE";

/// Bytes from the start of the file scanned for a generated-code marker.
/// 5 KiB is enough to cover any reasonable file header (license + autogen
/// preamble) without paying a meaningful read cost. This byte cap is also
/// what bounds the work on a pathological "all-on-one-line" file.
const GENERATED_SCAN_BYTES: usize = 5 * 1024;
/// Maximum number of leading lines scanned for a generated-code marker.
/// Applied inside the byte window above, so a marker phrase that sits past
/// this many lines does not trigger a skip even when it is still within the
/// first `GENERATED_SCAN_BYTES` bytes.
const GENERATED_SCAN_LINES: usize = 50;
213
214/// Returns `true` when `buf` looks like generated code: its leading window
215/// (first ~50 lines or first 5 KiB, whichever is smaller) contains a known
216/// marker phrase. Matching is case-insensitive for the marker and never
217/// allocates on the negative path.
218///
219/// Recognized markers:
220///
221/// - `@generated` — Facebook / Meta convention, also used by buck2,
222/// rustfmt, and prettier.
223/// - `DO NOT EDIT` — Go's `Code generated by ... DO NOT EDIT.` is the
224/// canonical form; the bare phrase is also widely copied.
225/// - `GENERATED CODE` — Lizard's marker, preserved for compatibility.
226///
227/// Detection runs against raw bytes before parsing, so callers can discard
228/// generated files without paying tree-sitter parse cost. Non-UTF-8 input
229/// will not panic — `regex::bytes::Regex` operates on the raw byte slice.
230///
231/// # Examples
232///
233/// ```
234/// use big_code_analysis::is_generated;
235///
236/// assert!(is_generated(b"// @generated\nfn x() {}\n"));
237/// assert!(is_generated(
238/// b"// Code generated by protoc. DO NOT EDIT.\npackage x\n",
239/// ));
240/// assert!(!is_generated(b"fn main() { /* not generated */ }\n"));
241/// ```
242///
243/// # Panics
244///
245/// Panics if the embedded marker regex set fails to build; the marker
246/// list is a static literal so this represents a compile-time bug, not
247/// a runtime input that can be handled.
248pub fn is_generated(buf: &[u8]) -> bool {
249 // Strip a leading UTF-8 BOM so a marker on the first line of a
250 // BOM-prefixed file still matches against the line start. UTF-16 BOMs
251 // are not handled: the byte-pattern regex cannot match the
252 // interleaved-zero encoding (`@\x00g\x00...`) that follows a UTF-16
253 // BOM, so a strip would not enable detection — it would only obscure
254 // the fact that UTF-16 source files are unsupported here.
255 let buf = buf.strip_prefix(b"\xEF\xBB\xBF").unwrap_or(buf);
256
257 // Bound the search window: at most GENERATED_SCAN_BYTES bytes, and
258 // among those, stop after GENERATED_SCAN_LINES newlines. Scanning fewer
259 // lines avoids matching a marker phrase deep in the file body (the
260 // negative case in the issue's acceptance criteria).
261 let cap = buf.len().min(GENERATED_SCAN_BYTES);
262 let end = buf[..cap]
263 .iter()
264 .enumerate()
265 .filter_map(|(i, &b)| (b == b'\n').then_some(i + 1))
266 .nth(GENERATED_SCAN_LINES - 1)
267 .unwrap_or(cap);
268 let window = &buf[..end];
269
270 RE_GENERATED
271 .get_or_init(|| {
272 Regex::new(GENERATED_EXPRESSION).expect("GENERATED_EXPRESSION is a constant regex")
273 })
274 .is_match(window)
275}
276
277#[inline]
278fn get_regex<'a>(
279 once_lock: &OnceLock<Regex>,
280 line: &'a [u8],
281 regex: &'a str,
282) -> Option<regex::bytes::Captures<'a>> {
283 once_lock
284 .get_or_init(|| Regex::new(regex).expect("constant regex pattern must compile"))
285 .captures_iter(line)
286 .next()
287}
288
289/// Resolves a language from a script's shebang line.
290///
291/// Returns `None` unless `buf` starts with `#!`. Reads up to the first `\n`,
292/// strips an optional trailing `\r`, splits on whitespace, and takes the
293/// basename of either the first token or — when that basename is `env` — the
294/// next non-flag token. Trailing version digits and dots (`python3`,
295/// `lua5.1`, `perl5.36`) are stripped before lookup. Non-UTF-8 bytes on the
296/// shebang line yield `None` (no panic).
297fn get_shebang_lang(buf: &[u8]) -> Option<LANG> {
298 // Early-out for the common case (any non-shebang buffer): no allocation,
299 // no UTF-8 decoding.
300 let rest = buf.strip_prefix(b"#!")?;
301 let line_end = rest.iter().position(|&b| b == b'\n').unwrap_or(rest.len());
302 let line = &rest[..line_end];
303 // Trim a trailing CR even though normalize_line_endings should have removed
304 // it — guess_language is on the public API and may be called with raw input.
305 let line = line.strip_suffix(b"\r").unwrap_or(line);
306 let line = std::str::from_utf8(line).ok()?;
307
308 let mut tokens = line.split_ascii_whitespace();
309 let first_base = basename(tokens.next()?);
310
311 let interpreter = if first_base == "env" {
312 skip_env_args(&mut tokens)?
313 } else {
314 first_base
315 };
316
317 get_from_interpreter(strip_version_suffix(interpreter))
318}
319
320// Walk past leading `env` arguments (`-FLAG`, `-u VAR`, `NAME=value`) and
321// return the basename of the actual interpreter token. Per `env(1)`, only
322// `-u` consumes a following argument; other short flags (`-i`, `-S`, …)
323// stand alone or carry their argument inline (e.g. `-S "node --foo"`).
324fn skip_env_args<'a>(tokens: &mut std::str::SplitAsciiWhitespace<'a>) -> Option<&'a str> {
325 loop {
326 let tok = tokens.next()?;
327 if let Some(flag) = tok.strip_prefix('-') {
328 if flag == "u" {
329 tokens.next()?;
330 }
331 continue;
332 }
333 if tok.contains('=') {
334 continue;
335 }
336 return Some(basename(tok));
337 }
338}
339
/// Returns the part of `path` after its last `/`, or all of `path` when it
/// contains no `/`.
fn basename(path: &str) -> &str {
    path.rsplit('/').next().unwrap_or(path)
}
343
/// Removes the trailing run of ASCII digits and dots that encodes an
/// interpreter version (`python3` → `python`, `lua5.1` → `lua`,
/// `perl5.36` → `perl`). A name made up entirely of digits and dots is
/// returned unchanged instead of being reduced to the empty string.
fn strip_version_suffix(name: &str) -> &str {
    match name.trim_end_matches(|c: char| matches!(c, '0'..='9' | '.')) {
        "" => name,
        stem => stem,
    }
}
350
351fn get_from_interpreter(name: &str) -> Option<LANG> {
352 match name {
353 "sh" | "bash" | "dash" | "ksh" | "zsh" => Some(LANG::Bash),
354 "python" => Some(LANG::Python),
355 "perl" => Some(LANG::Perl),
356 "lua" | "luajit" => Some(LANG::Lua),
357 "php" | "php-cgi" => Some(LANG::Php),
358 "node" | "nodejs" => Some(LANG::Javascript),
359 "tclsh" | "wish" => Some(LANG::Tcl),
360 "ruby" => Some(LANG::Ruby),
361 "elixir" | "iex" => Some(LANG::Elixir),
362 _ => None,
363 }
364}
365
366fn get_emacs_mode(buf: &[u8]) -> Option<String> {
367 // we just try to use the emacs info (if there)
368 for (i, line) in buf.splitn(5, |c| *c == b'\n').enumerate() {
369 if let Some(cap) = get_regex(&RE1_EMACS, line, FIRST_EMACS_EXPRESSION) {
370 return mode_to_str(&cap[1]);
371 } else if let Some(cap) = get_regex(&RE2_EMACS, line, SECOND_EMACS_EXPRESSION) {
372 return mode_to_str(&cap[1]);
373 } else if let Some(cap) = get_regex(&RE1_VIM, line, VIM_EXPRESSION) {
374 return mode_to_str(&cap[1]);
375 }
376 if i == 3 {
377 break;
378 }
379 }
380
381 for (i, line) in buf.rsplitn(5, |c| *c == b'\n').enumerate() {
382 if let Some(cap) = get_regex(&RE1_VIM, line, VIM_EXPRESSION) {
383 return mode_to_str(&cap[1]);
384 }
385 if i == 3 {
386 break;
387 }
388 }
389
390 None
391}
392
393/// Guesses the language of a code.
394///
395/// Returns a tuple containing a [`LANG`] as first argument
396/// and the language name as a second one.
397///
398/// # Examples
399///
400/// ```
401/// use std::path::PathBuf;
402///
403/// use big_code_analysis::guess_language;
404///
405/// let source_code = "int a = 42;";
406///
407/// // The path to a dummy file used to contain the source code
408/// let path = PathBuf::from("foo.c");
409/// let source_slice = source_code.as_bytes();
410///
411/// // Guess the language of a code
412/// guess_language(&source_slice, &path);
413/// ```
414///
415/// [`LANG`]: enum.LANG.html
416pub fn guess_language<'a, P: AsRef<Path>>(buf: &[u8], path: P) -> (Option<LANG>, &'a str) {
417 let ext = path
418 .as_ref()
419 .extension()
420 .and_then(|e| e.to_str())
421 .map(str::to_lowercase)
422 .unwrap_or_default();
423 let from_ext = get_from_ext(&ext);
424
425 let mode = get_emacs_mode(buf).unwrap_or_default();
426
427 let from_mode = get_from_emacs_mode(&mode);
428
429 if let Some(lang_ext) = from_ext {
430 if let Some(lang_mode) = from_mode {
431 if lang_ext == lang_mode {
432 (
433 Some(lang_mode),
434 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name()),
435 )
436 } else {
437 // we should probably rely on extension here
438 (Some(lang_ext), lang_ext.get_name())
439 }
440 } else {
441 (
442 Some(lang_ext),
443 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_ext.get_name()),
444 )
445 }
446 } else if let Some(lang_mode) = from_mode {
447 (
448 Some(lang_mode),
449 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_mode.get_name()),
450 )
451 } else if let Some(lang_shebang) = get_shebang_lang(buf) {
452 (
453 Some(lang_shebang),
454 fake::get_true(&ext, &mode).unwrap_or_else(|| lang_shebang.get_name()),
455 )
456 } else {
457 (None, fake::get_true(&ext, &mode).unwrap_or_default())
458 }
459}
460
/// Rewrites `data` in place so that every CRLF pair and every lone CR becomes
/// a single LF, then strips all trailing LFs and appends exactly one.
///
/// An empty buffer therefore becomes `b"\n"`.
pub(crate) fn normalize_line_endings(data: &mut Vec<u8>) {
    // Single forward pass. The write cursor never overtakes the read cursor,
    // so the compaction needs no second buffer.
    let mut write = 0;
    let mut prev_was_cr = false;
    for read in 0..data.len() {
        let byte = data[read];
        // The LF of a CRLF pair was already emitted when its CR was seen.
        let lf_of_crlf = prev_was_cr && byte == b'\n';
        prev_was_cr = byte == b'\r';
        if lf_of_crlf {
            continue;
        }
        data[write] = if byte == b'\r' { b'\n' } else { byte };
        write += 1;
    }
    data.truncate(write);

    while data.last() == Some(&b'\n') {
        data.pop();
    }
    data.push(b'\n');
}
487
/// Lexically normalises `path` without touching the filesystem: `.`
/// components are dropped and each `..` removes the component before it
/// (a `..` with nothing left to remove is discarded).
pub(crate) fn normalize_path<P: AsRef<Path>>(path: P) -> PathBuf {
    // Copied from Cargo sources: https://github.com/rust-lang/cargo/blob/master/src/cargo/util/paths.rs#L65
    let mut parts = path.as_ref().components().peekable();

    // A prefix component, when present, is consumed first and seeds the
    // result; the loop below never expects to see another one.
    let mut normalized = parts
        .next_if(|part| matches!(part, Component::Prefix(..)))
        .map_or_else(PathBuf::new, |prefix| PathBuf::from(prefix.as_os_str()));

    for part in parts {
        match part {
            Component::Prefix(..) => unreachable!(),
            Component::RootDir => normalized.push(part.as_os_str()),
            Component::CurDir => {}
            Component::ParentDir => {
                normalized.pop();
            }
            Component::Normal(name) => normalized.push(name),
        }
    }
    normalized
}
515
/// Measures how far apart two paths are: the number of components that
/// remain in `path1` plus the number that remain in `path2` once their
/// deepest shared non-empty ancestor has been removed from both.
///
/// Returns `None` when no non-empty ancestor of `path1` is a prefix of
/// `path2`.
pub(crate) fn get_paths_dist(path1: &Path, path2: &Path) -> Option<usize> {
    // `ancestors()` walks from `path1` itself towards the root, so the first
    // hit is the deepest common ancestor.
    let shared = path1
        .ancestors()
        .find(|ancestor| !ancestor.as_os_str().is_empty() && path2.starts_with(ancestor))?;

    // Both strips are infallible: `shared` comes out of `path1.ancestors()`
    // and was just confirmed to be a prefix of `path2`.
    let rest1 = path1
        .strip_prefix(shared)
        .expect("ancestor is by construction a prefix of path1");
    let rest2 = path2
        .strip_prefix(shared)
        .expect("ancestor verified by starts_with above");

    Some(rest1.components().count() + rest2.components().count())
}
534
535pub(crate) fn guess_file<S: ::std::hash::BuildHasher>(
536 current_path: &Path,
537 include_path: &str,
538 all_files: &HashMap<String, Vec<PathBuf>, S>,
539) -> Vec<PathBuf> {
540 let include_path = include_path
541 .strip_prefix("mozilla/")
542 .unwrap_or(include_path);
543
544 // Resolve the include relative to the including file's parent
545 // before normalizing. This preserves leading `..` traversal so
546 // `#include "../foo.h"` from `src/lib/file.c` targets
547 // `src/foo.h`, not the lexically-popped `foo.h` (issue #297).
548 // Lexical-only normalization is required because `current_path`
549 // and the entries in `all_files` are typically not canonicalized
550 // and the included header need not exist on disk yet.
551 let resolved_path = current_path
552 .parent()
553 .map(|parent| normalize_path(parent.join(include_path)));
554
555 let include_path = normalize_path(include_path);
556 let Some(file_name) = include_path.file_name().and_then(|n| n.to_str()) else {
557 return vec![];
558 };
559 let Some(possibilities) = all_files.get(file_name) else {
560 return vec![];
561 };
562 if possibilities.len() == 1 {
563 return possibilities.clone();
564 }
565
566 // Strategy chain: each step looks for a UNIQUE candidate that
567 // matches a progressively weaker signal (full resolved target →
568 // suffix on the normalized include → siblings of the including
569 // file). When no step yields a unique match, fall back to the
570 // closest by path distance, which may return zero or many.
571 resolve_against_resolved(possibilities, current_path, resolved_path.as_deref())
572 .or_else(|| unique_filter(possibilities, current_path, |p| p.ends_with(&include_path)))
573 .or_else(|| resolve_against_parent(possibilities, current_path))
574 .unwrap_or_else(|| min_distance_candidates(possibilities, current_path))
575}
576
/// Looks for the single entry of `possibilities` that satisfies `pred` and
/// is not `current_path` itself. Returns it wrapped in a one-element `Vec`,
/// or `None` when zero or several entries qualify — which the cascading
/// caller reads as "this strategy was inconclusive, try the next one."
fn unique_filter<F>(possibilities: &[PathBuf], current_path: &Path, pred: F) -> Option<Vec<PathBuf>>
where
    F: Fn(&PathBuf) -> bool,
{
    let mut hits = possibilities
        .iter()
        .filter(|p| p.as_path() != current_path && pred(p));

    let only = hits.next()?;
    if hits.next().is_some() {
        // A second hit makes the match ambiguous.
        return None;
    }
    Some(vec![only.clone()])
}
592
593/// Strongest signal: a candidate matches the fully resolved relative
594/// target. Prefer exact equality, then suffix match (so absolute
595/// `all_files` entries still match a relative resolved target like
596/// `src/foo.h`).
597fn resolve_against_resolved(
598 possibilities: &[PathBuf],
599 current_path: &Path,
600 resolved: Option<&Path>,
601) -> Option<Vec<PathBuf>> {
602 let resolved = resolved?;
603 unique_filter(possibilities, current_path, |p| p == resolved)
604 .or_else(|| unique_filter(possibilities, current_path, |p| p.ends_with(resolved)))
605}
606
607/// Candidate-in-same-directory heuristic: keep entries whose path
608/// starts with the including file's parent directory.
609fn resolve_against_parent(possibilities: &[PathBuf], current_path: &Path) -> Option<Vec<PathBuf>> {
610 let parent = current_path.parent()?;
611 unique_filter(possibilities, current_path, |p| p.starts_with(parent))
612}
613
614/// Last-chance fallback in the `guess_file` strategy chain: returns
615/// every candidate whose `get_paths_dist` from `current_path` ties
616/// the minimum, or an empty `Vec` when no candidate has a defined
617/// distance. Unlike the unique-match strategies, this may
618/// legitimately return zero or many entries — its result is the
619/// function's final answer, not a "try the next strategy" signal.
620fn min_distance_candidates(possibilities: &[PathBuf], current_path: &Path) -> Vec<PathBuf> {
621 // Hold survivors as borrows during the walk: `Less` arms clear the
622 // prior set without dropping owned `PathBuf`s, and the trailing
623 // `cloned()` runs exactly once per final survivor — never on
624 // entries that were tentatively kept and later evicted.
625 let mut dist_min = usize::MAX;
626 let mut path_min: Vec<&PathBuf> = Vec::new();
627 for p in possibilities {
628 if current_path == p {
629 continue;
630 }
631 let Some(dist) = get_paths_dist(current_path, p) else {
632 continue;
633 };
634 match dist.cmp(&dist_min) {
635 Ordering::Less => {
636 dist_min = dist;
637 path_min.clear();
638 path_min.push(p);
639 }
640 Ordering::Equal => path_min.push(p),
641 Ordering::Greater => {}
642 }
643 }
644 path_min.into_iter().cloned().collect()
645}
646
647#[inline]
648pub(crate) fn color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
649 stdout.set_color(ColorSpec::new().set_fg(Some(color)))
650}
651
652#[inline]
653pub(crate) fn intense_color(stdout: &mut StandardStreamLock, color: Color) -> std::io::Result<()> {
654 stdout.set_color(ColorSpec::new().set_fg(Some(color)).set_intense(true))
655}
656
/// Test helper: parses `source` with parser `T` as if it were the file
/// `filename`, computes its metrics, and passes the resulting `FuncSpace`
/// to `check`.
///
/// Before parsing, CRLF and lone CR are turned into LF, trailing whitespace
/// plus leading and trailing newlines are trimmed, and a single `\n` is
/// appended. Panics when the metrics computation returns nothing.
#[cfg(test)]
pub(crate) fn check_func_space<T: crate::ParserTrait, F: Fn(crate::FuncSpace)>(
    source: &str,
    filename: &str,
    check: F,
) {
    let path = std::path::PathBuf::from(filename);

    // Mirror the CRLF/CR normalisation that read_file_with_eol applies via normalize_line_endings
    let unix_eol = source.replace("\r\n", "\n").replace('\r', "\n");
    let body = unix_eol.trim_end().trim_matches('\n');

    let mut code = Vec::with_capacity(body.len() + 1);
    code.extend_from_slice(body.as_bytes());
    code.push(b'\n');

    let parser = T::new(code, &path, None);
    #[allow(deprecated)]
    let func_space = crate::metrics(&parser, &path).unwrap();

    check(func_space);
}
674
/// Test helper: same as `check_func_space`, but hands `check` only the
/// `metrics` field of the top-level `FuncSpace`.
#[cfg(test)]
pub(crate) fn check_metrics<T: crate::ParserTrait>(
    source: &str,
    filename: &str,
    check: fn(crate::CodeMetrics) -> (),
) {
    let on_space = |space: crate::FuncSpace| check(space.metrics);
    check_func_space::<T, _>(source, filename, on_space);
}
683
/// Asserts that `func_space` has a direct child space called `name` and that
/// the child's `kind` equals `expected`; panics otherwise.
///
/// Meant for annotation-type / class / interface tests that must verify the
/// structure of the FuncSpace tree rather than metric values alone: vacuous
/// metric assertions can still pass when `is_func_space` has been reverted
/// for the node kind under test.
#[cfg(test)]
pub(crate) fn assert_child_space_kind(
    func_space: &crate::FuncSpace,
    name: &str,
    expected: crate::SpaceKind,
) {
    let found = func_space
        .spaces
        .iter()
        .find(|space| space.name.as_deref() == Some(name));
    let Some(child) = found else {
        panic!("expected a child FuncSpace named {name:?}");
    };
    assert_eq!(
        child.kind, expected,
        "child FuncSpace {name:?} kind: got {:?}, expected {:?}",
        child.kind, expected,
    );
}
708
709#[cfg(test)]
710#[path = "tools_tests.rs"]
711mod tests;