Skip to main content

sloc_languages/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3
4pub mod style;
5pub use style::{IndentStyle, StyleAnalysis, StyleGuideScore, StyleSignal};
6
7use std::collections::{BTreeMap, BTreeSet, HashSet};
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
/// Every source language this crate can classify and count.
///
/// Values serialize in `snake_case` (see the serde attribute below). The derived
/// `PartialOrd`/`Ord` follow declaration order, so reordering variants changes how ordered
/// collections such as `BTreeSet<Language>` iterate.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    // --- Original language set ---
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    // --- Extended language support ---
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
    // --- Pass 1: modern declarative / smart-contract languages ---
    Solidity,
    Protobuf,
    Hcl,
    GraphQl,
    // --- Pass 2: legacy + embedded / hardware-description languages ---
    Ada,
    Vhdl,
    Verilog,
    Tcl,
    Pascal,
    VisualBasic,
    Lisp,
    // --- Pass 3: scientific / infra / systems / graphics ---
    Fortran,
    Nix,
    Crystal,
    D,
    Glsl,
    Cmake,
    Elm,
    Awk,
}
80
81impl Language {
82    #[must_use]
83    pub const fn display_name(&self) -> &'static str {
84        match self {
85            Self::C => "C",
86            Self::Cpp => "C++",
87            Self::CSharp => "C#",
88            Self::Go => "Go",
89            Self::Java => "Java",
90            Self::JavaScript => "JavaScript",
91            Self::Python => "Python",
92            Self::Rust => "Rust",
93            Self::Shell => "Shell",
94            Self::PowerShell => "PowerShell",
95            Self::TypeScript => "TypeScript",
96            Self::Assembly => "Assembly",
97            Self::Clojure => "Clojure",
98            Self::Css => "CSS",
99            Self::Dart => "Dart",
100            Self::Dockerfile => "Dockerfile",
101            Self::Elixir => "Elixir",
102            Self::Erlang => "Erlang",
103            Self::FSharp => "F#",
104            Self::Groovy => "Groovy",
105            Self::Haskell => "Haskell",
106            Self::Html => "HTML",
107            Self::Julia => "Julia",
108            Self::Kotlin => "Kotlin",
109            Self::Lua => "Lua",
110            Self::Makefile => "Makefile",
111            Self::Nim => "Nim",
112            Self::ObjectiveC => "Objective-C",
113            Self::Ocaml => "OCaml",
114            Self::Perl => "Perl",
115            Self::Php => "PHP",
116            Self::R => "R",
117            Self::Ruby => "Ruby",
118            Self::Scala => "Scala",
119            Self::Scss => "SCSS",
120            Self::Sql => "SQL",
121            Self::Svelte => "Svelte",
122            Self::Swift => "Swift",
123            Self::Vue => "Vue",
124            Self::Xml => "XML",
125            Self::Zig => "Zig",
126            Self::Solidity => "Solidity",
127            Self::Protobuf => "Protocol Buffers",
128            Self::Hcl => "HCL/Terraform",
129            Self::GraphQl => "GraphQL",
130            Self::Ada => "Ada",
131            Self::Vhdl => "VHDL",
132            Self::Verilog => "Verilog/SystemVerilog",
133            Self::Tcl => "Tcl",
134            Self::Pascal => "Pascal/Delphi",
135            Self::VisualBasic => "Visual Basic",
136            Self::Lisp => "Lisp/Scheme",
137            Self::Fortran => "Fortran",
138            Self::Nix => "Nix",
139            Self::Crystal => "Crystal",
140            Self::D => "D",
141            Self::Glsl => "GLSL/HLSL",
142            Self::Cmake => "CMake",
143            Self::Elm => "Elm",
144            Self::Awk => "Awk",
145        }
146    }
147
148    #[must_use]
149    pub const fn as_slug(&self) -> &'static str {
150        match self {
151            Self::C => "c",
152            Self::Cpp => "cpp",
153            Self::CSharp => "csharp",
154            Self::Go => "go",
155            Self::Java => "java",
156            Self::JavaScript => "javascript",
157            Self::Python => "python",
158            Self::Rust => "rust",
159            Self::Shell => "shell",
160            Self::PowerShell => "powershell",
161            Self::TypeScript => "typescript",
162            Self::Assembly => "assembly",
163            Self::Clojure => "clojure",
164            Self::Css => "css",
165            Self::Dart => "dart",
166            Self::Dockerfile => "dockerfile",
167            Self::Elixir => "elixir",
168            Self::Erlang => "erlang",
169            Self::FSharp => "fsharp",
170            Self::Groovy => "groovy",
171            Self::Haskell => "haskell",
172            Self::Html => "html",
173            Self::Julia => "julia",
174            Self::Kotlin => "kotlin",
175            Self::Lua => "lua",
176            Self::Makefile => "makefile",
177            Self::Nim => "nim",
178            Self::ObjectiveC => "objectivec",
179            Self::Ocaml => "ocaml",
180            Self::Perl => "perl",
181            Self::Php => "php",
182            Self::R => "r",
183            Self::Ruby => "ruby",
184            Self::Scala => "scala",
185            Self::Scss => "scss",
186            Self::Sql => "sql",
187            Self::Svelte => "svelte",
188            Self::Swift => "swift",
189            Self::Vue => "vue",
190            Self::Xml => "xml",
191            Self::Zig => "zig",
192            Self::Solidity => "solidity",
193            Self::Protobuf => "protobuf",
194            Self::Hcl => "hcl",
195            Self::GraphQl => "graphql",
196            Self::Ada => "ada",
197            Self::Vhdl => "vhdl",
198            Self::Verilog => "verilog",
199            Self::Tcl => "tcl",
200            Self::Pascal => "pascal",
201            Self::VisualBasic => "visualbasic",
202            Self::Lisp => "lisp",
203            Self::Fortran => "fortran",
204            Self::Nix => "nix",
205            Self::Crystal => "crystal",
206            Self::D => "d",
207            Self::Glsl => "glsl",
208            Self::Cmake => "cmake",
209            Self::Elm => "elm",
210            Self::Awk => "awk",
211        }
212    }
213
214    #[must_use]
215    pub fn from_name(name: &str) -> Option<Self> {
216        match name.trim().to_ascii_lowercase().as_str() {
217            "c" => Some(Self::C),
218            "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
219            "csharp" | "c#" | "cs" => Some(Self::CSharp),
220            "go" | "golang" => Some(Self::Go),
221            "java" => Some(Self::Java),
222            "javascript" | "js" => Some(Self::JavaScript),
223            "python" | "py" => Some(Self::Python),
224            "rust" | "rs" => Some(Self::Rust),
225            "shell" | "sh" | "bash" => Some(Self::Shell),
226            "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
227            "typescript" | "ts" => Some(Self::TypeScript),
228            "assembly" | "asm" => Some(Self::Assembly),
229            "clojure" | "clj" => Some(Self::Clojure),
230            "css" => Some(Self::Css),
231            "dart" => Some(Self::Dart),
232            "dockerfile" | "docker" => Some(Self::Dockerfile),
233            "elixir" | "ex" => Some(Self::Elixir),
234            "erlang" | "erl" => Some(Self::Erlang),
235            "fsharp" | "f#" | "fs" => Some(Self::FSharp),
236            "groovy" => Some(Self::Groovy),
237            "haskell" | "hs" => Some(Self::Haskell),
238            "html" | "htm" => Some(Self::Html),
239            "julia" | "jl" => Some(Self::Julia),
240            "kotlin" | "kt" => Some(Self::Kotlin),
241            "lua" => Some(Self::Lua),
242            "makefile" | "make" | "mk" => Some(Self::Makefile),
243            "nim" => Some(Self::Nim),
244            "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
245            "ocaml" | "ml" => Some(Self::Ocaml),
246            "perl" | "pl" => Some(Self::Perl),
247            "php" => Some(Self::Php),
248            "r" => Some(Self::R),
249            "ruby" | "rb" => Some(Self::Ruby),
250            "scala" => Some(Self::Scala),
251            "scss" | "sass" => Some(Self::Scss),
252            "sql" => Some(Self::Sql),
253            "svelte" => Some(Self::Svelte),
254            "swift" => Some(Self::Swift),
255            "vue" => Some(Self::Vue),
256            "xml" => Some(Self::Xml),
257            "zig" => Some(Self::Zig),
258            "solidity" | "sol" => Some(Self::Solidity),
259            "protobuf" | "proto" | "protocolbuffers" => Some(Self::Protobuf),
260            "hcl" | "terraform" | "tf" => Some(Self::Hcl),
261            "graphql" | "gql" => Some(Self::GraphQl),
262            "ada" => Some(Self::Ada),
263            "vhdl" => Some(Self::Vhdl),
264            "verilog" | "systemverilog" | "sv" => Some(Self::Verilog),
265            "tcl" => Some(Self::Tcl),
266            "pascal" | "delphi" | "pas" => Some(Self::Pascal),
267            "visualbasic" | "vb" | "vbnet" | "vb.net" => Some(Self::VisualBasic),
268            "lisp" | "scheme" | "racket" | "clisp" | "elisp" => Some(Self::Lisp),
269            "fortran" | "f90" | "f95" => Some(Self::Fortran),
270            "nix" => Some(Self::Nix),
271            "crystal" | "cr" => Some(Self::Crystal),
272            "d" | "dlang" => Some(Self::D),
273            "glsl" | "hlsl" | "shader" | "wgsl" => Some(Self::Glsl),
274            "cmake" => Some(Self::Cmake),
275            "elm" => Some(Self::Elm),
276            "awk" => Some(Self::Awk),
277            _ => None,
278        }
279    }
280}
281
/// Per-file line tallies and best-effort lexical metrics.
///
/// Carried as the `raw` field of [`RawFileAnalysis`]. Fields marked `#[serde(default)]` may be
/// absent from deserialized input; the counters without that attribute are required.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    // NOTE(review): the nine counters below had no documentation; the summaries are inferred
    // from the field names only — confirm against the scanner implementation.
    /// Presumably every physical line in the file.
    pub total_physical_lines: u64,
    /// Presumably lines containing nothing but whitespace.
    pub blank_only_lines: u64,
    /// Presumably lines containing code and no comment.
    pub code_only_lines: u64,
    /// Presumably lines containing only a single-line comment.
    pub single_comment_only_lines: u64,
    /// Presumably lines lying entirely inside a block comment.
    pub multi_comment_only_lines: u64,
    /// Presumably lines with code followed by a single-line comment.
    pub mixed_code_single_comment_lines: u64,
    /// Presumably lines with code alongside a block comment.
    pub mixed_code_multi_comment_lines: u64,
    /// Presumably lines belonging to docstrings.
    pub docstring_comment_lines: u64,
    /// Presumably lines the scanner could not classify.
    pub skipped_unknown_lines: u64,
    /// Best-effort count of function/method definition lines detected lexically.
    #[serde(default)]
    pub functions: u64,
    /// Best-effort count of class/struct/trait/type definition lines detected lexically.
    #[serde(default)]
    pub classes: u64,
    /// Best-effort count of variable declaration lines detected lexically.
    #[serde(default)]
    pub variables: u64,
    /// Best-effort count of import/use/include statement lines detected lexically.
    #[serde(default)]
    pub imports: u64,
    /// Lines consisting solely of preprocessor/compiler directives (e.g. `#include`, `#define`
    /// in C/C++/Objective-C). Always a subset of `code_only_lines`. Controlled by
    /// `AnalysisConfig::count_compiler_directives`. IEEE 1045-1992 §4.2.
    #[serde(default)]
    pub compiler_directive_lines: u64,
    /// Best-effort count of test case / test function definition lines detected lexically
    /// (`GTest`, Catch2, `PyTest`, `JUnit`, etc.). Always a subset of `code_only_lines`.
    #[serde(default)]
    pub test_count: u64,
    /// Best-effort count of test assertion call lines detected lexically
    /// (`ASSERT_EQ`, `EXPECT_TRUE`, `assertEquals`, `Assert.AreEqual`, `assert_eq!`, etc.).
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Best-effort count of test suite / fixture / group declaration lines detected lexically
    /// (`TEST_GROUP`, `BOOST_AUTO_TEST_SUITE`, `[TestClass]`, `[TestFixture]`, etc.).
    #[serde(default)]
    pub test_suite_count: u64,
    /// Cyclomatic complexity approximation: total count of branch decision keywords found on
    /// code lines (e.g. `if`, `for`, `while`, `||`, `&&`). Starts at 0; +1 per keyword hit.
    /// Note that this is a `u32`, unlike the `u64` line counters above.
    #[serde(default)]
    pub cyclomatic_complexity: u32,
    /// Logical SLOC estimate: executable statement count using a language-specific strategy.
    /// `None` when the language does not support lexical LSLOC estimation.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lsloc: Option<u32>,
    /// Per-code-line content hashes (trimmed) for ULOC aggregation. Never serialized — only
    /// populated during an in-process scan and consumed by `sloc-core` during aggregation.
    #[serde(skip)]
    pub code_line_hashes: Vec<u64>,
}
335
/// Which analysis backend produced a [`RawFileAnalysis`]; serialized in `snake_case`.
///
/// NOTE(review): the per-variant summaries are inferred from the variant names and from the
/// `tree-sitter` feature gate in `analyze_text`; confirm where each value is actually set.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    /// Presumably the regular lexical scanner.
    Lexical,
    /// Presumably a lexical scan whose result is only approximate.
    LexicalBestEffort,
    /// Presumably a tree-sitter based parse.
    TreeSitter,
}
343
/// Result of analysing one file's text, as returned by `analyze_text`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// Line tallies and lexical metrics for the file.
    pub raw: RawLineCounts,
    /// Backend that produced `raw`.
    pub parse_mode: ParseMode,
    /// Human-readable warnings collected during analysis.
    pub warnings: Vec<String>,
    /// Lexical style-guide analysis for supported languages; `None` when no heuristics apply.
    /// Omitted from serialized output when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
353
/// IEEE 1045-1992 counting options passed from `sloc-core` (built from `AnalysisConfig`).
///
/// `analyze_text` accepts this struct so that the caller can control behaviour that the
/// standard defines as configurable parameters rather than fixed conventions.
/// The type is `Copy`, so it is passed by value.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// When `true` (IEEE 1045-1992 default), blank lines inside block comments count as
    /// comment lines rather than blank lines.
    pub blank_in_block_comment_as_comment: bool,
    /// When `true`, backslash-continued physical lines are collapsed into a single logical
    /// line for SLOC counting purposes (IEEE logical SLOC mode). Defaults to `false`.
    pub collapse_continuation_lines: bool,
    /// When `true` (default), run lexical style-guide heuristics and populate
    /// `RawFileAnalysis::style_analysis`. Set to `false` to skip style scoring entirely.
    pub enable_style: bool,
    /// Restrict style analysis to a language family. With [`StyleLangScope::All`] (default)
    /// every language is eligible; with [`StyleLangScope::CFamilyOnly`] only
    /// C / C++ / Objective-C files are style-analysed.
    pub style_lang_scope: StyleLangScope,
}
373
/// Which language families receive style-guide heuristic analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StyleLangScope {
    /// Every language is eligible for style analysis.
    All,
    /// Only C, C++ and Objective-C are eligible for style analysis.
    CFamilyOnly,
}
380
/// Strategy for computing Logical SLOC (LSLOC) from a physical-line scan.
///
/// The strategy for a language is supplied by `language_complexity_config` and stored in the
/// scan configuration built by `language_scan_config`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LslocStrategy {
    /// Count semicolons on code lines (C, C++, Java, C#, Go, Rust, JS/TS, Kotlin, SQL, …).
    Semicolons,
    /// Count non-blank code lines whose trimmed content does not end with a continuation
    /// character (`\`, `,`, `(`, `[`, `{`). Suitable for Python, Ruby, Shell, Elixir, Nim.
    NonContinuationNewlines,
    /// Language does not have a well-defined statement boundary detectable by simple
    /// lexical heuristics; `lsloc` will be `None` for files of this type.
    Unsupported,
}
393
394impl Default for AnalysisOptions {
395    fn default() -> Self {
396        Self {
397            blank_in_block_comment_as_comment: true,
398            collapse_continuation_lines: false,
399            enable_style: true,
400            style_lang_scope: StyleLangScope::All,
401        }
402    }
403}
404
405#[must_use]
406pub fn supported_languages() -> BTreeSet<Language> {
407    [
408        Language::Assembly,
409        Language::C,
410        Language::Clojure,
411        Language::Cpp,
412        Language::CSharp,
413        Language::Css,
414        Language::Dart,
415        Language::Dockerfile,
416        Language::Elixir,
417        Language::Erlang,
418        Language::FSharp,
419        Language::Go,
420        Language::Groovy,
421        Language::Haskell,
422        Language::Html,
423        Language::Java,
424        Language::JavaScript,
425        Language::Julia,
426        Language::Kotlin,
427        Language::Lua,
428        Language::Makefile,
429        Language::Nim,
430        Language::ObjectiveC,
431        Language::Ocaml,
432        Language::Perl,
433        Language::Php,
434        Language::PowerShell,
435        Language::Python,
436        Language::R,
437        Language::Ruby,
438        Language::Rust,
439        Language::Scala,
440        Language::Scss,
441        Language::Shell,
442        Language::Sql,
443        Language::Svelte,
444        Language::Swift,
445        Language::TypeScript,
446        Language::Vue,
447        Language::Xml,
448        Language::Zig,
449        Language::Solidity,
450        Language::Protobuf,
451        Language::Hcl,
452        Language::GraphQl,
453        Language::Ada,
454        Language::Vhdl,
455        Language::Verilog,
456        Language::Tcl,
457        Language::Pascal,
458        Language::VisualBasic,
459        Language::Lisp,
460        Language::Fortran,
461        Language::Nix,
462        Language::Crystal,
463        Language::D,
464        Language::Glsl,
465        Language::Cmake,
466        Language::Elm,
467        Language::Awk,
468    ]
469    .into_iter()
470    .collect()
471}
472
473/// Detect language from a shebang line (e.g. `#!/usr/bin/env python3`).
474fn detect_by_shebang(line: &str) -> Option<Language> {
475    let lower = line.to_ascii_lowercase();
476    if !lower.starts_with("#!") {
477        return None;
478    }
479    if lower.contains("python") {
480        return Some(Language::Python);
481    }
482    if lower.contains("pwsh") || lower.contains("powershell") {
483        return Some(Language::PowerShell);
484    }
485    if lower.contains("bash")
486        || lower.contains("/sh")
487        || lower.contains("zsh")
488        || lower.contains("ksh")
489    {
490        return Some(Language::Shell);
491    }
492    if lower.contains("ruby") {
493        return Some(Language::Ruby);
494    }
495    if lower.contains("perl") {
496        return Some(Language::Perl);
497    }
498    if lower.contains("php") {
499        return Some(Language::Php);
500    }
501    if lower.contains("node") || lower.contains("nodejs") {
502        return Some(Language::JavaScript);
503    }
504    None
505}
506
507/// Detect language purely from a (lowercased) file extension.
508#[allow(clippy::too_many_lines)]
509fn detect_by_extension(ext: &str) -> Option<Language> {
510    // Static table avoids a large match statement; each extension maps 1-to-1 to a language.
511    static EXT_MAP: &[(&str, Language)] = &[
512        ("c", Language::C),
513        ("h", Language::C),
514        ("cc", Language::Cpp),
515        ("cp", Language::Cpp),
516        ("cpp", Language::Cpp),
517        ("cxx", Language::Cpp),
518        ("hh", Language::Cpp),
519        ("hpp", Language::Cpp),
520        ("hxx", Language::Cpp),
521        ("cs", Language::CSharp),
522        ("go", Language::Go),
523        ("java", Language::Java),
524        ("js", Language::JavaScript),
525        ("mjs", Language::JavaScript),
526        ("cjs", Language::JavaScript),
527        ("py", Language::Python),
528        ("rs", Language::Rust),
529        ("sh", Language::Shell),
530        ("bash", Language::Shell),
531        ("zsh", Language::Shell),
532        ("ksh", Language::Shell),
533        ("ps1", Language::PowerShell),
534        ("psm1", Language::PowerShell),
535        ("psd1", Language::PowerShell),
536        ("ts", Language::TypeScript),
537        ("mts", Language::TypeScript),
538        ("cts", Language::TypeScript),
539        ("tsx", Language::TypeScript),
540        ("jsx", Language::JavaScript),
541        ("asm", Language::Assembly),
542        ("s", Language::Assembly),
543        ("clj", Language::Clojure),
544        ("cljs", Language::Clojure),
545        ("cljc", Language::Clojure),
546        ("edn", Language::Clojure),
547        ("css", Language::Css),
548        ("dart", Language::Dart),
549        ("ex", Language::Elixir),
550        ("exs", Language::Elixir),
551        ("erl", Language::Erlang),
552        ("hrl", Language::Erlang),
553        ("fs", Language::FSharp),
554        ("fsi", Language::FSharp),
555        ("fsx", Language::FSharp),
556        ("groovy", Language::Groovy),
557        ("gradle", Language::Groovy),
558        ("hs", Language::Haskell),
559        ("lhs", Language::Haskell),
560        ("html", Language::Html),
561        ("htm", Language::Html),
562        ("xhtml", Language::Html),
563        ("jl", Language::Julia),
564        ("kt", Language::Kotlin),
565        ("kts", Language::Kotlin),
566        ("lua", Language::Lua),
567        ("mk", Language::Makefile),
568        ("nim", Language::Nim),
569        ("nims", Language::Nim),
570        ("m", Language::ObjectiveC),
571        ("mm", Language::ObjectiveC),
572        ("ml", Language::Ocaml),
573        ("mli", Language::Ocaml),
574        ("pl", Language::Perl),
575        ("pm", Language::Perl),
576        ("t", Language::Perl),
577        ("php", Language::Php),
578        ("php3", Language::Php),
579        ("php4", Language::Php),
580        ("php5", Language::Php),
581        ("php7", Language::Php),
582        ("phtml", Language::Php),
583        ("r", Language::R),
584        ("rb", Language::Ruby),
585        ("rake", Language::Ruby),
586        ("scala", Language::Scala),
587        ("sc", Language::Scala),
588        ("scss", Language::Scss),
589        ("sass", Language::Scss),
590        ("sql", Language::Sql),
591        ("svelte", Language::Svelte),
592        ("swift", Language::Swift),
593        ("vue", Language::Vue),
594        ("xml", Language::Xml),
595        ("xsd", Language::Xml),
596        ("xsl", Language::Xml),
597        ("xslt", Language::Xml),
598        ("svg", Language::Xml),
599        ("zig", Language::Zig),
600        ("sol", Language::Solidity),
601        ("proto", Language::Protobuf),
602        ("tf", Language::Hcl),
603        ("tfvars", Language::Hcl),
604        ("hcl", Language::Hcl),
605        ("graphql", Language::GraphQl),
606        ("gql", Language::GraphQl),
607        ("adb", Language::Ada),
608        ("ads", Language::Ada),
609        ("ada", Language::Ada),
610        ("vhd", Language::Vhdl),
611        ("vhdl", Language::Vhdl),
612        ("v", Language::Verilog),
613        ("sv", Language::Verilog),
614        ("svh", Language::Verilog),
615        ("vh", Language::Verilog),
616        ("tcl", Language::Tcl),
617        ("pas", Language::Pascal),
618        ("dpr", Language::Pascal),
619        ("vb", Language::VisualBasic),
620        ("bas", Language::VisualBasic),
621        ("lisp", Language::Lisp),
622        ("lsp", Language::Lisp),
623        ("el", Language::Lisp),
624        ("scm", Language::Lisp),
625        ("ss", Language::Lisp),
626        ("rkt", Language::Lisp),
627        ("f90", Language::Fortran),
628        ("f95", Language::Fortran),
629        ("f03", Language::Fortran),
630        ("f08", Language::Fortran),
631        ("f", Language::Fortran),
632        ("for", Language::Fortran),
633        ("nix", Language::Nix),
634        ("cr", Language::Crystal),
635        ("d", Language::D),
636        ("glsl", Language::Glsl),
637        ("vert", Language::Glsl),
638        ("frag", Language::Glsl),
639        ("comp", Language::Glsl),
640        ("geom", Language::Glsl),
641        ("tesc", Language::Glsl),
642        ("tese", Language::Glsl),
643        ("hlsl", Language::Glsl),
644        ("wgsl", Language::Glsl),
645        ("cmake", Language::Cmake),
646        ("elm", Language::Elm),
647        ("awk", Language::Awk),
648    ];
649    EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
650}
651
652/// Detect language from an exact filename (no extension) or well-known filename patterns.
653fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
654    // Dockerfile: exact name or Dockerfile.* variant
655    if filename == "Dockerfile"
656        || filename.starts_with("Dockerfile.")
657        || filename_lower == "dockerfile"
658    {
659        return Some(Language::Dockerfile);
660    }
661    // Makefile variants
662    if matches!(
663        filename,
664        "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
665    ) {
666        return Some(Language::Makefile);
667    }
668    // Ruby ecosystem files that have no extension
669    if matches!(
670        filename,
671        "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
672    ) {
673        return Some(Language::Ruby);
674    }
675    // CMake build scripts: `CMakeLists.txt` has a `.txt` extension, so it must be
676    // matched by exact name before extension-based detection.
677    if filename == "CMakeLists.txt" || filename_lower == "cmakelists.txt" {
678        return Some(Language::Cmake);
679    }
680    None
681}
682
683#[must_use]
684#[allow(clippy::too_many_lines)]
685pub fn detect_language(
686    path: &Path,
687    first_line: Option<&str>,
688    extension_overrides: &BTreeMap<String, String>,
689    shebang_detection: bool,
690) -> Option<Language> {
691    let extension = path
692        .extension()
693        .and_then(|ext| ext.to_str())
694        .map(str::to_ascii_lowercase);
695
696    // Extension override check (user-configured mappings win over everything)
697    if let Some(ext) = extension.as_ref() {
698        if let Some(override_name) = extension_overrides.get(ext.as_str()) {
699            if let Some(lang) = Language::from_name(override_name) {
700                return Some(lang);
701            }
702        }
703    }
704
705    // Filename-based detection for files that have no extension or use exact names
706    let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
707    let filename_lower = filename.to_ascii_lowercase();
708
709    if let Some(lang) = detect_by_filename(filename, &filename_lower) {
710        return Some(lang);
711    }
712
713    // Extension-based detection
714    if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
715        return Some(lang);
716    }
717
718    // Shebang detection (last resort — only for extensionless scripts)
719    if shebang_detection {
720        if let Some(line) = first_line {
721            if let Some(lang) = detect_by_shebang(line) {
722                return Some(lang);
723            }
724        }
725    }
726
727    None
728}
729
730#[must_use]
731pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
732    // tree-sitter fast-paths (compiled out when feature is disabled)
733    #[cfg(feature = "tree-sitter")]
734    {
735        match language {
736            Language::C | Language::Cpp => {
737                if let Some(mut result) = ts::analyze_c(text) {
738                    if options.enable_style
739                        && should_style_analyse(language, options.style_lang_scope)
740                    {
741                        result.style_analysis = style::analyze_style(language, text);
742                    }
743                    return result;
744                }
745            }
746            Language::Python => {
747                if let Some(result) = ts::analyze_python(text) {
748                    return result;
749                }
750            }
751            _ => {}
752        }
753    }
754
755    let (mut config, has_preprocessor) = language_scan_config(language);
756
757    // Python docstring lines are computed from the text and cannot be a static constant.
758    if language == Language::Python {
759        config.skip_lines = detect_python_docstring_lines(text);
760    }
761
762    // C, C++, and Objective-C have a preprocessor whose directive lines are tracked separately
763    // per IEEE 1045-1992 §4.2; every other language uses base flags.
764    let flags = IeeeFlags {
765        has_preprocessor_directives: has_preprocessor,
766        blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
767        collapse_continuation_lines: options.collapse_continuation_lines,
768    };
769    let mut result = analyze_generic(text, config, flags);
770    if options.enable_style && should_style_analyse(language, options.style_lang_scope) {
771        result.style_analysis = style::analyze_style(language, text);
772    }
773    result
774}
775
776/// Returns `true` when `language` should be style-analysed under `scope`.
777const fn should_style_analyse(language: Language, scope: StyleLangScope) -> bool {
778    match scope {
779        StyleLangScope::CFamilyOnly => {
780            matches!(language, Language::C | Language::Cpp | Language::ObjectiveC)
781        }
782        StyleLangScope::All => true,
783    }
784}
785
786/// Returns the lexical scan configuration for `language` and whether it uses a C preprocessor.
787/// All fields are static constants except `skip_lines`, which is always empty here; callers that
788/// need non-empty skip sets (currently only Python) must populate the field after this call.
789///
790/// The implementation delegates to `LANG_SCAN_TABLE` (a static `&[(Language, StaticLangConfig)]`)
791/// defined below the `SP_*` symbol-pattern constants.  Each language appears exactly once in the
792/// table, so the linear scan is O(|languages|) but avoids a 41-arm `match` statement.
793fn language_scan_config(language: Language) -> (ScanConfig, bool) {
794    let cfg = LANG_SCAN_TABLE
795        .iter()
796        .find_map(|&(l, c)| (l == language).then_some(c))
797        .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
798    let (branch_keywords, lsloc_strategy) = language_complexity_config(language);
799    (
800        ScanConfig {
801            line_comments: cfg.line_comments,
802            block_comment: cfg.block_comment,
803            allow_single_quote_strings: cfg.allow_single_quote_strings,
804            allow_double_quote_strings: cfg.allow_double_quote_strings,
805            allow_triple_quote_strings: cfg.allow_triple_quote_strings,
806            allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
807            skip_lines: HashSet::new(),
808            symbol_patterns: cfg.symbol_patterns,
809            branch_keywords,
810            lsloc_strategy,
811        },
812        cfg.has_preprocessor,
813    )
814}
815
// ── Branch-keyword lists used for cyclomatic complexity ───────────────────────
// Tokens made of letters are matched on word boundaries; operator tokens
// (||, &&, ?) are matched as raw substrings.  There is one list per language
// family, and `language_complexity_config` assigns a list to each language.

// Java, Kotlin, Scala, D, GLSL: C-style keywords; the ternary `?` is not listed.
const BRANCH_C_FAMILY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&",
];
// C, C++, Objective-C, C#, JavaScript, TypeScript, Svelte, Vue, Dart, Groovy, Swift,
// Solidity: the `BRANCH_C_FAMILY` tokens followed by the ternary `?`.
const BRANCH_C_TERNARY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&", "?",
];
// Go.
const BRANCH_GO: &[&str] = &["if", "else", "for", "switch", "case", "select", "||", "&&"];
// Rust.
const BRANCH_RUST: &[&str] = &["if", "else", "for", "while", "match", "||", "&&"];
// Zig.
const BRANCH_ZIG: &[&str] = &["if", "else", "for", "while", "switch", "catch", "||", "&&"];
// F#.
const BRANCH_FSHARP: &[&str] = &["if", "then", "else", "elif", "match", "when", "||", "&&"];
// Lua (word operators `and` / `or`).
const BRANCH_LUA: &[&str] = &[
    "if", "elseif", "else", "for", "while", "repeat", "and", "or",
];
// Haskell.
const BRANCH_HASKELL: &[&str] = &["if", "then", "else", "case", "otherwise"];
// SQL: each keyword is listed in upper case and in lower case.
const BRANCH_SQL: &[&str] = &["CASE", "WHEN", "IF", "ELSE", "case", "when", "if", "else"];
// OCaml.
const BRANCH_OCAML: &[&str] = &["if", "then", "else", "match", "when", "||", "&&"];
// Clojure.
const BRANCH_CLOJURE: &[&str] = &["if", "when", "cond", "case", "and", "or"];
// PHP (includes `match` and the ternary `?`).
const BRANCH_PHP: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "case", "catch", "match", "||", "&&", "?",
];
// Julia.
const BRANCH_JULIA: &[&str] = &["if", "elseif", "else", "for", "while", "catch", "||", "&&"];
// Python (word operators `or` / `and`).
const BRANCH_PYTHON: &[&str] = &["if", "elif", "else", "for", "while", "except", "or", "and"];
// Ruby and Crystal.
const BRANCH_RUBY: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "while", "case", "when", "rescue", "||", "&&",
];
// Shell.
const BRANCH_SHELL: &[&str] = &["if", "elif", "else", "while", "until", "case", "||", "&&"];
// Elixir (symbolic and word forms of the boolean operators).
const BRANCH_ELIXIR: &[&str] = &[
    "if", "else", "cond", "case", "when", "rescue", "||", "&&", "and", "or",
];
// PowerShell.
const BRANCH_POWERSHELL: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "foreach", "||", "&&",
];
// Nim.
const BRANCH_NIM: &[&str] = &[
    "if", "elif", "else", "for", "while", "case", "of", "except", "and", "or",
];
// Perl.
const BRANCH_PERL: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "for", "while", "foreach", "||", "&&",
];
// R.
const BRANCH_R: &[&str] = &["if", "else", "for", "while", "repeat", "||", "&&"];
// Pass 2 branch-keyword lists: legacy, embedded and hardware-description languages.
// Ada.
const BRANCH_ADA: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or",
];
// VHDL: the Ada-style tokens followed by the `nand` / `nor` / `xor` word operators.
const BRANCH_VHDL: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or", "nand", "nor",
    "xor",
];
// Verilog (includes the `casex` / `casez` variants).
const BRANCH_VERILOG: &[&str] = &[
    "if", "else", "case", "casex", "casez", "for", "while", "&&", "||",
];
// Tcl.
const BRANCH_TCL: &[&str] = &["if", "elseif", "else", "switch", "while", "for", "foreach"];
// Pascal.
const BRANCH_PASCAL: &[&str] = &[
    "if", "then", "else", "case", "while", "for", "repeat", "until", "and", "or",
];
// Visual Basic: keywords are listed in their capitalised spelling only.
const BRANCH_VB: &[&str] = &[
    "If", "Then", "ElseIf", "Else", "Select", "Case", "While", "For", "Do", "And", "Or",
];
// Lisp.
const BRANCH_LISP: &[&str] = &["if", "when", "unless", "cond", "case", "and", "or"];
// Pass 3 branch-keyword lists: scientific, infrastructure, systems and graphics languages.
// Fortran.
const BRANCH_FORTRAN: &[&str] = &[
    "if", "then", "else", "elseif", "case", "do", "while", "where",
];
// Nix.
const BRANCH_NIX: &[&str] = &["if", "then", "else"];
// CMake: every token carries the opening parenthesis of the command call.
const BRANCH_CMAKE: &[&str] = &["if(", "elseif(", "else(", "while(", "foreach("];
// Elm.
const BRANCH_ELM: &[&str] = &["if", "then", "else", "case", "of"];
// AWK.
const BRANCH_AWK: &[&str] = &["if", "else", "while", "for", "do"];
886
887/// Returns (`branch_keywords`, `lsloc_strategy`) for the given language.
888/// Kept separate from `LANG_SCAN_TABLE` to avoid touching that large table.
889const fn language_complexity_config(
890    language: Language,
891) -> (&'static [&'static str], LslocStrategy) {
892    match language {
893        // ── C-ternary family (ternary operator counted as branch) ─────────────
894        Language::C
895        | Language::Cpp
896        | Language::ObjectiveC
897        | Language::CSharp
898        | Language::JavaScript
899        | Language::TypeScript
900        | Language::Svelte
901        | Language::Vue
902        | Language::Dart
903        | Language::Groovy
904        | Language::Swift
905        | Language::Solidity => (BRANCH_C_TERNARY, LslocStrategy::Semicolons),
906        // ── C-family (no ternary keyword) ────────────────────────────────────
907        Language::Java | Language::Kotlin | Language::Scala | Language::D | Language::Glsl => {
908            (BRANCH_C_FAMILY, LslocStrategy::Semicolons)
909        }
910        Language::Go => (BRANCH_GO, LslocStrategy::Semicolons),
911        Language::Rust => (BRANCH_RUST, LslocStrategy::Semicolons),
912        Language::Zig => (BRANCH_ZIG, LslocStrategy::Semicolons),
913        Language::FSharp => (BRANCH_FSHARP, LslocStrategy::Unsupported),
914        // ── Hash-comment family ───────────────────────────────────────────────
915        Language::Shell => (BRANCH_SHELL, LslocStrategy::NonContinuationNewlines),
916        Language::Elixir => (BRANCH_ELIXIR, LslocStrategy::NonContinuationNewlines),
917        Language::Perl => (BRANCH_PERL, LslocStrategy::Semicolons),
918        Language::R => (BRANCH_R, LslocStrategy::NonContinuationNewlines),
919        Language::Ruby | Language::Crystal => (BRANCH_RUBY, LslocStrategy::NonContinuationNewlines),
920        Language::Python => (BRANCH_PYTHON, LslocStrategy::NonContinuationNewlines),
921        Language::PowerShell => (BRANCH_POWERSHELL, LslocStrategy::Unsupported),
922        Language::Nim => (BRANCH_NIM, LslocStrategy::NonContinuationNewlines),
923        // ── Unique comment styles ─────────────────────────────────────────────
924        Language::Lua => (BRANCH_LUA, LslocStrategy::Unsupported),
925        Language::Haskell => (BRANCH_HASKELL, LslocStrategy::Unsupported),
926        Language::Sql => (BRANCH_SQL, LslocStrategy::Semicolons),
927        Language::Ocaml => (BRANCH_OCAML, LslocStrategy::Semicolons),
928        Language::Clojure => (BRANCH_CLOJURE, LslocStrategy::Unsupported),
929        Language::Php => (BRANCH_PHP, LslocStrategy::Semicolons),
930        Language::Julia => (BRANCH_JULIA, LslocStrategy::NonContinuationNewlines),
931        Language::Protobuf => (&[], LslocStrategy::Semicolons),
932        Language::Hcl => (&[], LslocStrategy::NonContinuationNewlines),
933        // ── Legacy / embedded / HDL ───────────────────────────────────────────
934        Language::Ada => (BRANCH_ADA, LslocStrategy::Semicolons),
935        Language::Vhdl => (BRANCH_VHDL, LslocStrategy::Semicolons),
936        Language::Verilog => (BRANCH_VERILOG, LslocStrategy::Semicolons),
937        Language::Tcl => (BRANCH_TCL, LslocStrategy::NonContinuationNewlines),
938        Language::Pascal => (BRANCH_PASCAL, LslocStrategy::Semicolons),
939        Language::VisualBasic => (BRANCH_VB, LslocStrategy::NonContinuationNewlines),
940        Language::Lisp => (BRANCH_LISP, LslocStrategy::Unsupported),
941        // ── Scientific / infra / systems / graphics ───────────────────────────
942        Language::Fortran => (BRANCH_FORTRAN, LslocStrategy::NonContinuationNewlines),
943        Language::Nix => (BRANCH_NIX, LslocStrategy::Unsupported),
944        Language::Cmake => (BRANCH_CMAKE, LslocStrategy::Unsupported),
945        Language::Elm => (BRANCH_ELM, LslocStrategy::Unsupported),
946        Language::Awk => (BRANCH_AWK, LslocStrategy::NonContinuationNewlines),
947        // ── No branch detection / syntax unsupported ──────────────────────────
948        Language::Makefile
949        | Language::Dockerfile
950        | Language::Css
951        | Language::Html
952        | Language::Xml
953        | Language::Assembly
954        | Language::Erlang
955        | Language::GraphQl
956        | Language::Scss => (&[], LslocStrategy::Unsupported),
957    }
958}
959
/// Keyword-prefix tables that drive best-effort structural symbol detection for one
/// language.
///
/// Each field lists line prefixes, compared after leading whitespace has been stripped,
/// that mark a definition of that category. An empty slice disables detection for the
/// category.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Prefixes that mark a function definition.
    functions: &'static [&'static str],
    /// Prefixes that count as a function only when the line ALSO contains `(` and no `=`
    /// appears between the prefix and the first `(`.  Needed for C/C++, where a function
    /// definition starts with its return type (`void`, `int`, `bool`, …) rather than a
    /// dedicated keyword; the paren guard tells `void f(x)` apart from
    /// `void* p = malloc(n)`.
    functions_prefix_paren: &'static [&'static str],
    /// Prefixes that mark a class-like definition.
    classes: &'static [&'static str],
    /// Prefixes that mark a variable definition.
    variables: &'static [&'static str],
    /// Prefixes that mark an import.
    imports: &'static [&'static str],
    /// Prefixes (after stripping leading whitespace) that mark a test case or test function
    /// definition. Matched against code lines only, like the other symbol categories.
    tests: &'static [&'static str],
    /// Prefixes that mark a test assertion call (`ASSERT_EQ`, `assertEquals`, `assert_eq!`,
    /// `Assert.AreEqual`, etc.). Matched against code lines only.
    assertions: &'static [&'static str],
    /// Prefixes that mark a test suite / fixture / group declaration
    /// (`TEST_GROUP`, `BOOST_AUTO_TEST_SUITE`, `[TestClass]`, `[TestFixture]`, etc.).
    test_suites: &'static [&'static str],
}

impl SymbolPatterns {
    /// Builds a pattern set in which every category is an empty slice.
    const fn none() -> Self {
        const DISABLED: &[&str] = &[];
        Self {
            functions: DISABLED,
            functions_prefix_paren: DISABLED,
            classes: DISABLED,
            variables: DISABLED,
            imports: DISABLED,
            tests: DISABLED,
            assertions: DISABLED,
            test_suites: DISABLED,
        }
    }
}

// Pattern set with every category empty (all fields are &[]).
const SP_NONE: SymbolPatterns = SymbolPatterns::none();
1002
1003// Solidity: `function`/`modifier`/`constructor` definitions; `contract`/`interface`/
1004// `library` are the structural units (mapped to classes alongside struct/enum).
1005const SP_SOLIDITY: SymbolPatterns = SymbolPatterns {
1006    functions: &[
1007        "function ",
1008        "modifier ",
1009        "constructor",
1010        "receive ",
1011        "fallback ",
1012    ],
1013    functions_prefix_paren: &[],
1014    classes: &["contract ", "interface ", "library ", "struct ", "enum "],
1015    variables: &[],
1016    imports: &["import "],
1017    tests: &[],
1018    assertions: &[],
1019    test_suites: &[],
1020};
1021
1022// Protocol Buffers: `message`/`service`/`enum` declarations are the structural units;
1023// `rpc` entries are the closest thing to functions.
1024const SP_PROTOBUF: SymbolPatterns = SymbolPatterns {
1025    functions: &["rpc "],
1026    functions_prefix_paren: &[],
1027    classes: &["message ", "service ", "enum "],
1028    variables: &[],
1029    imports: &["import "],
1030    tests: &[],
1031    assertions: &[],
1032    test_suites: &[],
1033};
1034
1035// ── Pass 2 symbol patterns (legacy + embedded / HDL) ──────────────────────────
1036const SP_ADA: SymbolPatterns = SymbolPatterns {
1037    functions: &["procedure ", "function "],
1038    functions_prefix_paren: &[],
1039    classes: &["package ", "type ", "task ", "protected "],
1040    variables: &[],
1041    imports: &["with ", "use "],
1042    tests: &[],
1043    assertions: &[],
1044    test_suites: &[],
1045};
1046
1047const SP_VHDL: SymbolPatterns = SymbolPatterns {
1048    functions: &["function ", "procedure ", "process "],
1049    functions_prefix_paren: &[],
1050    classes: &["entity ", "architecture ", "package ", "component "],
1051    variables: &[],
1052    imports: &["library ", "use "],
1053    tests: &[],
1054    assertions: &[],
1055    test_suites: &[],
1056};
1057
1058const SP_VERILOG: SymbolPatterns = SymbolPatterns {
1059    functions: &["function ", "task "],
1060    functions_prefix_paren: &[],
1061    classes: &["module ", "interface ", "class ", "package "],
1062    variables: &[],
1063    imports: &["import ", "`include"],
1064    tests: &[],
1065    assertions: &[],
1066    test_suites: &[],
1067};
1068
1069const SP_TCL: SymbolPatterns = SymbolPatterns {
1070    functions: &["proc "],
1071    functions_prefix_paren: &[],
1072    classes: &[],
1073    variables: &[],
1074    imports: &["source ", "package require "],
1075    tests: &[],
1076    assertions: &[],
1077    test_suites: &[],
1078};
1079
1080const SP_PASCAL: SymbolPatterns = SymbolPatterns {
1081    functions: &["procedure ", "function "],
1082    functions_prefix_paren: &[],
1083    classes: &["type ", "class ", "record "],
1084    variables: &[],
1085    imports: &["uses "],
1086    tests: &[],
1087    assertions: &[],
1088    test_suites: &[],
1089};
1090
1091const SP_VB: SymbolPatterns = SymbolPatterns {
1092    functions: &[
1093        "Sub ",
1094        "Function ",
1095        "Private Sub ",
1096        "Public Sub ",
1097        "Private Function ",
1098        "Public Function ",
1099    ],
1100    functions_prefix_paren: &[],
1101    classes: &["Class ", "Module ", "Structure "],
1102    variables: &[],
1103    imports: &["Imports "],
1104    tests: &[],
1105    assertions: &[],
1106    test_suites: &[],
1107};
1108
1109const SP_LISP: SymbolPatterns = SymbolPatterns {
1110    functions: &["(defun ", "(defmacro ", "(define ", "(defmethod ", "(defn "],
1111    functions_prefix_paren: &[],
1112    classes: &["(defclass ", "(defstruct "],
1113    variables: &[],
1114    imports: &["(require ", "(import ", "(use-package "],
1115    tests: &[],
1116    assertions: &[],
1117    test_suites: &[],
1118};
1119
1120// ── Pass 3 symbol patterns (scientific / infra / systems / graphics) ──────────
1121const SP_FORTRAN: SymbolPatterns = SymbolPatterns {
1122    functions: &["subroutine ", "function "],
1123    functions_prefix_paren: &[],
1124    classes: &["module ", "program ", "type "],
1125    variables: &[],
1126    imports: &["use ", "include "],
1127    tests: &[],
1128    assertions: &[],
1129    test_suites: &[],
1130};
1131
1132const SP_CRYSTAL: SymbolPatterns = SymbolPatterns {
1133    functions: &["def "],
1134    functions_prefix_paren: &[],
1135    classes: &["class ", "module ", "struct ", "enum "],
1136    variables: &[],
1137    imports: &["require "],
1138    tests: &[],
1139    assertions: &[],
1140    test_suites: &[],
1141};
1142
1143const SP_D: SymbolPatterns = SymbolPatterns {
1144    functions: &[],
1145    functions_prefix_paren: &[],
1146    classes: &["class ", "struct ", "interface ", "enum ", "template "],
1147    variables: &[],
1148    imports: &["import "],
1149    tests: &[],
1150    assertions: &[],
1151    test_suites: &[],
1152};
1153
1154const SP_CMAKE: SymbolPatterns = SymbolPatterns {
1155    functions: &["function(", "macro("],
1156    functions_prefix_paren: &[],
1157    classes: &[],
1158    variables: &[],
1159    imports: &["include(", "add_subdirectory("],
1160    tests: &[],
1161    assertions: &[],
1162    test_suites: &[],
1163};
1164
1165const SP_ELM: SymbolPatterns = SymbolPatterns {
1166    functions: &[],
1167    functions_prefix_paren: &[],
1168    classes: &["type "],
1169    variables: &[],
1170    imports: &["import "],
1171    tests: &[],
1172    assertions: &[],
1173    test_suites: &[],
1174};
1175
1176const SP_AWK: SymbolPatterns = SymbolPatterns {
1177    functions: &["function "],
1178    functions_prefix_paren: &[],
1179    classes: &[],
1180    variables: &[],
1181    imports: &[],
1182    tests: &[],
1183    assertions: &[],
1184    test_suites: &[],
1185};
1186
1187const SP_RUST: SymbolPatterns = SymbolPatterns {
1188    functions: &[
1189        "fn ",
1190        "pub fn ",
1191        "pub(crate) fn ",
1192        "pub(super) fn ",
1193        "async fn ",
1194        "pub async fn ",
1195        "pub(crate) async fn ",
1196        "unsafe fn ",
1197        "pub unsafe fn ",
1198        "pub(crate) unsafe fn ",
1199        "const fn ",
1200        "pub const fn ",
1201        "pub(crate) const fn ",
1202        "extern fn ",
1203        "pub extern fn ",
1204    ],
1205    functions_prefix_paren: &[],
1206    classes: &[
1207        "struct ",
1208        "pub struct ",
1209        "pub(crate) struct ",
1210        "enum ",
1211        "pub enum ",
1212        "pub(crate) enum ",
1213        "trait ",
1214        "pub trait ",
1215        "pub(crate) trait ",
1216        "impl ",
1217        "impl<",
1218        "type ",
1219        "pub type ",
1220        "pub(crate) type ",
1221    ],
1222    variables: &["let ", "let mut "],
1223    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
1224    // Built-in #[test], tokio/actix async test attributes, rstest
1225    tests: &[
1226        "#[test]",
1227        "#[tokio::test]",
1228        "#[actix_web::test]",
1229        "#[rstest]",
1230        "#[test_case",
1231    ],
1232    assertions: &[
1233        "assert_eq!(",
1234        "assert_ne!(",
1235        "assert!(",
1236        "assert_matches!(",
1237        "assert_err!(",
1238        "assert_ok!(",
1239    ],
1240    test_suites: &[],
1241};
1242
1243const SP_PYTHON: SymbolPatterns = SymbolPatterns {
1244    functions: &["def ", "async def "],
1245    functions_prefix_paren: &[],
1246    classes: &["class "],
1247    variables: &[],
1248    imports: &["import ", "from "],
1249    // pytest: test_ prefix functions and Test* classes; unittest: test_ methods
1250    tests: &["def test_", "async def test_", "class Test"],
1251    assertions: &[
1252        "self.assertEqual(",
1253        "self.assertNotEqual(",
1254        "self.assertTrue(",
1255        "self.assertFalse(",
1256        "self.assertIsNone(",
1257        "self.assertIsNotNone(",
1258        "self.assertIn(",
1259        "self.assertNotIn(",
1260        "self.assertRaises(",
1261        "self.assertAlmostEqual(",
1262    ],
1263    test_suites: &[],
1264};
1265
1266const SP_JS: SymbolPatterns = SymbolPatterns {
1267    functions: &[
1268        "function ",
1269        "async function ",
1270        "export function ",
1271        "export async function ",
1272        "export default function ",
1273    ],
1274    functions_prefix_paren: &[],
1275    classes: &["class ", "export class ", "export default class "],
1276    variables: &[
1277        "var ",
1278        "let ",
1279        "const ",
1280        "export var ",
1281        "export let ",
1282        "export const ",
1283    ],
1284    imports: &["import "],
1285    // Jest/Mocha/Jasmine: describe/it/test block openers
1286    tests: &[
1287        "describe(",
1288        "it(",
1289        "test(",
1290        "it.each(",
1291        "test.each(",
1292        "describe.each(",
1293    ],
1294    assertions: &["expect("],
1295    test_suites: &[],
1296};
1297
1298const SP_TS: SymbolPatterns = SymbolPatterns {
1299    functions: &[
1300        "function ",
1301        "async function ",
1302        "export function ",
1303        "export async function ",
1304        "export default function ",
1305    ],
1306    functions_prefix_paren: &[],
1307    classes: &[
1308        "class ",
1309        "export class ",
1310        "export default class ",
1311        "abstract class ",
1312        "export abstract class ",
1313        "interface ",
1314        "export interface ",
1315        "declare class ",
1316        "declare interface ",
1317    ],
1318    variables: &[
1319        "var ",
1320        "let ",
1321        "const ",
1322        "export var ",
1323        "export let ",
1324        "export const ",
1325    ],
1326    imports: &["import "],
1327    // Jest/Mocha/Jasmine/Vitest: describe/it/test block openers
1328    tests: &[
1329        "describe(",
1330        "it(",
1331        "test(",
1332        "it.each(",
1333        "test.each(",
1334        "describe.each(",
1335    ],
1336    assertions: &["expect("],
1337    test_suites: &[],
1338};
1339
1340const SP_GO: SymbolPatterns = SymbolPatterns {
1341    functions: &["func "],
1342    functions_prefix_paren: &[],
1343    classes: &["type "],
1344    variables: &["var "],
1345    imports: &["import "],
1346    // Go standard testing: Test* functions (convention is practically exclusive to _test.go files)
1347    tests: &["func Test", "func Benchmark", "func Fuzz"],
1348    assertions: &[],
1349    test_suites: &[],
1350};
1351
1352const SP_JAVA: SymbolPatterns = SymbolPatterns {
1353    functions: &[],
1354    functions_prefix_paren: &[],
1355    classes: &[
1356        "class ",
1357        "public class ",
1358        "private class ",
1359        "protected class ",
1360        "abstract class ",
1361        "final class ",
1362        "public abstract class ",
1363        "public final class ",
1364        "interface ",
1365        "public interface ",
1366        "enum ",
1367        "public enum ",
1368        "record ",
1369        "public record ",
1370        "@interface ",
1371    ],
1372    variables: &[],
1373    imports: &["import "],
1374    // JUnit 4 & 5, TestNG — annotations appear on their own line before the method
1375    tests: &[
1376        "@Test",
1377        "@ParameterizedTest",
1378        "@RepeatedTest",
1379        "@TestFactory",
1380        "@TestTemplate",
1381    ],
1382    assertions: &[
1383        "assertEquals(",
1384        "assertNotEquals(",
1385        "assertTrue(",
1386        "assertFalse(",
1387        "assertNull(",
1388        "assertNotNull(",
1389        "assertThat(",
1390        "assertThrows(",
1391        "assertAll(",
1392        "assertArrayEquals(",
1393        "assertIterableEquals(",
1394        "assertLinesMatch(",
1395    ],
1396    test_suites: &[],
1397};
1398
1399const SP_CSHARP: SymbolPatterns = SymbolPatterns {
1400    functions: &[],
1401    functions_prefix_paren: &[],
1402    classes: &[
1403        "class ",
1404        "public class ",
1405        "private class ",
1406        "protected class ",
1407        "internal class ",
1408        "abstract class ",
1409        "sealed class ",
1410        "static class ",
1411        "partial class ",
1412        "public abstract class ",
1413        "public sealed class ",
1414        "public static class ",
1415        "interface ",
1416        "public interface ",
1417        "internal interface ",
1418        "enum ",
1419        "public enum ",
1420        "struct ",
1421        "public struct ",
1422        "record ",
1423        "public record ",
1424    ],
1425    variables: &["var "],
1426    imports: &["using "],
1427    // MSTest, NUnit, xUnit — attributes on their own line before the method
1428    tests: &[
1429        "[TestMethod]",
1430        "[Test]",
1431        "[Fact]",
1432        "[Theory]",
1433        "[TestCase(",
1434        "[DataRow(",
1435        "[InlineData(",
1436        "[MemberData(",
1437    ],
1438    assertions: &[
1439        "Assert.AreEqual(",
1440        "Assert.AreNotEqual(",
1441        "Assert.IsTrue(",
1442        "Assert.IsFalse(",
1443        "Assert.IsNull(",
1444        "Assert.IsNotNull(",
1445        "Assert.Equal(",
1446        "Assert.NotEqual(",
1447        "Assert.True(",
1448        "Assert.False(",
1449        "Assert.That(",
1450        "Assert.Contains(",
1451        "Assert.Throws(",
1452        "Assert.ThrowsAsync(",
1453        "Assert.IsInstanceOfType(",
1454    ],
1455    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
1456};
1457
// Test-definition prefixes shared by C and C++, grouped by framework: GTest,
// Catch2/doctest, Boost.Test, CppUnit, Unity, Check, CMocka and CppUTest.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // ── Google Test ──
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // ── Catch2 / doctest ──
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // ── Boost.Test ──
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // ── CppUnit ──
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // ── Unity (embedded C) ──
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // ── Check (libcheck, embedded C) ──
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // ── CMocka (embedded C) ──
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // ── CppUTest ──
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
1498
// Assertion-call prefixes shared by C and C++, grouped by framework.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // ── Google Test ASSERT_* (test-stopping failures) ──
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // ── Google Test EXPECT_* (non-stopping failures) ──
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // ── Catch2 / doctest ──
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // ── Unity (embedded C) ──
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // ── CMocka (embedded C) ──
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1572
// Suite / group declaration prefixes shared by C and C++ (CppUTest, Boost.Test, CppUnit).
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    // CppUTest
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    // Boost.Test
    "BOOST_AUTO_TEST_SUITE(",
    // CppUnit
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1581
1582const SP_C: SymbolPatterns = SymbolPatterns {
1583    // C has no function keyword; detect by common return types that precede `(` with no `=`.
1584    functions: &[],
1585    functions_prefix_paren: &[
1586        "void ",
1587        "int ",
1588        "char ",
1589        "float ",
1590        "double ",
1591        "long ",
1592        "unsigned ",
1593        "size_t ",
1594        "static ",
1595        "inline ",
1596        "const ",
1597        "extern ",
1598    ],
1599    classes: &[
1600        "struct ",
1601        "typedef struct ",
1602        "union ",
1603        "typedef union ",
1604        "typedef enum ",
1605    ],
1606    variables: &[],
1607    imports: &["#include "],
1608    tests: TEST_PATTERNS_C_CPP,
1609    assertions: ASSERT_PATTERNS_C_CPP,
1610    test_suites: SUITE_PATTERNS_C_CPP,
1611};
1612
1613const SP_CPP: SymbolPatterns = SymbolPatterns {
1614    // C++ specific function keyword-prefixes; return-type-led patterns use functions_prefix_paren.
1615    functions: &[
1616        "virtual ",  // virtual method declaration/definition
1617        "explicit ", // explicit constructor modifier
1618        "~",         // destructor (e.g. ~MyClass())
1619        "operator",  // operator overload (operator==, operator+, …)
1620    ],
1621    functions_prefix_paren: &[
1622        "void ",
1623        "bool ",
1624        "int ",
1625        "char ",
1626        "float ",
1627        "double ",
1628        "long ",
1629        "unsigned ",
1630        "size_t ",
1631        "auto ",
1632        "static ",
1633        "inline ",
1634        "constexpr ",
1635        "const ",
1636        "extern ",
1637    ],
1638    // `template<` (no space) is the dominant modern style alongside `template ` (with space).
1639    classes: &["class ", "struct ", "namespace ", "template ", "template<"],
1640    variables: &[],
1641    imports: &["#include "],
1642    tests: TEST_PATTERNS_C_CPP,
1643    assertions: ASSERT_PATTERNS_C_CPP,
1644    test_suites: SUITE_PATTERNS_C_CPP,
1645};
1646
1647const SP_SHELL: SymbolPatterns = SymbolPatterns {
1648    functions: &["function "],
1649    functions_prefix_paren: &[],
1650    classes: &[],
1651    variables: &["declare ", "local ", "export "],
1652    imports: &["source ", ". "],
1653    tests: &[],
1654    assertions: &[],
1655    test_suites: &[],
1656};
1657
1658const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
1659    functions: &["function ", "Function "],
1660    functions_prefix_paren: &[],
1661    classes: &["class "],
1662    variables: &[],
1663    imports: &["Import-Module ", "using "],
1664    // Pester test framework
1665    tests: &["Describe ", "It ", "Context "],
1666    assertions: &[],
1667    test_suites: &[],
1668};
1669
1670const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
1671    functions: &[
1672        "fun ",
1673        "private fun ",
1674        "public fun ",
1675        "protected fun ",
1676        "internal fun ",
1677        "override fun ",
1678        "suspend fun ",
1679        "abstract fun ",
1680        "open fun ",
1681        "private suspend fun ",
1682        "public suspend fun ",
1683    ],
1684    functions_prefix_paren: &[],
1685    classes: &[
1686        "class ",
1687        "data class ",
1688        "sealed class ",
1689        "abstract class ",
1690        "open class ",
1691        "object ",
1692        "companion object",
1693        "interface ",
1694        "enum class ",
1695        "annotation class ",
1696    ],
1697    variables: &["val ", "var ", "private val ", "private var ", "const val "],
1698    imports: &["import "],
1699    // JUnit 4/5, KotlinTest, Kotest
1700    tests: &[
1701        "@Test",
1702        "@ParameterizedTest",
1703        "@RepeatedTest",
1704        "\"should ",
1705        "\"it ",
1706    ],
1707    assertions: &[
1708        "assertEquals(",
1709        "assertNotEquals(",
1710        "assertTrue(",
1711        "assertFalse(",
1712        "assertNull(",
1713        "assertNotNull(",
1714        "assertThat(",
1715        "assertThrows(",
1716        "shouldBe(",
1717        "shouldNotBe(",
1718        "shouldThrow(",
1719    ],
1720    test_suites: &[],
1721};
1722
1723const SP_SWIFT: SymbolPatterns = SymbolPatterns {
1724    functions: &[
1725        "func ",
1726        "private func ",
1727        "public func ",
1728        "internal func ",
1729        "override func ",
1730        "open func ",
1731        "static func ",
1732        "class func ",
1733        "mutating func ",
1734        "private static func ",
1735        "public static func ",
1736    ],
1737    functions_prefix_paren: &[],
1738    classes: &[
1739        "class ",
1740        "struct ",
1741        "protocol ",
1742        "enum ",
1743        "extension ",
1744        "actor ",
1745        "public class ",
1746        "private class ",
1747        "open class ",
1748        "final class ",
1749        "public struct ",
1750        "private struct ",
1751        "public protocol ",
1752    ],
1753    variables: &[
1754        "var ",
1755        "let ",
1756        "private var ",
1757        "private let ",
1758        "static var ",
1759        "static let ",
1760    ],
1761    imports: &["import "],
1762    // XCTest: test functions are named test* by convention; Swift Testing: @Test attribute
1763    tests: &["func test", "func Test", "@Test"],
1764    assertions: &[
1765        "XCTAssertEqual(",
1766        "XCTAssertNotEqual(",
1767        "XCTAssertTrue(",
1768        "XCTAssertFalse(",
1769        "XCTAssertNil(",
1770        "XCTAssertNotNil(",
1771        "XCTAssertGreaterThan(",
1772        "XCTAssertLessThan(",
1773        "XCTAssertThrowsError(",
1774        "XCTAssertNoThrow(",
1775        "#expect(",
1776    ],
1777    test_suites: &[],
1778};
1779
/// Symbol patterns for Ruby sources (referenced by the `Language::Ruby` table entry).
///
/// No variable or assertion patterns are defined for Ruby.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    // RSpec / minitest
    tests: &["it ", "it(", "describe ", "context ", "test "],
    assertions: &[],
    test_suites: &[],
};
1791
/// Symbol patterns for Scala sources (referenced by the `Language::Scala` table entry).
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    functions_prefix_paren: &[],
    // Objects and traits are counted alongside classes.
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    // ScalaTest / MUnit: FunSuite `test("...")`, FlatSpec/FunSpec `it("...")` and
    // `describe("...")`.  The AnyWordSpec `"..." should` form has no pattern here.
    tests: &["test(", "it(", "describe("],
    assertions: &[],
    test_suites: &[],
};
1810
/// Symbol patterns for PHP sources (referenced by the `Language::Php` table entry).
///
/// No variable or assertion patterns are defined for PHP.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    // Namespace imports (`use`) and file inclusion statements.
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    // PHPUnit: test methods start with `test`, or carry the `#[Test]` /
    // `#[DataProvider(...)]` attributes.  The docblock `@test` annotation is
    // not matched by any pattern below.
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    assertions: &[],
    test_suites: &[],
};
1851
/// Symbol patterns for Elixir sources (referenced by the `Language::Elixir` table entry).
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    // Public and private (`…p`) definitions of functions, macros and guards.
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    functions_prefix_paren: &[],
    // Modules, protocols and protocol implementations fill the "class" slot.
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    // ExUnit
    tests: &["test ", "describe "],
    assertions: &[],
    test_suites: &[],
};
1870
/// Symbol patterns for Erlang sources (referenced by the `Language::Erlang` table entry).
///
/// Only module and import/include attributes are recognised; there are no
/// function, variable or test patterns.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1881
/// Symbol patterns for F# sources (referenced by the `Language::FSharp` table entry).
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    // NOTE(review): "let " is also a textual prefix of "let mutable " (listed under
    // `variables`); how the scanner resolves that overlap is not visible here.
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    // NUnit / xUnit attributes on their own line; FsUnit uses [<Test>] / [<Fact>]
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    assertions: &[],
    test_suites: &[],
};
1899
/// Symbol patterns for Groovy sources (referenced by the `Language::Groovy` table entry).
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    // Spock framework: feature methods (`def "..."`) and their block labels;
    // JUnit `@Test` annotations.
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    assertions: &[],
    test_suites: &[],
};
1911
/// Symbol patterns for Haskell sources (referenced by the `Language::Haskell` table entry).
///
/// Only type-level declarations and imports are recognised; there are no
/// function, variable or test patterns.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1922
/// Symbol patterns for Lua sources (referenced by the `Language::Lua` table entry).
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    functions_prefix_paren: &[],
    classes: &[],
    // NOTE(review): "local " is also a textual prefix of "local function " above;
    // how the scanner resolves that overlap is not visible here.
    variables: &["local "],
    imports: &[],
    // busted test framework
    tests: &["it(", "describe(", "pending("],
    assertions: &[],
    test_suites: &[],
};
1934
/// Symbol patterns for Nim sources (referenced by the `Language::Nim` table entry).
const SP_NIM: SymbolPatterns = SymbolPatterns {
    // All of Nim's routine kinds are counted as functions.
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
    // unittest module
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
};
1954
/// Symbol patterns for Objective-C sources (referenced by the `Language::ObjectiveC`
/// table entry).
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    // Instance (`-`) and class (`+`) method declarations.
    functions: &["- (", "+ ("],
    functions_prefix_paren: &[],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // XCTest: test methods start with - (void)test
    tests: &["- (void)test"],
    // Same XCTest assertion calls as the Swift list, minus the Swift-only `#expect(`.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
};
1977
/// Symbol patterns for OCaml sources (referenced by the `Language::Ocaml` table entry).
///
/// Every `let` binding is listed under `functions`; there are no separate
/// variable or test patterns.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    functions: &["let ", "let rec "],
    functions_prefix_paren: &[],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1988
/// Symbol patterns for Perl sources (referenced by the `Language::Perl` table entry).
///
/// Packages fill the "class" slot; no test or assertion patterns are defined.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    functions_prefix_paren: &[],
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1999
/// Symbol patterns for Clojure sources (referenced by the `Language::Clojure` table entry).
///
/// Every pattern includes the opening parenthesis of the form it identifies.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    functions_prefix_paren: &[],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    // clojure.test
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
};
2016
/// Symbol patterns for Julia sources (referenced by the `Language::Julia` table entry).
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    functions_prefix_paren: &[],
    // Struct and abstract/primitive type declarations fill the "class" slot.
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    // Test.jl standard library
    // NOTE(review): `@test ` is listed as a test marker, not as an assertion —
    // confirm this is the intended bucket.
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
};
2033
/// Symbol patterns for Dart sources (referenced by the `Language::Dart` table entry).
///
/// No function patterns are defined for Dart.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    // flutter_test / test package
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
};
2045
/// Symbol patterns for R sources (referenced by the `Language::R` table entry).
///
/// Only imports and test markers are recognised; there are no function,
/// class or variable patterns.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // testthat
    // NOTE(review): `expect_` (the testthat expectation prefix) is listed as a
    // test marker rather than under `assertions` — confirm this is intended.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
2057
/// Symbol patterns for SQL sources (referenced by the `Language::Sql` table entry).
///
/// Each statement is listed in an all-lowercase and an all-uppercase spelling;
/// mixed-case spellings (e.g. `Create Table`) have no pattern.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    // Stored functions and procedures.
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    functions_prefix_paren: &[],
    // Tables, views and schemas fill the "class" slot.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
2084
/// Symbol patterns for assembly sources (referenced by the `Language::Assembly`
/// table entry).
///
/// Only `proc`/`PROC` markers and include directives are recognised.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    functions: &["proc ", "PROC "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
2095
/// Symbol patterns for Zig sources (referenced by the `Language::Zig` table entry).
///
/// No class or import patterns are defined for Zig.
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["var ", "pub var "],
    imports: &[],
    // Zig built-in test blocks
    tests: &["test \"", "test{"],
    assertions: &[],
    test_suites: &[],
};
2113
/// Static (non-heap) language scanning parameters.  All fields are `'static` so this struct
/// can be stored in a `static` array.  The dynamic `skip_lines` set (used only for Python
/// docstring detection) is kept in `ScanConfig` and populated by the caller after lookup.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Tokens that start a line comment (e.g. `//`, `#`, `--`); empty when the
    /// language has no line-comment syntax.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of the block comment, or `None` when the
    /// language has no block comment.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.  Disabled for languages where `'`
    /// has another role (lifetimes, attribute ticks, line comments, …).
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether triple-quoted string literals are recognised.
    allow_triple_quote_strings: bool,
    /// Whether C#-style verbatim strings are recognised; only the C# entry enables this.
    allow_csharp_verbatim_strings: bool,
    /// Per-language patterns for functions, classes, variables, imports and tests.
    symbol_patterns: SymbolPatterns,
    /// `true` for C, C++, and Objective-C (languages that have a C preprocessor).
    has_preprocessor: bool,
}
2130
/// Per-scan configuration: the static per-language lexing parameters plus the
/// values that are supplied at call time (`skip_lines`) or kept outside
/// `StaticLangConfig` (`branch_keywords`, `lsloc_strategy`).
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Tokens that start a line comment; empty when the language has none.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of the block comment, if the language has one.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether triple-quoted string literals are recognised.
    allow_triple_quote_strings: bool,
    /// Whether C#-style verbatim strings are recognised.
    allow_csharp_verbatim_strings: bool,
    /// Dynamic set of line indices, used only for Python docstring detection and
    /// populated by the caller after the static lookup.
    skip_lines: HashSet<usize>,
    /// Per-language patterns for functions, classes, variables, imports and tests.
    symbol_patterns: SymbolPatterns,
    /// Branch keywords used to approximate cyclomatic complexity.
    branch_keywords: &'static [&'static str],
    /// Strategy for computing Logical SLOC.
    lsloc_strategy: LslocStrategy,
}
2147
2148// ── Per-family base configurations ───────────────────────────────────────────
2149//
2150// Most languages share one of two comment styles.  Define a base `const` for
2151// each family; table entries override only the fields that differ (symbol
2152// patterns, preprocessor flag, verbatim-string flag, etc.).
2153//
// C-slash family: `//` line, `/* */` block, single + double quotes.
// Covers C, C++, Obj-C, C#, Go, Java, JS/TS/Svelte/Vue, Dart, Groovy, Kotlin,
// Scala, SCSS, Swift, Rust, and Zig (Zig has no block comment → overridden).
// Table entries for unrelated comment styles also spread this base via
// struct-update syntax and override every field that differs.
const C_SLASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["//"],
    block_comment: Some(("/*", "*/")),
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    // Placeholder: each table entry supplies its own symbol patterns.
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
2167
// Hash-comment family: `#` line comment, no block comment, single + double
// quotes.  Covers Shell, Ruby, R, Perl, Elixir (each overrides only SP_*);
// Python overrides triple-quote; PowerShell and Nim override block_comment.
const HASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["#"],
    block_comment: None,
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    // Placeholder: each table entry supplies its own symbol patterns.
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
2181
/// Static language-scan configuration table — one entry per supported language.
/// Used by `language_scan_config` to avoid a large per-language `match`.  All `SP_*`
/// constants referenced here are defined above in the same module.
///
/// Each entry names only the fields that differ from its family base
/// (`C_SLASH_BASE` or `HASH_BASE`); the rest come from struct-update syntax.
static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
    // ── C preprocessor family ─────────────────────────────────────────────────
    (
        Language::C,
        StaticLangConfig {
            symbol_patterns: SP_C,
            has_preprocessor: true,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Cpp,
        StaticLangConfig {
            symbol_patterns: SP_CPP,
            has_preprocessor: true,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::ObjectiveC,
        StaticLangConfig {
            symbol_patterns: SP_OBJECTIVEC,
            has_preprocessor: true,
            ..C_SLASH_BASE
        },
    ),
    // ── C-slash family ────────────────────────────────────────────────────────
    (
        Language::CSharp,
        StaticLangConfig {
            symbol_patterns: SP_CSHARP,
            allow_csharp_verbatim_strings: true,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Go,
        StaticLangConfig {
            symbol_patterns: SP_GO,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Java,
        StaticLangConfig {
            symbol_patterns: SP_JAVA,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::JavaScript,
        StaticLangConfig {
            symbol_patterns: SP_JS,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::TypeScript,
        StaticLangConfig {
            symbol_patterns: SP_TS,
            ..C_SLASH_BASE
        },
    ),
    // Svelte and Vue reuse the JavaScript symbol patterns.
    (
        Language::Svelte,
        StaticLangConfig {
            symbol_patterns: SP_JS,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Vue,
        StaticLangConfig {
            symbol_patterns: SP_JS,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Dart,
        StaticLangConfig {
            symbol_patterns: SP_DART,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Groovy,
        StaticLangConfig {
            symbol_patterns: SP_GROOVY,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Kotlin,
        StaticLangConfig {
            symbol_patterns: SP_KOTLIN,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Scala,
        StaticLangConfig {
            symbol_patterns: SP_SCALA,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Scss,
        StaticLangConfig {
            symbol_patterns: SP_NONE,
            ..C_SLASH_BASE
        },
    ),
    // Rust: `'` is not treated as a string delimiter because it also introduces
    // lifetime annotations (`'a`)
    (
        Language::Rust,
        StaticLangConfig {
            symbol_patterns: SP_RUST,
            allow_single_quote_strings: false,
            ..C_SLASH_BASE
        },
    ),
    // Swift: no single-quote strings
    (
        Language::Swift,
        StaticLangConfig {
            symbol_patterns: SP_SWIFT,
            allow_single_quote_strings: false,
            ..C_SLASH_BASE
        },
    ),
    // Zig: no block comment
    (
        Language::Zig,
        StaticLangConfig {
            symbol_patterns: SP_ZIG,
            block_comment: None,
            ..C_SLASH_BASE
        },
    ),
    // F#: `(*` … `*)` block comment, no single-quote strings
    (
        Language::FSharp,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("(*", "*)")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            symbol_patterns: SP_FSHARP,
            ..C_SLASH_BASE
        },
    ),
    // ── Hash-comment family ───────────────────────────────────────────────────
    (
        Language::Shell,
        StaticLangConfig {
            symbol_patterns: SP_SHELL,
            ..HASH_BASE
        },
    ),
    (
        Language::Elixir,
        StaticLangConfig {
            symbol_patterns: SP_ELIXIR,
            ..HASH_BASE
        },
    ),
    (
        Language::Perl,
        StaticLangConfig {
            symbol_patterns: SP_PERL,
            ..HASH_BASE
        },
    ),
    (
        Language::R,
        StaticLangConfig {
            symbol_patterns: SP_R,
            ..HASH_BASE
        },
    ),
    (
        Language::Ruby,
        StaticLangConfig {
            symbol_patterns: SP_RUBY,
            ..HASH_BASE
        },
    ),
    // Python: triple-quote string literals
    (
        Language::Python,
        StaticLangConfig {
            symbol_patterns: SP_PYTHON,
            allow_triple_quote_strings: true,
            ..HASH_BASE
        },
    ),
    // PowerShell: `<# … #>` block comment
    (
        Language::PowerShell,
        StaticLangConfig {
            symbol_patterns: SP_POWERSHELL,
            block_comment: Some(("<#", "#>")),
            ..HASH_BASE
        },
    ),
    // Nim: `#[` … `]#` block comment
    (
        Language::Nim,
        StaticLangConfig {
            symbol_patterns: SP_NIM,
            block_comment: Some(("#[", "]#")),
            ..HASH_BASE
        },
    ),
    // Makefile / Dockerfile: `#` only, no string literals
    (
        Language::Makefile,
        StaticLangConfig {
            symbol_patterns: SP_NONE,
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            ..HASH_BASE
        },
    ),
    (
        Language::Dockerfile,
        StaticLangConfig {
            symbol_patterns: SP_NONE,
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            ..HASH_BASE
        },
    ),
    // ── Other unique comment styles ───────────────────────────────────────────
    // CSS: only `/* */` block, no line comment (SCSS is configured in the
    // C-slash family above and keeps the `//` line comment)
    (
        Language::Css,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("/*", "*/")),
            symbol_patterns: SP_NONE,
            ..C_SLASH_BASE
        },
    ),
    // HTML / XML: `<!-- -->` block, no line comment, no string literals
    (
        Language::Html,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("<!--", "-->")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            symbol_patterns: SP_NONE,
            ..C_SLASH_BASE
        },
    ),
    (
        Language::Xml,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("<!--", "-->")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            symbol_patterns: SP_NONE,
            ..C_SLASH_BASE
        },
    ),
    // Lua: `--` line, `--[[ ]]` block
    (
        Language::Lua,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: Some(("--[[", "]]")),
            symbol_patterns: SP_LUA,
            ..C_SLASH_BASE
        },
    ),
    // Haskell: `--` line, `{- -}` block
    (
        Language::Haskell,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: Some(("{-", "-}")),
            symbol_patterns: SP_HASKELL,
            ..C_SLASH_BASE
        },
    ),
    // SQL: `--` line, `/* */` block, single quote only
    (
        Language::Sql,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: false,
            symbol_patterns: SP_SQL,
            ..C_SLASH_BASE
        },
    ),
    // OCaml: `(*` … `*)` only, no line comment, no single-quote strings
    (
        Language::Ocaml,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("(*", "*)")),
            allow_single_quote_strings: false,
            symbol_patterns: SP_OCAML,
            ..C_SLASH_BASE
        },
    ),
    // Assembly: `;` line comment (NASM/MASM) + `/* */` block (GAS), double-quote
    // strings for `.ascii`/`.string` directives. `#` (GAS x86) and `@` (ARM) line
    // comments are intentionally NOT added: `#` is an immediate prefix in ARM
    // (`mov r0, #5`) and `@` appears in x86 symbol versioning (`memcpy@plt`), so a
    // universal superset would mis-count one dialect or the other.
    (
        Language::Assembly,
        StaticLangConfig {
            line_comments: &[";"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            symbol_patterns: SP_ASSEMBLY,
            ..C_SLASH_BASE
        },
    ),
    // Clojure: `;` line comment, no block, no single-quote strings
    (
        Language::Clojure,
        StaticLangConfig {
            line_comments: &[";"],
            block_comment: None,
            allow_single_quote_strings: false,
            symbol_patterns: SP_CLOJURE,
            ..C_SLASH_BASE
        },
    ),
    // Erlang: `%` line comment, no block, no single-quote strings
    (
        Language::Erlang,
        StaticLangConfig {
            line_comments: &["%"],
            block_comment: None,
            allow_single_quote_strings: false,
            symbol_patterns: SP_ERLANG,
            ..C_SLASH_BASE
        },
    ),
    // PHP: `//` or `#` line, `/* */` block
    (
        Language::Php,
        StaticLangConfig {
            line_comments: &["//", "#"],
            block_comment: Some(("/*", "*/")),
            symbol_patterns: SP_PHP,
            ..C_SLASH_BASE
        },
    ),
    // Julia: `#` line, `#= =#` block, double + triple quotes, no single
    (
        Language::Julia,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: Some(("#=", "=#")),
            allow_single_quote_strings: false,
            allow_triple_quote_strings: true,
            symbol_patterns: SP_JULIA,
            ..C_SLASH_BASE
        },
    ),
    // ── Pass 1 additions ──────────────────────────────────────────────────────
    // Solidity: C-slash family (`//`, `/* */`, single + double quotes).
    (
        Language::Solidity,
        StaticLangConfig {
            symbol_patterns: SP_SOLIDITY,
            ..C_SLASH_BASE
        },
    ),
    // Protocol Buffers: C-slash family, statements terminated by `;`.
    (
        Language::Protobuf,
        StaticLangConfig {
            symbol_patterns: SP_PROTOBUF,
            ..C_SLASH_BASE
        },
    ),
    // HCL / Terraform: `#` or `//` line, `/* */` block, double-quote strings only.
    (
        Language::Hcl,
        StaticLangConfig {
            line_comments: &["#", "//"],
            allow_single_quote_strings: false,
            symbol_patterns: SP_NONE,
            ..C_SLASH_BASE
        },
    ),
    // GraphQL: `#` line comment, no block; `"""` block-string descriptions, no single quotes.
    (
        Language::GraphQl,
        StaticLangConfig {
            allow_single_quote_strings: false,
            allow_triple_quote_strings: true,
            symbol_patterns: SP_NONE,
            ..HASH_BASE
        },
    ),
    // ── Pass 2 additions (legacy + embedded / HDL) ────────────────────────────
    // Ada: `--` line comment, no block; `'` is a char/attribute tick, not a string.
    (
        Language::Ada,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: None,
            allow_single_quote_strings: false,
            symbol_patterns: SP_ADA,
            ..C_SLASH_BASE
        },
    ),
    // VHDL: `--` line comment, no block; `'` is a bit/char literal, not a string.
    (
        Language::Vhdl,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: None,
            allow_single_quote_strings: false,
            symbol_patterns: SP_VHDL,
            ..C_SLASH_BASE
        },
    ),
    // Verilog / SystemVerilog: C-slash family; `'` is a sized-literal base, not a string.
    (
        Language::Verilog,
        StaticLangConfig {
            allow_single_quote_strings: false,
            symbol_patterns: SP_VERILOG,
            ..C_SLASH_BASE
        },
    ),
    // Tcl: `#` line comment, no block; `"` strings only.
    (
        Language::Tcl,
        StaticLangConfig {
            allow_single_quote_strings: false,
            symbol_patterns: SP_TCL,
            ..HASH_BASE
        },
    ),
    // Pascal / Delphi: `//` line, `{ }` block; strings are single-quoted.
    (
        Language::Pascal,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("{", "}")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: false,
            symbol_patterns: SP_PASCAL,
            ..C_SLASH_BASE
        },
    ),
    // Visual Basic: `'` line comment, no block; `"` strings only.
    (
        Language::VisualBasic,
        StaticLangConfig {
            line_comments: &["'"],
            block_comment: None,
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            symbol_patterns: SP_VB,
            ..C_SLASH_BASE
        },
    ),
    // Lisp / Scheme: `;` line comment, `#| |#` block; `"` strings, `'` is the quote operator.
    (
        Language::Lisp,
        StaticLangConfig {
            line_comments: &[";"],
            block_comment: Some(("#|", "|#")),
            allow_single_quote_strings: false,
            symbol_patterns: SP_LISP,
            ..C_SLASH_BASE
        },
    ),
    // ── Pass 3 additions (scientific / infra / systems / graphics) ────────────
    // Fortran: `!` line comment (free-form), no block; single + double strings.
    (
        Language::Fortran,
        StaticLangConfig {
            line_comments: &["!"],
            block_comment: None,
            symbol_patterns: SP_FORTRAN,
            ..C_SLASH_BASE
        },
    ),
    // Nix: `#` line, `/* */` block; double-quote strings (and `''` multi-line).
    (
        Language::Nix,
        StaticLangConfig {
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: false,
            symbol_patterns: SP_NONE,
            ..HASH_BASE
        },
    ),
    // Crystal: `#` line comment, no block; Ruby-like single + double strings.
    (
        Language::Crystal,
        StaticLangConfig {
            symbol_patterns: SP_CRYSTAL,
            ..HASH_BASE
        },
    ),
    // D: C-slash family (`//`, `/* */`); single-quote char literals + double strings.
    (
        Language::D,
        StaticLangConfig {
            symbol_patterns: SP_D,
            ..C_SLASH_BASE
        },
    ),
    // GLSL / HLSL / WGSL shaders: C-slash family; no char literals.
    (
        Language::Glsl,
        StaticLangConfig {
            allow_single_quote_strings: false,
            symbol_patterns: SP_NONE,
            ..C_SLASH_BASE
        },
    ),
    // CMake: `#` line, `#[[ ]]` block; double-quote strings only.
    (
        Language::Cmake,
        StaticLangConfig {
            block_comment: Some(("#[[", "]]")),
            allow_single_quote_strings: false,
            symbol_patterns: SP_CMAKE,
            ..HASH_BASE
        },
    ),
    // Elm: `--` line, `{- -}` block; double-quote strings only.
    (
        Language::Elm,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: Some(("{-", "-}")),
            allow_single_quote_strings: false,
            symbol_patterns: SP_ELM,
            ..C_SLASH_BASE
        },
    ),
    // Awk: `#` line comment, no block; double-quote strings only.
    (
        Language::Awk,
        StaticLangConfig {
            allow_single_quote_strings: false,
            symbol_patterns: SP_AWK,
            ..HASH_BASE
        },
    ),
];
2744
/// Per-call IEEE 1045-1992 flags derived from `AnalysisOptions` plus per-language properties.
/// Private to this crate; constructed inside `analyze_text`.
///
/// All three fields are plain `bool`s, so the struct is `Copy` and can be
/// passed by value.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// True for C, C++, and Objective-C — languages with a C preprocessor.
    has_preprocessor_directives: bool,
    /// Mirrors `AnalysisOptions::blank_in_block_comment_as_comment`.
    blank_in_block_comment_as_comment: bool,
    /// Mirrors `AnalysisOptions::collapse_continuation_lines`.
    collapse_continuation_lines: bool,
}
2756
/// Kind of string literal the lexer is currently inside; consumed by
/// `process_string_char` to decide when the literal ends.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Ordinary quoted string; the payload is the closing delimiter character.
    /// A backslash inside it escapes the following character.
    Single(char),
    /// Multi-character-delimited string; the payload is the closing delimiter.
    Triple(&'static str),
    /// Verbatim double-quoted string in which `""` is an escaped quote and a
    /// lone `"` closes the literal.
    VerbatimDouble,
}
2763
/// Boolean facts recorded about a single line; every flag starts `false`
/// through the derived `Default`.
// NOTE(review): the field meanings below are inferred from their names — the
// code that sets them is outside this excerpt; confirm against the scanner.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// Presumably set when the line contains code.
    has_code: bool,
    /// Presumably set when the line contains a line (single-line) comment.
    has_single_comment: bool,
    /// Presumably set when the line contains block (multi-line) comment text.
    has_multi_comment: bool,
    /// Presumably set when the line belongs to a docstring.
    has_docstring: bool,
}
2772
2773/// Process one character while the lexer is inside a string literal.
2774///
2775/// Returns `(new_string_state, advance)` where `advance` is the number of chars to skip.
2776fn process_string_char(
2777    state: StringState,
2778    chars: &[char],
2779    i: usize,
2780) -> (Option<StringState>, usize) {
2781    match state {
2782        StringState::Single(delim) => {
2783            if chars[i] == '\\' {
2784                return (Some(state), 2); // skip escaped character
2785            }
2786            if chars[i] == delim {
2787                (None, 1)
2788            } else {
2789                (Some(state), 1)
2790            }
2791        }
2792        StringState::Triple(delim) => {
2793            if starts_with(chars, i, delim) {
2794                (None, delim.len())
2795            } else {
2796                (Some(state), 1)
2797            }
2798        }
2799        StringState::VerbatimDouble => {
2800            if starts_with(chars, i, "\"\"") {
2801                return (Some(state), 2); // escaped quote-quote inside verbatim string
2802            }
2803            if chars[i] == '"' {
2804                (None, 1)
2805            } else {
2806                (Some(state), 1)
2807            }
2808        }
2809    }
2810}
2811
2812/// Process one character while the lexer is inside a block comment.
2813///
2814/// Returns `(still_in_block_comment, advance)`.
2815fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2816    if starts_with(chars, i, close) {
2817        (false, close.len())
2818    } else {
2819        (true, 1)
2820    }
2821}
2822
2823/// Attempt to begin a new string literal at position `i`.
2824///
2825/// Returns `Some((new_state, advance))` when a string opener is detected, else `None`.
2826fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2827    if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2828        return Some((StringState::VerbatimDouble, 2));
2829    }
2830    if config.allow_triple_quote_strings {
2831        if starts_with(chars, i, "\"\"\"") {
2832            return Some((StringState::Triple("\"\"\""), 3));
2833        }
2834        if starts_with(chars, i, "'''") {
2835            return Some((StringState::Triple("'''"), 3));
2836        }
2837    }
2838    if config.allow_single_quote_strings && chars[i] == '\'' {
2839        return Some((StringState::Single('\''), 1));
2840    }
2841    if config.allow_double_quote_strings && chars[i] == '"' {
2842        return Some((StringState::Single('"'), 1));
2843    }
2844    None
2845}
2846
2847/// Advance past one character position while inside a block comment.
2848///
2849/// Updates `in_block_comment` if the closing delimiter is found and returns the
2850/// number of characters consumed. Returns 0 when no block-comment config is set
2851/// (preserving the caller's `continue`-without-advance behaviour for that impossible state).
2852fn step_through_block_comment(
2853    chars: &[char],
2854    i: usize,
2855    block_comment: Option<(&'static str, &'static str)>,
2856    in_block_comment: &mut bool,
2857) -> usize {
2858    if let Some((_, close)) = block_comment {
2859        let (still_in, advance) = process_block_comment_char(chars, i, close);
2860        *in_block_comment = still_in;
2861        return advance;
2862    }
2863    0
2864}
2865
2866/// If the character at `i` starts a block comment, return the length of the opening
2867/// delimiter so the caller can advance past it. Returns `None` if no match.
2868fn try_open_block_comment(
2869    chars: &[char],
2870    i: usize,
2871    block_comment: Option<(&'static str, &'static str)>,
2872) -> Option<usize> {
2873    let (open, _) = block_comment?;
2874    starts_with(chars, i, open).then_some(open.len())
2875}
2876
/// Scan a single physical line and update `facts`, `in_block_comment`, and `string_state`.
///
/// The function returns nothing: everything it learns is written through the `&mut`
/// parameters. `in_block_comment` and `string_state` carry lexer state into and out of
/// the line, so an unterminated block comment or string continues on the next call.
/// Scanning stops early at the first line-comment prefix found outside a string or
/// block comment.
///
/// The checks inside the loop are order-sensitive: string state wins over block-comment
/// state, and a string opener is tried before a block-comment opener, which is tried
/// before a line-comment prefix.
fn scan_line(
    chars: &[char],
    config: &ScanConfig,
    facts: &mut LineFacts,
    in_block_comment: &mut bool,
    string_state: &mut Option<StringState>,
) {
    let mut i = 0usize;
    while i < chars.len() {
        // Inside a string literal — advance until the closing delimiter.
        // String contents count as code, whatever characters they contain.
        if let Some(state) = *string_state {
            facts.has_code = true;
            let (new_state, advance) = process_string_char(state, chars, i);
            *string_state = new_state;
            i += advance;
            continue;
        }

        // Inside a block comment — advance until the closing delimiter.
        if *in_block_comment {
            facts.has_multi_comment = true;
            i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
            continue;
        }

        // Whitespace outside any string/comment — skip without recording anything.
        if chars[i].is_whitespace() {
            i += 1;
            continue;
        }

        // Attempt to open a string literal. The opener itself already counts as code.
        if let Some((new_state, advance)) = try_open_string(chars, i, config) {
            facts.has_code = true;
            *string_state = Some(new_state);
            i += advance;
            continue;
        }

        // Attempt to open a block comment.
        if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
            facts.has_multi_comment = true;
            *in_block_comment = true;
            i += advance;
            continue;
        }

        // Line comment — rest of the line is a comment; stop scanning.
        if config
            .line_comments
            .iter()
            .any(|prefix| starts_with(chars, i, prefix))
        {
            facts.has_single_comment = true;
            break;
        }

        // Plain code character.
        facts.has_code = true;
        i += 1;
    }
}
2942
2943/// Apply IEEE 1045-1992 §4.2 preprocessor-directive tracking and continuation-line merging,
2944/// then emit the finalized `LineFacts` for this physical line.
2945///
2946/// Returns `None` when the line is part of a continuation sequence and should be deferred.
2947fn finalize_line_facts(
2948    facts: LineFacts,
2949    trimmed: &str,
2950    raw: &mut RawLineCounts,
2951    ieee: IeeeFlags,
2952    in_block_comment: bool,
2953    string_state: Option<StringState>,
2954    pending_continuation: &mut Option<LineFacts>,
2955) -> Option<LineFacts> {
2956    // IEEE 1045-1992 §4.2: track preprocessor/compiler directive lines (C/C++/ObjC).
2957    // A directive line is a pure code line (no comment on the same physical line) whose
2958    // trimmed content starts with '#'.
2959    if ieee.has_preprocessor_directives
2960        && facts.has_code
2961        && !facts.has_single_comment
2962        && !facts.has_multi_comment
2963        && trimmed.starts_with('#')
2964    {
2965        raw.compiler_directive_lines += 1;
2966    }
2967
2968    // IEEE 1045-1992 continuation-line handling.
2969    // A line is a continuation starter when it ends with '\' outside any comment or string.
2970    let is_continuation = ieee.collapse_continuation_lines
2971        && !in_block_comment
2972        && string_state.is_none()
2973        && trimmed.ends_with('\\');
2974
2975    if is_continuation {
2976        let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2977        pending.has_code |= facts.has_code;
2978        pending.has_single_comment |= facts.has_single_comment;
2979        pending.has_multi_comment |= facts.has_multi_comment;
2980        pending.has_docstring |= facts.has_docstring;
2981        return None; // defer classification until the sequence ends
2982    }
2983
2984    // Merge any accumulated continuation facts into the final line.
2985    let emit = if let Some(pending) = pending_continuation.take() {
2986        LineFacts {
2987            has_code: pending.has_code | facts.has_code,
2988            has_single_comment: pending.has_single_comment | facts.has_single_comment,
2989            has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2990            has_docstring: pending.has_docstring | facts.has_docstring,
2991        }
2992    } else {
2993        facts
2994    };
2995    Some(emit)
2996}
2997
/// Scan and classify one physical line, updating all running state in place.
///
/// Pre-classified lines (present in `config.skip_lines`) are counted as docstring-comment
/// lines and returned early without further analysis.
///
/// For every other line the steps are: lex the line (`scan_line`), apply directive and
/// continuation handling (`finalize_line_facts`), bump exactly one line-kind counter
/// (`classify_line`), and — for lines that contain code — update symbol counts,
/// cyclomatic complexity, logical SLOC and the unique-line hash list.
///
/// A line deferred as part of a backslash continuation returns right after
/// `finalize_line_facts`: it is counted in `total_physical_lines` only, and its text
/// contributes no symbols, branches, LSLOC or hash.
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::many_single_char_names)] // destructuring return from count_symbols; names match field roles
fn process_physical_line(
    line: &str,
    line_idx: usize,
    config: &ScanConfig,
    raw: &mut RawLineCounts,
    in_block_comment: &mut bool,
    string_state: &mut Option<StringState>,
    pending_continuation: &mut Option<LineFacts>,
    ieee: IeeeFlags,
) {
    raw.total_physical_lines += 1;

    // Lines already identified as docstrings bypass the lexer entirely, so they never
    // change `in_block_comment` or `string_state`.
    if config.skip_lines.contains(&line_idx) {
        raw.docstring_comment_lines += 1;
        return;
    }

    let trimmed = line.trim();
    let mut facts = LineFacts::default();

    // IEEE 1045-1992: blank lines inside block comments are comment lines by default.
    // When blank_in_block_comment_as_comment is false, blank lines keep their blank
    // classification even while inside a block comment.
    // This must be decided before scanning: `scan_line` sees no characters on an empty
    // line and therefore would not set the flag itself.
    if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
        facts.has_multi_comment = true;
    }

    // The lexer works on the untrimmed line, one `char` at a time.
    let chars: Vec<char> = line.chars().collect();
    scan_line(&chars, config, &mut facts, in_block_comment, string_state);

    // `None` means the line was folded into a pending continuation sequence.
    let Some(emit) = finalize_line_facts(
        facts,
        trimmed,
        raw,
        ieee,
        *in_block_comment,
        *string_state,
        pending_continuation,
    ) else {
        return;
    };

    classify_line(raw, &emit, trimmed);

    if emit.has_code {
        use std::hash::{DefaultHasher, Hash, Hasher};
        // All per-line metrics below look at the trimmed text of this physical line only.
        let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
        raw.functions += f;
        raw.classes += c;
        raw.variables += v;
        raw.imports += i;
        raw.test_count += t;
        raw.test_assertion_count += a;
        raw.test_suite_count += s;

        // Cyclomatic complexity: count branch decision keywords on code lines.
        raw.cyclomatic_complexity +=
            count_branch_in_line(trimmed.as_bytes(), config.branch_keywords);

        // Logical SLOC (language-specific strategy).
        match config.lsloc_strategy {
            // One logical statement per `;` byte on the line (saturating at u32::MAX).
            LslocStrategy::Semicolons => {
                let semi = u32::try_from(trimmed.bytes().filter(|&b| b == b';').count())
                    .unwrap_or(u32::MAX);
                *raw.lsloc.get_or_insert(0) += semi;
            }
            // One logical statement per code line that does not visibly continue onto
            // the next line (trailing `\`, `,`, `(`, `[` or `{`).
            LslocStrategy::NonContinuationNewlines => {
                let cont = trimmed.ends_with('\\')
                    || trimmed.ends_with(',')
                    || trimmed.ends_with('(')
                    || trimmed.ends_with('[')
                    || trimmed.ends_with('{');
                if !cont {
                    *raw.lsloc.get_or_insert(0) += 1;
                }
            }
            // `raw.lsloc` is left untouched (it stays `None` unless set elsewhere).
            LslocStrategy::Unsupported => {}
        }

        // ULOC: hash each trimmed code line for cross-file unique-line counting.
        let mut h = DefaultHasher::new();
        trimmed.hash(&mut h);
        raw.code_line_hashes.push(h.finish());
    }
}
3090
3091#[allow(clippy::needless_pass_by_value)]
3092fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
3093    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3094    let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3095
3096    let mut raw = RawLineCounts::default();
3097    let mut warnings = Vec::new();
3098
3099    let mut in_block_comment = false;
3100    let mut string_state: Option<StringState> = None;
3101    // IEEE continuation-line state: accumulates facts across a backslash-continued sequence.
3102    let mut pending_continuation: Option<LineFacts> = None;
3103
3104    for (line_idx, line) in lines.iter().enumerate() {
3105        process_physical_line(
3106            line,
3107            line_idx,
3108            &config,
3109            &mut raw,
3110            &mut in_block_comment,
3111            &mut string_state,
3112            &mut pending_continuation,
3113            ieee,
3114        );
3115    }
3116
3117    // Flush any pending continuation that reaches end-of-file without a closing line.
3118    if let Some(pending) = pending_continuation.take() {
3119        classify_line(&mut raw, &pending, "");
3120    }
3121
3122    if in_block_comment {
3123        warnings.push("unclosed block comment detected; result is best effort".into());
3124    }
3125    if string_state.is_some() {
3126        warnings.push("unclosed string literal detected; result is best effort".into());
3127    }
3128
3129    RawFileAnalysis {
3130        raw,
3131        parse_mode: if warnings.is_empty() {
3132            ParseMode::Lexical
3133        } else {
3134            ParseMode::LexicalBestEffort
3135        },
3136        warnings,
3137        style_analysis: None,
3138    }
3139}
3140
3141const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
3142    if facts.has_docstring {
3143        raw.docstring_comment_lines += 1;
3144    } else if !facts.has_code
3145        && !facts.has_single_comment
3146        && !facts.has_multi_comment
3147        && trimmed.is_empty()
3148    {
3149        raw.blank_only_lines += 1;
3150    } else if facts.has_code && facts.has_single_comment {
3151        raw.mixed_code_single_comment_lines += 1;
3152    } else if facts.has_code && facts.has_multi_comment {
3153        raw.mixed_code_multi_comment_lines += 1;
3154    } else if facts.has_code {
3155        raw.code_only_lines += 1;
3156    } else if facts.has_single_comment {
3157        raw.single_comment_only_lines += 1;
3158    } else if facts.has_multi_comment {
3159        raw.multi_comment_only_lines += 1;
3160    } else if trimmed.is_empty() {
3161        raw.blank_only_lines += 1;
3162    } else {
3163        raw.skipped_unknown_lines += 1;
3164    }
3165}
3166
3167fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
3168    let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
3169    // For return-type-led languages (C/C++): match prefix AND `(` present AND no `=` sits
3170    // between the prefix start and the first `(` (guards against `void* p = malloc(n)`).
3171    let fn_pp = if patterns.functions_prefix_paren.is_empty() {
3172        0
3173    } else if let Some(paren_pos) = trimmed.find('(') {
3174        if trimmed[..paren_pos].contains('=') {
3175            0
3176        } else {
3177            hit(patterns.functions_prefix_paren)
3178        }
3179    } else {
3180        0
3181    };
3182    let test_hit = hit(patterns.tests);
3183    // Lines matching a test pattern count as tests, not as plain functions or classes.
3184    // This prevents double-counting in Python (`def test_` / `class Test`) and Go
3185    // (`func Test` / `func Benchmark` / `func Fuzz`) where the same line satisfies both
3186    // a function/class prefix and a test pattern. Rust is unaffected: `#[test]` is a
3187    // standalone attribute line; the `fn` declaration on the next line does not match any
3188    // test pattern and still increments functions correctly.
3189    let fn_hit = if test_hit == 0 {
3190        hit(patterns.functions) | fn_pp
3191    } else {
3192        0
3193    };
3194    let class_hit = if test_hit == 0 {
3195        hit(patterns.classes)
3196    } else {
3197        0
3198    };
3199    (
3200        fn_hit,
3201        class_hit,
3202        hit(patterns.variables),
3203        hit(patterns.imports),
3204        test_hit,
3205        hit(patterns.assertions),
3206        hit(patterns.test_suites),
3207    )
3208}
3209
/// True when `line[start..end]` is not glued to an identifier character on either side.
///
/// Identifier characters are ASCII letters, ASCII digits and `_`. The start of the line
/// and the end of the line both count as boundaries.
fn is_word_boundary(line: &[u8], start: usize, end: usize) -> bool {
    let is_ident_byte = |b: u8| b.is_ascii_alphanumeric() || b == b'_';
    let before_ok = start == 0 || !is_ident_byte(line[start - 1]);
    let after_ok = end >= line.len() || !is_ident_byte(line[end]);
    before_ok && after_ok
}
3217
3218/// True when `kw_bytes` appears at `line[i..]`, respecting word boundaries when `word_kw` is set.
3219fn keyword_matches_at(line: &[u8], i: usize, kw_bytes: &[u8], word_kw: bool) -> bool {
3220    if &line[i..i + kw_bytes.len()] != kw_bytes {
3221        return false;
3222    }
3223    !word_kw || is_word_boundary(line, i, i + kw_bytes.len())
3224}
3225
3226/// Count branch keyword occurrences in `line` (ASCII bytes of a trimmed code line).
3227///
3228/// Alphabetic keywords are matched word-bounded (not as substrings of longer identifiers).
3229/// Operator tokens (`||`, `&&`, `?`) are matched as raw substrings.
3230fn count_branch_in_line(line: &[u8], keywords: &[&str]) -> u32 {
3231    if keywords.is_empty() || line.is_empty() {
3232        return 0;
3233    }
3234    let mut total = 0u32;
3235    for &kw in keywords {
3236        let kw_bytes = kw.as_bytes();
3237        let word_kw = kw.bytes().all(|b| b.is_ascii_alphabetic() || b == b'_');
3238        let mut i = 0usize;
3239        while i + kw_bytes.len() <= line.len() {
3240            if keyword_matches_at(line, i, kw_bytes, word_kw) {
3241                total += 1;
3242                i += kw_bytes.len();
3243            } else {
3244                i += 1;
3245            }
3246        }
3247    }
3248    total
3249}
3250
/// True when the characters of `needle` appear in `chars` starting at `index`.
///
/// The comparison is done character by character without allocating, because this is
/// called for every position the lexer examines. An `index` past the end of `chars`
/// yields `false` (even for an empty needle); an empty needle matches at any
/// in-range index, including `chars.len()`.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(rest) = chars.get(index..) else {
        return false;
    };
    let mut rest = rest.iter();
    // Running out of input before the needle is exhausted makes `next()` return `None`,
    // which never equals `Some(..)`, so a too-short tail is rejected.
    needle.chars().all(|expected| rest.next() == Some(&expected))
}
3255
/// One entry of the scope stack used while detecting Python docstrings.
///
/// The stack always starts with a module-level entry (`indent: 0`); a further entry is
/// pushed when the first indented line after a `def`/`class` header is reached.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation of the scope's body, measured as the number of leading whitespace
    /// characters (a tab counts as one).
    indent: usize,
    /// True until the first significant line of the scope has been examined; only that
    /// line can be recorded as the scope's docstring.
    expect_docstring: bool,
}
3261
3262/// Update `contexts` to pop any scopes that the current `indent` has outdented past.
3263fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
3264    while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
3265        contexts.pop();
3266    }
3267}
3268
3269/// Handle `pending_block_indent` transition: push a new docstring-expecting context when we
3270/// detect the first indented line of a new block, or cancel the pending state otherwise.
3271fn py_handle_pending_indent(
3272    pending_block_indent: &mut Option<usize>,
3273    contexts: &mut Vec<PyContext>,
3274    indent: usize,
3275    trimmed: &str,
3276) {
3277    let Some(base_indent) = *pending_block_indent else {
3278        return;
3279    };
3280    if indent > base_indent {
3281        contexts.push(PyContext {
3282            indent,
3283            expect_docstring: true,
3284        });
3285        *pending_block_indent = None;
3286    } else if !trimmed.starts_with('@') {
3287        *pending_block_indent = None;
3288    }
3289}
3290
3291/// Check whether the current line is a docstring opener in the current context.
3292///
3293/// If it is, records the line, adjusts `ctx.expect_docstring`, and optionally sets
3294/// `active_docstring` for multi-line docstrings. Returns `true` when the caller should
3295/// `continue` to the next line.
3296fn py_try_record_docstring(
3297    ctx: &mut PyContext,
3298    trimmed: &str,
3299    idx: usize,
3300    docstring_lines: &mut HashSet<usize>,
3301    active_docstring: &mut Option<(&'static str, usize)>,
3302) -> bool {
3303    if !ctx.expect_docstring {
3304        return false;
3305    }
3306    if let Some(delim) = docstring_delimiter(trimmed) {
3307        docstring_lines.insert(idx);
3308        ctx.expect_docstring = false;
3309        if !closes_triple_docstring(trimmed, delim, true) {
3310            *active_docstring = Some((delim, idx));
3311        }
3312        return true;
3313    }
3314    ctx.expect_docstring = false;
3315    false
3316}
3317
3318/// Advance through an active multi-line docstring: marks the current line and clears
3319/// `active_docstring` when the closing delimiter is found. Returns `true` when the caller
3320/// should `continue` to the next line (i.e. we were inside a docstring).
3321fn track_active_docstring(
3322    active_docstring: &mut Option<(&'static str, usize)>,
3323    docstring_lines: &mut HashSet<usize>,
3324    idx: usize,
3325    trimmed: &str,
3326) -> bool {
3327    let Some((delim, start_line)) = *active_docstring else {
3328        return false;
3329    };
3330    docstring_lines.insert(idx);
3331    if closes_triple_docstring(trimmed, delim, idx == start_line) {
3332        *active_docstring = None;
3333    }
3334    true
3335}
3336
3337/// Attempt to record a docstring opener using the top of the context stack.
3338/// Returns `true` when the caller should `continue` to the next line.
3339fn try_record_docstring_if_context(
3340    contexts: &mut [PyContext],
3341    trimmed: &str,
3342    idx: usize,
3343    docstring_lines: &mut HashSet<usize>,
3344    active_docstring: &mut Option<(&'static str, usize)>,
3345) -> bool {
3346    let Some(ctx) = contexts.last_mut() else {
3347        return false;
3348    };
3349    py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
3350}
3351
/// Mark every line from the start of a still-open docstring to the end of the file.
///
/// `active_docstring` is the `(delimiter, start_line)` pair of a docstring that was never
/// closed, if any. All indices in `start_line..num_lines` are inserted into
/// `docstring_lines`; nothing happens when no docstring is open or the range is empty.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, start_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(start_line..num_lines);
}
3364
3365fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
3366    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3367    let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3368
3369    let mut docstring_lines = HashSet::new();
3370    let mut contexts = vec![PyContext {
3371        indent: 0,
3372        expect_docstring: true,
3373    }];
3374    let mut pending_block_indent: Option<usize> = None;
3375    let mut active_docstring: Option<(&'static str, usize)> = None;
3376
3377    for (idx, line) in lines.iter().enumerate() {
3378        let trimmed = line.trim();
3379        let indent = leading_indent(line);
3380
3381        if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
3382            continue;
3383        }
3384
3385        // Blank lines and comment lines don't affect docstring detection.
3386        if trimmed.is_empty() || trimmed.starts_with('#') {
3387            continue;
3388        }
3389
3390        py_pop_outdented_contexts(&mut contexts, indent);
3391        py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
3392
3393        if try_record_docstring_if_context(
3394            &mut contexts,
3395            trimmed,
3396            idx,
3397            &mut docstring_lines,
3398            &mut active_docstring,
3399        ) {
3400            continue;
3401        }
3402
3403        if is_python_block_header(trimmed) {
3404            pending_block_indent = Some(indent);
3405        }
3406    }
3407
3408    mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
3409
3410    docstring_lines
3411}
3412
/// Number of whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, so a tab contributes 1, not a tab width.
fn leading_indent(line: &str) -> usize {
    let indent_bytes = line.len() - line.trim_start().len();
    line[..indent_bytes].chars().count()
}
3416
/// True when the trimmed line is a `def`, `async def` or `class` header ending in `:`.
///
/// The check is purely textual: the line must start with one of the keywords followed by
/// a space and its last character must be `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];

    let opens_definition = HEADER_KEYWORDS
        .iter()
        .any(|keyword| trimmed.starts_with(keyword));
    opens_definition && trimmed.ends_with(':')
}
3423
/// Return the triple-quote delimiter that the trimmed line starts with, if any.
///
/// Any run of string-prefix letters (`r`, `u`, `b`, `f` in either case) in front of the
/// quotes is skipped first. `"""` is checked before `'''`. Returns `None` when the line
/// does not begin with a triple-quoted string.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    const TRIPLE_QUOTES: [&str; 2] = ["\"\"\"", "'''"];

    let after_prefix = trimmed
        .trim_start_matches(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F'));

    TRIPLE_QUOTES
        .iter()
        .copied()
        .find(|quote| after_prefix.starts_with(*quote))
}
3445
/// True when the line contains enough occurrences of `delim` to close a docstring.
///
/// Occurrences are counted left to right without overlap. On the line that opened the
/// docstring (`same_line_as_start`) the first occurrence is the opener, so two are
/// required; on any later line a single occurrence closes it.
///
/// Counting stops as soon as the required number is reached, and an empty `delim`
/// cannot stall the search.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    let required = if same_line_as_start { 2 } else { 1 };
    trimmed.matches(delim).take(required).count() == required
}
3460
3461/// Tree-sitter-backed adapters (compiled only when the `tree-sitter` feature is enabled).
3462///
3463/// When parsing succeeds the result is used directly; on any failure the caller falls back
3464/// to the lexical state machine.
3465#[cfg(feature = "tree-sitter")]
3466pub mod ts {
3467    use tree_sitter::Node;
3468
3469    use super::{ParseMode, RawFileAnalysis, RawLineCounts};
3470
    /// Configuration for which AST node kinds map to symbols in this grammar.
    ///
    /// Every field uses the empty string to mean "disabled".
    struct SymbolKinds {
        /// Node kind name for function definitions (e.g. `"function_definition"`).
        function_def: &'static str,
        /// Node kind name for class definitions (e.g. `"class_definition"`).
        class_def: &'static str,
        /// Prefix that marks a function as a test: a function node whose `name` field
        /// starts with it is counted as a test instead of a function.
        /// Empty string disables test-prefix detection.
        test_fn_prefix: &'static str,
        /// Prefix that marks a class as a test: a class node whose `name` field starts
        /// with it is counted as a test instead of a class.
        /// Empty string disables test-prefix detection.
        test_class_prefix: &'static str,
        /// When non-empty, `call` nodes whose `function` is an `attribute` access and whose
        /// attribute identifier starts with this prefix are counted as test assertions.
        /// Used for Python `self.assertXxx(...)` detection.
        assertion_attr_prefix: &'static str,
    }
3488
    impl SymbolKinds {
        /// Configuration with every field empty, i.e. all symbol detection disabled.
        ///
        /// With both `function_def` and `class_def` empty, `analyze_lines` skips the
        /// symbol-counting walk entirely.
        const fn none() -> Self {
            Self {
                function_def: "",
                class_def: "",
                test_fn_prefix: "",
                test_class_prefix: "",
                assertion_attr_prefix: "",
            }
        }
    }
3500
    /// Classify every line of `text` using a tree-sitter grammar.
    ///
    /// `comment_node_kinds` — node type names that represent comments in this grammar
    /// `docstring_stmt_kind` — optional parent node type whose direct `string` child is a docstring
    /// `symbols` — AST node kinds used to populate symbol counters
    ///
    /// Returns `None` when the grammar cannot be loaded into the parser or when parsing
    /// yields no tree; the caller is expected to fall back to the lexical analyser.
    /// On success the result carries `ParseMode::TreeSitter` and no warnings.
    fn analyze_lines(
        text: &str,
        ts_language: &tree_sitter::Language,
        comment_node_kinds: &[&str],
        docstring_stmt_kind: Option<&str>,
        symbols: &SymbolKinds,
    ) -> Option<RawFileAnalysis> {
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(ts_language).ok()?;
        let tree = parser.parse(text, None)?;

        // Lines are split on `\n` only; `text` is used as-is, without the CR
        // normalisation the lexical path applies.
        let lines: Vec<&str> = text.split_terminator('\n').collect();
        let n = lines.len();

        // One flag per line, filled in by the AST walk below.
        let mut has_code = vec![false; n];
        let mut has_comment = vec![false; n];
        let mut comment_is_block = vec![false; n];
        let mut has_docstring = vec![false; n];

        // Walk every node in the tree and mark line arrays.
        let mut ctx = VisitCtx {
            source: text.as_bytes(),
            comment_kinds: comment_node_kinds,
            docstring_stmt_kind,
            has_code: &mut has_code,
            has_comment: &mut has_comment,
            comment_is_block: &mut comment_is_block,
            has_docstring: &mut has_docstring,
        };
        visit(tree.root_node(), &mut ctx);

        // Turn the per-line flags into line-kind counters.
        let mut raw = RawLineCounts::default();
        classify_ts_lines(
            &lines,
            &has_code,
            &has_comment,
            &comment_is_block,
            &has_docstring,
            &mut raw,
        );

        // Symbol counting: walk the AST a second time to collect function/class/test counts.
        // Skipped when neither a function nor a class node kind is configured.
        if !symbols.function_def.is_empty() || !symbols.class_def.is_empty() {
            count_symbols(tree.root_node(), text.as_bytes(), symbols, &mut raw);
        }

        Some(RawFileAnalysis {
            raw,
            parse_mode: ParseMode::TreeSitter,
            warnings: Vec::new(),
            style_analysis: None,
        })
    }
3559
3560    /// Recurse into every direct child of `node`.
3561    fn recurse_children(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
3562        for i in 0..node.child_count() {
3563            #[allow(clippy::cast_possible_truncation)]
3564            if let Some(child) = node.child(i as u32) {
3565                count_symbols(child, source, kinds, raw);
3566            }
3567        }
3568    }
3569
3570    /// Handle a function-definition node. Returns `true` if the node matched.
3571    fn try_count_function(
3572        node: Node,
3573        source: &[u8],
3574        kinds: &SymbolKinds,
3575        raw: &mut RawLineCounts,
3576    ) -> bool {
3577        if kinds.function_def.is_empty() || node.kind() != kinds.function_def {
3578            return false;
3579        }
3580        let name = node
3581            .child_by_field_name("name")
3582            .and_then(|n| n.utf8_text(source).ok())
3583            .unwrap_or("");
3584        if !kinds.test_fn_prefix.is_empty() && name.starts_with(kinds.test_fn_prefix) {
3585            raw.test_count += 1;
3586        } else {
3587            raw.functions += 1;
3588        }
3589        recurse_children(node, source, kinds, raw);
3590        true
3591    }
3592
3593    /// Handle a class-definition node. Returns `true` if the node matched.
3594    fn try_count_class(
3595        node: Node,
3596        source: &[u8],
3597        kinds: &SymbolKinds,
3598        raw: &mut RawLineCounts,
3599    ) -> bool {
3600        if kinds.class_def.is_empty() || node.kind() != kinds.class_def {
3601            return false;
3602        }
3603        let name = node
3604            .child_by_field_name("name")
3605            .and_then(|n| n.utf8_text(source).ok())
3606            .unwrap_or("");
3607        if !kinds.test_class_prefix.is_empty() && name.starts_with(kinds.test_class_prefix) {
3608            raw.test_count += 1;
3609        } else {
3610            raw.classes += 1;
3611        }
3612        recurse_children(node, source, kinds, raw);
3613        true
3614    }
3615
3616    /// Handle an assertion call node. Returns `true` if the node matched (skips recursion
3617    /// into arguments, preserving "don't double-count test bodies" semantics).
3618    fn try_count_assertion(
3619        node: Node,
3620        source: &[u8],
3621        kinds: &SymbolKinds,
3622        raw: &mut RawLineCounts,
3623    ) -> bool {
3624        if kinds.assertion_attr_prefix.is_empty() || node.kind() != "call" {
3625            return false;
3626        }
3627        let Some(func) = node.child_by_field_name("function") else {
3628            return false;
3629        };
3630        if func.kind() != "attribute" {
3631            return false;
3632        }
3633        let attr_text = func
3634            .child_by_field_name("attribute")
3635            .and_then(|n| n.utf8_text(source).ok())
3636            .unwrap_or("");
3637        if !attr_text.starts_with(kinds.assertion_attr_prefix) {
3638            return false;
3639        }
3640        raw.test_assertion_count += 1;
3641        true
3642    }
3643
3644    /// Walk the AST and populate `raw.functions`, `raw.classes`, `raw.test_count`,
3645    /// and `raw.test_assertion_count`.
3646    fn count_symbols(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
3647        if try_count_function(node, source, kinds, raw) {
3648            return;
3649        }
3650        if try_count_class(node, source, kinds, raw) {
3651            return;
3652        }
3653        if try_count_assertion(node, source, kinds, raw) {
3654            return;
3655        }
3656        recurse_children(node, source, kinds, raw);
3657    }
3658
3659    /// Flags describing what kinds of content appear on a single line.
3660    // Four bools are the natural representation for these four independent properties.
3661    #[allow(clippy::struct_excessive_bools)]
3662    #[derive(Clone, Copy)]
3663    struct TsLineFlags {
3664        has_code: bool,
3665        has_comment: bool,
3666        comment_is_block: bool,
3667        has_docstring: bool,
3668    }
3669
3670    /// Classify a single tree-sitter-annotated line and accumulate into `raw`.
3671    const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
3672        if trimmed.is_empty() {
3673            raw.blank_only_lines += 1;
3674        } else if flags.has_docstring && !flags.has_code {
3675            raw.docstring_comment_lines += 1;
3676        } else if flags.has_code && flags.has_comment {
3677            // Classify the mixed line as single or multi based on what kind of comment is on it.
3678            if flags.comment_is_block {
3679                raw.mixed_code_multi_comment_lines += 1;
3680            } else {
3681                raw.mixed_code_single_comment_lines += 1;
3682            }
3683        } else if flags.has_comment {
3684            if flags.comment_is_block {
3685                raw.multi_comment_only_lines += 1;
3686            } else {
3687                raw.single_comment_only_lines += 1;
3688            }
3689        } else {
3690            raw.code_only_lines += 1;
3691        }
3692    }
3693
3694    /// Classify each tree-sitter-annotated line and accumulate counts into `raw`.
3695    fn classify_ts_lines(
3696        lines: &[&str],
3697        has_code: &[bool],
3698        has_comment: &[bool],
3699        comment_is_block: &[bool],
3700        has_docstring: &[bool],
3701        raw: &mut RawLineCounts,
3702    ) {
3703        for i in 0..lines.len() {
3704            raw.total_physical_lines += 1;
3705            classify_ts_line(
3706                lines[i].trim(),
3707                TsLineFlags {
3708                    has_code: has_code[i],
3709                    has_comment: has_comment[i],
3710                    comment_is_block: comment_is_block[i],
3711                    has_docstring: has_docstring[i],
3712                },
3713                raw,
3714            );
3715        }
3716    }
3717
3718    struct VisitCtx<'a> {
3719        source: &'a [u8],
3720        comment_kinds: &'a [&'a str],
3721        docstring_stmt_kind: Option<&'a str>,
3722        has_code: &'a mut Vec<bool>,
3723        has_comment: &'a mut Vec<bool>,
3724        comment_is_block: &'a mut Vec<bool>,
3725        has_docstring: &'a mut Vec<bool>,
3726    }
3727
3728    /// Mark all rows of a comment node and detect whether it is a block comment.
3729    fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
3730        let start_row = node.start_position().row;
3731        let end_row = node.end_position().row;
3732        let first_two = node
3733            .utf8_text(ctx.source)
3734            .unwrap_or("")
3735            .get(..2)
3736            .unwrap_or("");
3737        let is_block = first_two == "/*" || first_two == "<#";
3738        for row in start_row..=end_row {
3739            if row < ctx.has_comment.len() {
3740                ctx.has_comment[row] = true;
3741                if is_block {
3742                    ctx.comment_is_block[row] = true;
3743                }
3744            }
3745        }
3746    }
3747
3748    /// If `node` is an `expression_statement` whose sole named child is a string literal,
3749    /// mark those rows as docstring and return `true`.
3750    fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
3751        let Some(stmt_kind) = ctx.docstring_stmt_kind else {
3752            return false;
3753        };
3754        if kind != stmt_kind || node.named_child_count() != 1 {
3755            return false;
3756        }
3757        let Some(child) = node.named_child(0) else {
3758            return false;
3759        };
3760        if child.kind() != "string" {
3761            return false;
3762        }
3763        let child_start = child.start_position().row;
3764        let child_end = child.end_position().row;
3765        for row in child_start..=child_end {
3766            if row < ctx.has_docstring.len() {
3767                ctx.has_docstring[row] = true;
3768            }
3769        }
3770        true
3771    }
3772
3773    /// Mark all rows of a leaf (non-comment, non-extra) node as code.
3774    fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
3775        let start_row = node.start_position().row;
3776        let end_row = node.end_position().row;
3777        for row in start_row..=end_row {
3778            if row < ctx.has_code.len() {
3779                ctx.has_code[row] = true;
3780            }
3781        }
3782    }
3783
3784    #[allow(clippy::too_many_lines)]
3785    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
3786        let kind = node.kind();
3787
3788        // Comment node — mark rows as comment, detect block vs. line comment.
3789        if ctx.comment_kinds.contains(&kind) {
3790            visit_comment_node(node, ctx);
3791            return;
3792        }
3793
3794        // Python docstring: expression_statement whose only named child is a string literal.
3795        if visit_maybe_docstring(node, kind, ctx) {
3796            return;
3797        }
3798
3799        // Leaf non-comment node: mark as code.
3800        if node.child_count() == 0 && !node.is_extra() {
3801            visit_leaf_code(node, ctx);
3802            return;
3803        }
3804
3805        for i in 0..node.child_count() {
3806            #[allow(clippy::cast_possible_truncation)]
3807            // child_count bounded by tree-sitter u32 capacity
3808            if let Some(child) = node.child(i as u32) {
3809                visit(child, ctx);
3810            }
3811        }
3812    }
3813
3814    const C_SYMBOLS: SymbolKinds = SymbolKinds::none();
3815
3816    const PYTHON_SYMBOLS: SymbolKinds = SymbolKinds {
3817        function_def: "function_definition",
3818        class_def: "class_definition",
3819        test_fn_prefix: "test_",
3820        test_class_prefix: "Test",
3821        assertion_attr_prefix: "assert",
3822    };
3823
3824    /// Parse C or C++ source with tree-sitter-c.
3825    #[must_use]
3826    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
3827        let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
3828        analyze_lines(text, &lang, &["comment"], None, &C_SYMBOLS)
3829    }
3830
3831    /// Parse Python source with tree-sitter-python.
3832    #[must_use]
3833    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
3834        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
3835        analyze_lines(
3836            text,
3837            &lang,
3838            &["comment"],
3839            Some("expression_statement"),
3840            &PYTHON_SYMBOLS,
3841        )
3842    }
3843}
3844
#[cfg(test)]
mod tests {
    use super::*;

    use std::collections::BTreeMap;
    use std::path::Path;

    // ── Line classification ──────────────────────────────────────────────────

    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
    """function docs"""
    value = 1  # trailing comment
    return value
"#;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        assert_eq!(result.raw.docstring_comment_lines, 2);
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.code_only_lines, 2);
    }

    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let result = analyze_text(Language::C, input, AnalysisOptions::default());
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.multi_comment_only_lines, 1);
    }

    #[test]
    fn detect_language_by_shebang() {
        let language = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(language, Some(Language::Shell));
    }

    // ── count_symbols: no double-counting of test functions ──────────────────

    /// Analyze a single line of `lang` source and return, in order:
    /// `(functions, classes, variables, imports, test_count, test_assertion_count,
    /// test_suite_count)`.
    fn sym(lang: Language, line: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
        let result = analyze_text(lang, &format!("{line}\n"), AnalysisOptions::default());
        let r = &result.raw;
        (
            r.functions,
            r.classes,
            r.variables,
            r.imports,
            r.test_count,
            r.test_assertion_count,
            r.test_suite_count,
        )
    }

    #[test]
    fn python_test_fn_not_double_counted() {
        // def test_ lines count as tests only, NOT as functions
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def test_foo():");
        assert_eq!(f, 0, "test fn must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(c, 0);
    }

    #[test]
    fn python_test_class_not_double_counted() {
        // class Test* lines count as tests only, NOT as classes
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class TestFoo:");
        assert_eq!(c, 0, "test class must not also increment classes");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(f, 0);
    }

    #[test]
    fn python_regular_fn_counts_as_function() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def regular():");
        assert_eq!(f, 1, "regular function must be counted");
        assert_eq!(t, 0);
        assert_eq!(c, 0);
    }

    #[test]
    fn python_regular_class_counts_as_class() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class Regular:");
        assert_eq!(c, 1, "regular class must be counted");
        assert_eq!(t, 0);
        assert_eq!(f, 0);
    }

    #[test]
    fn go_test_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func TestFoo(t *testing.T) {");
        assert_eq!(f, 0, "Go test func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    #[test]
    fn go_benchmark_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func BenchmarkBar(b *testing.B) {");
        assert_eq!(f, 0, "Go benchmark func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    #[test]
    fn go_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func doSomething() {");
        assert_eq!(f, 1, "regular Go func must be counted");
        assert_eq!(t, 0);
    }

    #[test]
    fn rust_test_attr_counts_as_test_not_function() {
        // #[test] is a standalone attribute line — counted as a test, never as a function
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "#[test]");
        assert_eq!(t, 1, "#[test] must be counted as a test");
        assert_eq!(f, 0, "#[test] attribute must not be counted as a function");
    }

    #[test]
    fn rust_fn_line_counts_as_function_not_test() {
        // The fn declaration after #[test] does NOT match any test pattern
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "fn test_something() {");
        assert_eq!(f, 1, "fn declaration must count as a function");
        assert_eq!(
            t, 0,
            "fn declaration line must not be double-counted as a test"
        );
    }

    #[test]
    fn js_describe_counts_as_test_not_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "describe('suite', () => {");
        assert_eq!(t, 1, "describe must be counted as a test");
        assert_eq!(f, 0, "describe must not be counted as a function");
    }

    #[test]
    fn js_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "function doWork() {");
        assert_eq!(f, 1, "JS function declaration must be counted");
        assert_eq!(t, 0);
    }

    // ── Language detection tests ─────────────────────────────────────────────

    #[test]
    fn detect_language_rs_extension() {
        let lang = detect_language(Path::new("foo.rs"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Rust));
    }

    #[test]
    fn detect_language_py_extension() {
        let lang = detect_language(Path::new("foo.py"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Python));
    }

    #[test]
    fn detect_language_ts_extension() {
        let lang = detect_language(Path::new("app.ts"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::TypeScript));
    }

    #[test]
    fn detect_language_js_extension() {
        let lang = detect_language(Path::new("app.js"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::JavaScript));
    }

    #[test]
    fn detect_language_go_extension() {
        let lang = detect_language(Path::new("main.go"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Go));
    }

    #[test]
    fn detect_language_c_extension() {
        let lang = detect_language(Path::new("main.c"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::C));
    }

    #[test]
    fn detect_language_cpp_extension() {
        let lang = detect_language(Path::new("main.cpp"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Cpp));
    }

    #[test]
    fn detect_language_java_extension() {
        let lang = detect_language(Path::new("Main.java"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Java));
    }

    #[test]
    fn detect_language_makefile_exact_name() {
        let lang = detect_language(Path::new("Makefile"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Makefile));
    }

    #[test]
    fn detect_language_dockerfile_exact_name() {
        let lang = detect_language(Path::new("Dockerfile"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Dockerfile));
    }

    #[test]
    fn detect_language_rakefile() {
        let lang = detect_language(Path::new("Rakefile"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Ruby));
    }

    #[test]
    fn detect_language_gemfile() {
        let lang = detect_language(Path::new("Gemfile"), None, &BTreeMap::new(), false);
        assert_eq!(lang, Some(Language::Ruby));
    }

    #[test]
    fn detect_language_unknown_extension_returns_none() {
        let lang = detect_language(Path::new("foo.xyz123"), None, &BTreeMap::new(), false);
        assert_eq!(lang, None);
    }

    #[test]
    fn detect_language_extension_override() {
        let mut overrides = BTreeMap::new();
        overrides.insert("h".into(), "cpp".into());
        let lang = detect_language(Path::new("header.h"), None, &overrides, false);
        assert_eq!(lang, Some(Language::Cpp));
    }

    #[test]
    fn detect_language_shebang_python() {
        let lang = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env python3"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(lang, Some(Language::Python));
    }

    #[test]
    fn detect_language_shebang_bash() {
        let lang = detect_language(
            Path::new("script"),
            Some("#!/bin/bash"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(lang, Some(Language::Shell));
    }

    #[test]
    fn detect_language_shebang_ruby() {
        let lang = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env ruby"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(lang, Some(Language::Ruby));
    }

    #[test]
    fn detect_language_shebang_disabled() {
        // When shebang_detection=false, shebang is ignored
        let lang = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env python3"),
            &BTreeMap::new(),
            false,
        );
        assert_eq!(lang, None);
    }

    // ── Language::from_name ──────────────────────────────────────────────────

    #[test]
    fn from_name_rust() {
        assert_eq!(Language::from_name("rust"), Some(Language::Rust));
    }

    #[test]
    fn from_name_python() {
        assert_eq!(Language::from_name("python"), Some(Language::Python));
    }

    #[test]
    fn from_name_unknown() {
        assert_eq!(Language::from_name("brainfuck"), None);
    }

    #[test]
    fn from_name_roundtrip_all() {
        // Every language's slug should round-trip through from_name.
        // Keep this list in sync with the `Language` enum: a variant missing here
        // is a variant whose slug is never checked.
        for lang in [
            // Core languages
            Language::C,
            Language::Cpp,
            Language::CSharp,
            Language::Go,
            Language::Java,
            Language::JavaScript,
            Language::Python,
            Language::Rust,
            Language::Shell,
            Language::PowerShell,
            Language::TypeScript,
            // Extended language support
            Language::Assembly,
            Language::Clojure,
            Language::Css,
            Language::Dart,
            Language::Dockerfile,
            Language::Elixir,
            Language::Erlang,
            Language::FSharp,
            Language::Groovy,
            Language::Haskell,
            Language::Html,
            Language::Julia,
            Language::Kotlin,
            Language::Lua,
            Language::Makefile,
            Language::Nim,
            Language::ObjectiveC,
            Language::Ocaml,
            Language::Perl,
            Language::Php,
            Language::R,
            Language::Ruby,
            Language::Scala,
            Language::Scss,
            Language::Sql,
            Language::Svelte,
            Language::Swift,
            Language::Vue,
            Language::Xml,
            Language::Zig,
            // Pass 1: modern declarative / smart-contract languages
            Language::Solidity,
            Language::Protobuf,
            Language::Hcl,
            Language::GraphQl,
            // Pass 2: legacy + embedded / hardware-description languages
            Language::Ada,
            Language::Vhdl,
            Language::Verilog,
            Language::Tcl,
            Language::Pascal,
            Language::VisualBasic,
            Language::Lisp,
            // Pass 3: scientific / infra / systems / graphics
            Language::Fortran,
            Language::Nix,
            Language::Crystal,
            Language::D,
            Language::Glsl,
            Language::Cmake,
            Language::Elm,
            Language::Awk,
        ] {
            let slug = lang.as_slug();
            let roundtripped = Language::from_name(slug);
            assert_eq!(
                roundtripped,
                Some(lang),
                "from_name({slug:?}) should return {lang:?}"
            );
        }
    }
}