Skip to main content

sloc_languages/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3
4use std::collections::{BTreeMap, BTreeSet, HashSet};
5use std::path::Path;
6
7use serde::{Deserialize, Serialize};
8
/// Source languages that the analyzer can be asked to count.
///
/// Values are serialized by serde in `snake_case` (see the `rename_all` attribute), which for
/// multi-word variants differs from [`Language::as_slug`] (for example `CSharp` serializes as
/// `c_sharp` while its slug is `csharp`).
///
/// The derived `Ord` follows declaration order, so reordering variants changes the iteration
/// order of ordered collections such as the `BTreeSet` returned by `supported_languages`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    // --- Extended language support ---
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
}
55
56impl Language {
57    #[must_use]
58    pub const fn display_name(&self) -> &'static str {
59        match self {
60            Self::C => "C",
61            Self::Cpp => "C++",
62            Self::CSharp => "C#",
63            Self::Go => "Go",
64            Self::Java => "Java",
65            Self::JavaScript => "JavaScript",
66            Self::Python => "Python",
67            Self::Rust => "Rust",
68            Self::Shell => "Shell",
69            Self::PowerShell => "PowerShell",
70            Self::TypeScript => "TypeScript",
71            Self::Assembly => "Assembly",
72            Self::Clojure => "Clojure",
73            Self::Css => "CSS",
74            Self::Dart => "Dart",
75            Self::Dockerfile => "Dockerfile",
76            Self::Elixir => "Elixir",
77            Self::Erlang => "Erlang",
78            Self::FSharp => "F#",
79            Self::Groovy => "Groovy",
80            Self::Haskell => "Haskell",
81            Self::Html => "HTML",
82            Self::Julia => "Julia",
83            Self::Kotlin => "Kotlin",
84            Self::Lua => "Lua",
85            Self::Makefile => "Makefile",
86            Self::Nim => "Nim",
87            Self::ObjectiveC => "Objective-C",
88            Self::Ocaml => "OCaml",
89            Self::Perl => "Perl",
90            Self::Php => "PHP",
91            Self::R => "R",
92            Self::Ruby => "Ruby",
93            Self::Scala => "Scala",
94            Self::Scss => "SCSS",
95            Self::Sql => "SQL",
96            Self::Svelte => "Svelte",
97            Self::Swift => "Swift",
98            Self::Vue => "Vue",
99            Self::Xml => "XML",
100            Self::Zig => "Zig",
101        }
102    }
103
104    #[must_use]
105    pub const fn as_slug(&self) -> &'static str {
106        match self {
107            Self::C => "c",
108            Self::Cpp => "cpp",
109            Self::CSharp => "csharp",
110            Self::Go => "go",
111            Self::Java => "java",
112            Self::JavaScript => "javascript",
113            Self::Python => "python",
114            Self::Rust => "rust",
115            Self::Shell => "shell",
116            Self::PowerShell => "powershell",
117            Self::TypeScript => "typescript",
118            Self::Assembly => "assembly",
119            Self::Clojure => "clojure",
120            Self::Css => "css",
121            Self::Dart => "dart",
122            Self::Dockerfile => "dockerfile",
123            Self::Elixir => "elixir",
124            Self::Erlang => "erlang",
125            Self::FSharp => "fsharp",
126            Self::Groovy => "groovy",
127            Self::Haskell => "haskell",
128            Self::Html => "html",
129            Self::Julia => "julia",
130            Self::Kotlin => "kotlin",
131            Self::Lua => "lua",
132            Self::Makefile => "makefile",
133            Self::Nim => "nim",
134            Self::ObjectiveC => "objectivec",
135            Self::Ocaml => "ocaml",
136            Self::Perl => "perl",
137            Self::Php => "php",
138            Self::R => "r",
139            Self::Ruby => "ruby",
140            Self::Scala => "scala",
141            Self::Scss => "scss",
142            Self::Sql => "sql",
143            Self::Svelte => "svelte",
144            Self::Swift => "swift",
145            Self::Vue => "vue",
146            Self::Xml => "xml",
147            Self::Zig => "zig",
148        }
149    }
150
151    #[must_use]
152    pub fn from_name(name: &str) -> Option<Self> {
153        match name.trim().to_ascii_lowercase().as_str() {
154            "c" => Some(Self::C),
155            "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
156            "csharp" | "c#" | "cs" => Some(Self::CSharp),
157            "go" | "golang" => Some(Self::Go),
158            "java" => Some(Self::Java),
159            "javascript" | "js" => Some(Self::JavaScript),
160            "python" | "py" => Some(Self::Python),
161            "rust" | "rs" => Some(Self::Rust),
162            "shell" | "sh" | "bash" => Some(Self::Shell),
163            "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
164            "typescript" | "ts" => Some(Self::TypeScript),
165            "assembly" | "asm" => Some(Self::Assembly),
166            "clojure" | "clj" => Some(Self::Clojure),
167            "css" => Some(Self::Css),
168            "dart" => Some(Self::Dart),
169            "dockerfile" | "docker" => Some(Self::Dockerfile),
170            "elixir" | "ex" => Some(Self::Elixir),
171            "erlang" | "erl" => Some(Self::Erlang),
172            "fsharp" | "f#" | "fs" => Some(Self::FSharp),
173            "groovy" => Some(Self::Groovy),
174            "haskell" | "hs" => Some(Self::Haskell),
175            "html" | "htm" => Some(Self::Html),
176            "julia" | "jl" => Some(Self::Julia),
177            "kotlin" | "kt" => Some(Self::Kotlin),
178            "lua" => Some(Self::Lua),
179            "makefile" | "make" | "mk" => Some(Self::Makefile),
180            "nim" => Some(Self::Nim),
181            "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
182            "ocaml" | "ml" => Some(Self::Ocaml),
183            "perl" | "pl" => Some(Self::Perl),
184            "php" => Some(Self::Php),
185            "r" => Some(Self::R),
186            "ruby" | "rb" => Some(Self::Ruby),
187            "scala" => Some(Self::Scala),
188            "scss" | "sass" => Some(Self::Scss),
189            "sql" => Some(Self::Sql),
190            "svelte" => Some(Self::Svelte),
191            "swift" => Some(Self::Swift),
192            "vue" => Some(Self::Vue),
193            "xml" => Some(Self::Xml),
194            "zig" => Some(Self::Zig),
195            _ => None,
196        }
197    }
198}
199
/// Per-file tallies produced by an analysis pass.
///
/// The line-category counters are filled in by the analyzers outside this definition; their exact
/// classification rules are not visible here. Fields marked `#[serde(default)]` deserialize to
/// `0` when absent from the input (presumably so older serialized reports stay readable — confirm).
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    pub total_physical_lines: u64,
    pub blank_only_lines: u64,
    pub code_only_lines: u64,
    pub single_comment_only_lines: u64,
    pub multi_comment_only_lines: u64,
    pub mixed_code_single_comment_lines: u64,
    pub mixed_code_multi_comment_lines: u64,
    pub docstring_comment_lines: u64,
    pub skipped_unknown_lines: u64,
    /// Best-effort count of function/method definition lines detected lexically.
    #[serde(default)]
    pub functions: u64,
    /// Best-effort count of class/struct/trait/type definition lines detected lexically.
    #[serde(default)]
    pub classes: u64,
    /// Best-effort count of variable declaration lines detected lexically.
    #[serde(default)]
    pub variables: u64,
    /// Best-effort count of import/use/include statement lines detected lexically.
    #[serde(default)]
    pub imports: u64,
    /// Lines consisting solely of preprocessor/compiler directives (e.g. `#include`, `#define`
    /// in C/C++/Objective-C). Always a subset of `code_only_lines`. Controlled by
    /// `AnalysisConfig::count_compiler_directives`. IEEE 1045-1992 §4.2.
    #[serde(default)]
    pub compiler_directive_lines: u64,
    /// Best-effort count of test case / test function definition lines detected lexically
    /// (`GTest`, Catch2, `PyTest`, `JUnit`, etc.). Always a subset of `code_only_lines`.
    #[serde(default)]
    pub test_count: u64,
    /// Best-effort count of test assertion call lines detected lexically
    /// (`ASSERT_EQ`, `EXPECT_TRUE`, `assertEquals`, `Assert.AreEqual`, `assert_eq!`, etc.).
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Best-effort count of test suite / fixture / group declaration lines detected lexically
    /// (`TEST_GROUP`, `BOOST_AUTO_TEST_SUITE`, `[TestClass]`, `[TestFixture]`, etc.).
    #[serde(default)]
    pub test_suite_count: u64,
}
241
/// Identifies which kind of parser produced a [`RawFileAnalysis`].
///
/// Serialized by serde in `snake_case` (`lexical`, `lexical_best_effort`, `tree_sitter`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    // NOTE(review): the distinction between `Lexical` and `LexicalBestEffort` is decided by the
    // analyzers, which are outside this definition — confirm before documenting further.
    Lexical,
    LexicalBestEffort,
    // Presumably reported by the tree-sitter fast paths behind the `tree-sitter` feature — verify.
    TreeSitter,
}
249
/// Outcome of analysing the text of a single file, as returned by `analyze_text`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// Line and symbol tallies for the file.
    pub raw: RawLineCounts,
    /// Which kind of parser produced `raw`.
    pub parse_mode: ParseMode,
    /// Human-readable warnings collected during analysis (content is set by the analyzers).
    pub warnings: Vec<String>,
}
256
/// Caller-selectable IEEE 1045-1992 counting options, built by `sloc-core` from `AnalysisConfig`.
///
/// `analyze_text` takes this struct by value so that behaviour the standard treats as a
/// configurable parameter (rather than a fixed convention) stays under the caller's control.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// `true` (the IEEE 1045-1992 default): a blank line inside a block comment is counted as a
    /// comment line instead of a blank line.
    pub blank_in_block_comment_as_comment: bool,
    /// `true`: physical lines joined by a trailing backslash are collapsed into one logical line
    /// for SLOC counting (IEEE logical SLOC mode).
    pub collapse_continuation_lines: bool,
}

impl Default for AnalysisOptions {
    /// Standard defaults: blank lines in block comments count as comments, and continuation
    /// lines are not collapsed.
    fn default() -> Self {
        let blank_in_block_comment_as_comment = true;
        let collapse_continuation_lines = false;
        Self {
            blank_in_block_comment_as_comment,
            collapse_continuation_lines,
        }
    }
}
279
280#[must_use]
281pub fn supported_languages() -> BTreeSet<Language> {
282    [
283        Language::Assembly,
284        Language::C,
285        Language::Clojure,
286        Language::Cpp,
287        Language::CSharp,
288        Language::Css,
289        Language::Dart,
290        Language::Dockerfile,
291        Language::Elixir,
292        Language::Erlang,
293        Language::FSharp,
294        Language::Go,
295        Language::Groovy,
296        Language::Haskell,
297        Language::Html,
298        Language::Java,
299        Language::JavaScript,
300        Language::Julia,
301        Language::Kotlin,
302        Language::Lua,
303        Language::Makefile,
304        Language::Nim,
305        Language::ObjectiveC,
306        Language::Ocaml,
307        Language::Perl,
308        Language::Php,
309        Language::PowerShell,
310        Language::Python,
311        Language::R,
312        Language::Ruby,
313        Language::Rust,
314        Language::Scala,
315        Language::Scss,
316        Language::Shell,
317        Language::Sql,
318        Language::Svelte,
319        Language::Swift,
320        Language::TypeScript,
321        Language::Vue,
322        Language::Xml,
323        Language::Zig,
324    ]
325    .into_iter()
326    .collect()
327}
328
329/// Detect language from a shebang line (e.g. `#!/usr/bin/env python3`).
330fn detect_by_shebang(line: &str) -> Option<Language> {
331    let lower = line.to_ascii_lowercase();
332    if !lower.starts_with("#!") {
333        return None;
334    }
335    if lower.contains("python") {
336        return Some(Language::Python);
337    }
338    if lower.contains("pwsh") || lower.contains("powershell") {
339        return Some(Language::PowerShell);
340    }
341    if lower.contains("bash")
342        || lower.contains("/sh")
343        || lower.contains("zsh")
344        || lower.contains("ksh")
345    {
346        return Some(Language::Shell);
347    }
348    if lower.contains("ruby") {
349        return Some(Language::Ruby);
350    }
351    if lower.contains("perl") {
352        return Some(Language::Perl);
353    }
354    if lower.contains("php") {
355        return Some(Language::Php);
356    }
357    if lower.contains("node") || lower.contains("nodejs") {
358        return Some(Language::JavaScript);
359    }
360    None
361}
362
363/// Detect language purely from a (lowercased) file extension.
364fn detect_by_extension(ext: &str) -> Option<Language> {
365    // Static table avoids a large match statement; each extension maps 1-to-1 to a language.
366    static EXT_MAP: &[(&str, Language)] = &[
367        ("c", Language::C),
368        ("h", Language::C),
369        ("cc", Language::Cpp),
370        ("cp", Language::Cpp),
371        ("cpp", Language::Cpp),
372        ("cxx", Language::Cpp),
373        ("hh", Language::Cpp),
374        ("hpp", Language::Cpp),
375        ("hxx", Language::Cpp),
376        ("cs", Language::CSharp),
377        ("go", Language::Go),
378        ("java", Language::Java),
379        ("js", Language::JavaScript),
380        ("mjs", Language::JavaScript),
381        ("cjs", Language::JavaScript),
382        ("py", Language::Python),
383        ("rs", Language::Rust),
384        ("sh", Language::Shell),
385        ("bash", Language::Shell),
386        ("zsh", Language::Shell),
387        ("ksh", Language::Shell),
388        ("ps1", Language::PowerShell),
389        ("psm1", Language::PowerShell),
390        ("psd1", Language::PowerShell),
391        ("ts", Language::TypeScript),
392        ("mts", Language::TypeScript),
393        ("cts", Language::TypeScript),
394        ("asm", Language::Assembly),
395        ("s", Language::Assembly),
396        ("clj", Language::Clojure),
397        ("cljs", Language::Clojure),
398        ("cljc", Language::Clojure),
399        ("edn", Language::Clojure),
400        ("css", Language::Css),
401        ("dart", Language::Dart),
402        ("ex", Language::Elixir),
403        ("exs", Language::Elixir),
404        ("erl", Language::Erlang),
405        ("hrl", Language::Erlang),
406        ("fs", Language::FSharp),
407        ("fsi", Language::FSharp),
408        ("fsx", Language::FSharp),
409        ("groovy", Language::Groovy),
410        ("gradle", Language::Groovy),
411        ("hs", Language::Haskell),
412        ("lhs", Language::Haskell),
413        ("html", Language::Html),
414        ("htm", Language::Html),
415        ("xhtml", Language::Html),
416        ("jl", Language::Julia),
417        ("kt", Language::Kotlin),
418        ("kts", Language::Kotlin),
419        ("lua", Language::Lua),
420        ("mk", Language::Makefile),
421        ("nim", Language::Nim),
422        ("nims", Language::Nim),
423        ("m", Language::ObjectiveC),
424        ("mm", Language::ObjectiveC),
425        ("ml", Language::Ocaml),
426        ("mli", Language::Ocaml),
427        ("pl", Language::Perl),
428        ("pm", Language::Perl),
429        ("t", Language::Perl),
430        ("php", Language::Php),
431        ("php3", Language::Php),
432        ("php4", Language::Php),
433        ("php5", Language::Php),
434        ("php7", Language::Php),
435        ("phtml", Language::Php),
436        ("r", Language::R),
437        ("rb", Language::Ruby),
438        ("rake", Language::Ruby),
439        ("scala", Language::Scala),
440        ("sc", Language::Scala),
441        ("scss", Language::Scss),
442        ("sass", Language::Scss),
443        ("sql", Language::Sql),
444        ("svelte", Language::Svelte),
445        ("swift", Language::Swift),
446        ("vue", Language::Vue),
447        ("xml", Language::Xml),
448        ("xsd", Language::Xml),
449        ("xsl", Language::Xml),
450        ("xslt", Language::Xml),
451        ("svg", Language::Xml),
452        ("zig", Language::Zig),
453    ];
454    EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
455}
456
457/// Detect language from an exact filename (no extension) or well-known filename patterns.
458fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
459    // Dockerfile: exact name or Dockerfile.* variant
460    if filename == "Dockerfile"
461        || filename.starts_with("Dockerfile.")
462        || filename_lower == "dockerfile"
463    {
464        return Some(Language::Dockerfile);
465    }
466    // Makefile variants
467    if matches!(
468        filename,
469        "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
470    ) {
471        return Some(Language::Makefile);
472    }
473    // Ruby ecosystem files that have no extension
474    if matches!(
475        filename,
476        "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
477    ) {
478        return Some(Language::Ruby);
479    }
480    None
481}
482
483#[must_use]
484#[allow(clippy::too_many_lines)]
485pub fn detect_language(
486    path: &Path,
487    first_line: Option<&str>,
488    extension_overrides: &BTreeMap<String, String>,
489    shebang_detection: bool,
490) -> Option<Language> {
491    let extension = path
492        .extension()
493        .and_then(|ext| ext.to_str())
494        .map(str::to_ascii_lowercase);
495
496    // Extension override check (user-configured mappings win over everything)
497    if let Some(ext) = extension.as_ref() {
498        if let Some(override_name) = extension_overrides.get(ext.as_str()) {
499            if let Some(lang) = Language::from_name(override_name) {
500                return Some(lang);
501            }
502        }
503    }
504
505    // Filename-based detection for files that have no extension or use exact names
506    let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
507    let filename_lower = filename.to_ascii_lowercase();
508
509    if let Some(lang) = detect_by_filename(filename, &filename_lower) {
510        return Some(lang);
511    }
512
513    // Extension-based detection
514    if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
515        return Some(lang);
516    }
517
518    // Shebang detection (last resort โ€” only for extensionless scripts)
519    if shebang_detection {
520        if let Some(line) = first_line {
521            if let Some(lang) = detect_by_shebang(line) {
522                return Some(lang);
523            }
524        }
525    }
526
527    None
528}
529
530#[must_use]
531pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
532    // tree-sitter fast-paths (compiled out when feature is disabled)
533    #[cfg(feature = "tree-sitter")]
534    {
535        match language {
536            Language::C | Language::Cpp => {
537                if let Some(result) = ts::analyze_c(text) {
538                    return result;
539                }
540            }
541            Language::Python => {
542                if let Some(result) = ts::analyze_python(text) {
543                    return result;
544                }
545            }
546            _ => {}
547        }
548    }
549
550    let (mut config, has_preprocessor) = language_scan_config(language);
551
552    // Python docstring lines are computed from the text and cannot be a static constant.
553    if language == Language::Python {
554        config.skip_lines = detect_python_docstring_lines(text);
555    }
556
557    // C, C++, and Objective-C have a preprocessor whose directive lines are tracked separately
558    // per IEEE 1045-1992 ยง4.2; every other language uses base flags.
559    let flags = IeeeFlags {
560        has_preprocessor_directives: has_preprocessor,
561        blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
562        collapse_continuation_lines: options.collapse_continuation_lines,
563    };
564    analyze_generic(text, config, flags)
565}
566
567/// Returns the lexical scan configuration for `language` and whether it uses a C preprocessor.
568/// All fields are static constants except `skip_lines`, which is always empty here; callers that
569/// need non-empty skip sets (currently only Python) must populate the field after this call.
570///
571/// The implementation delegates to `LANG_SCAN_TABLE` (a static `&[(Language, StaticLangConfig)]`)
572/// defined below the `SP_*` symbol-pattern constants.  Each language appears exactly once in the
573/// table, so the linear scan is O(|languages|) but avoids a 41-arm `match` statement.
574fn language_scan_config(language: Language) -> (ScanConfig, bool) {
575    let cfg = LANG_SCAN_TABLE
576        .iter()
577        .find_map(|&(l, c)| (l == language).then_some(c))
578        .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
579    (
580        ScanConfig {
581            line_comments: cfg.line_comments,
582            block_comment: cfg.block_comment,
583            allow_single_quote_strings: cfg.allow_single_quote_strings,
584            allow_double_quote_strings: cfg.allow_double_quote_strings,
585            allow_triple_quote_strings: cfg.allow_triple_quote_strings,
586            allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
587            skip_lines: HashSet::new(),
588            symbol_patterns: cfg.symbol_patterns,
589        },
590        cfg.has_preprocessor,
591    )
592}
593
/// Per-language keyword prefixes used for best-effort structural symbol detection.
///
/// Every slice holds line prefixes (compared after leading whitespace is stripped) that mark a
/// definition of the corresponding category. An empty slice disables detection for that category.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Prefixes that mark a function or method definition.
    functions: &'static [&'static str],
    /// Line prefixes that classify as a function only when the line ALSO contains `(`
    /// AND there is no `=` between the prefix and the first `(`.  Used for C/C++ where
    /// function definitions are led by the return type (`void`, `int`, `bool`, …) with
    /// no dedicated keyword, so the paren guard distinguishes `void f(x)` from
    /// `void* p = malloc(n)`.
    functions_prefix_paren: &'static [&'static str],
    /// Prefixes that mark a class/struct/trait/type definition.
    classes: &'static [&'static str],
    /// Prefixes that mark a variable declaration.
    variables: &'static [&'static str],
    /// Prefixes that mark an import/use/include statement.
    imports: &'static [&'static str],
    /// Line prefixes (after stripping leading whitespace) that indicate a test case or test
    /// function definition. Matched against code lines only, same as other symbol categories.
    tests: &'static [&'static str],
    /// Line prefixes that indicate a test assertion call (`ASSERT_EQ`, `assertEquals`,
    /// `assert_eq!`, `Assert.AreEqual`, etc.). Matched against code lines only.
    assertions: &'static [&'static str],
    /// Line prefixes that indicate a test suite / fixture / group declaration
    /// (`TEST_GROUP`, `BOOST_AUTO_TEST_SUITE`, `[TestClass]`, `[TestFixture]`, etc.).
    test_suites: &'static [&'static str],
}

impl SymbolPatterns {
    /// Pattern set with every category empty, i.e. symbol detection fully disabled.
    const fn none() -> Self {
        const EMPTY: &[&str] = &[];
        Self {
            functions: EMPTY,
            functions_prefix_paren: EMPTY,
            classes: EMPTY,
            variables: EMPTY,
            imports: EMPTY,
            tests: EMPTY,
            assertions: EMPTY,
            test_suites: EMPTY,
        }
    }
}

/// Shared "no symbol detection" pattern set; every field is an empty slice.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();
636
/// Symbol-detection line prefixes for Rust sources.
///
/// Visibility and qualifier combinations are spelled out individually because matching is by
/// literal line prefix.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    // NOTE(review): `extern fn ` does not cover the common `extern "C" fn` spelling, and
    // `pub(super)` is listed only for plain `fn` — confirm whether that is intentional.
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    functions_prefix_paren: &[],
    // `impl` blocks and type aliases are counted in the same category as struct/enum/trait.
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    // NOTE(review): any line starting with `let mut ` also starts with `let `; whether the
    // second entry has an effect depends on the matcher, which is not visible here.
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
    // Built-in #[test], tokio/actix async test attributes, rstest
    tests: &[
        "#[test]",
        "#[tokio::test]",
        "#[actix_web::test]",
        "#[rstest]",
        "#[test_case",
    ],
    assertions: &[
        "assert_eq!(",
        "assert_ne!(",
        "assert!(",
        "assert_matches!(",
        "assert_err!(",
        "assert_ok!(",
    ],
    test_suites: &[],
};
692
/// Symbol-detection line prefixes for Python sources.
///
/// Variable detection is disabled (empty slice).
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["import ", "from "],
    // pytest: test_ prefix functions and Test* classes; unittest: test_ methods
    tests: &["def test_", "async def test_", "class Test"],
    // unittest-style `self.assert*` calls only.
    // NOTE(review): bare `assert ...` statements (the pytest style) are not listed — confirm
    // whether they should count as test assertions.
    assertions: &[
        "self.assertEqual(",
        "self.assertNotEqual(",
        "self.assertTrue(",
        "self.assertFalse(",
        "self.assertIsNone(",
        "self.assertIsNotNone(",
        "self.assertIn(",
        "self.assertNotIn(",
        "self.assertRaises(",
        "self.assertAlmostEqual(",
    ],
    test_suites: &[],
};
715
/// Symbol-detection line prefixes for JavaScript sources.
///
/// Only keyword-led declarations are listed, so functions defined as arrow functions or as
/// object/class methods are not covered by the `functions` prefixes.
const SP_JS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // Jest/Mocha/Jasmine: describe/it/test block openers
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    // Jest/Chai-style `expect(...)` chains.
    assertions: &["expect("],
    test_suites: &[],
};
747
/// Symbol-detection line prefixes for TypeScript sources.
///
/// Same function, variable, import, test and assertion prefixes as `SP_JS`; the `classes` list
/// additionally covers `abstract`, `interface` and `declare` forms.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // Jest/Mocha/Jasmine/Vitest: describe/it/test block openers
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    // Jest/Chai-style `expect(...)` chains.
    assertions: &["expect("],
    test_suites: &[],
};
789
/// Symbol-detection line prefixes for Go sources.
///
/// Assertion and test-suite detection are disabled (empty slices).
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    functions_prefix_paren: &[],
    // Go declares structs and interfaces through `type`.
    classes: &["type "],
    // Only `var` declarations; short `:=` declarations have no leading keyword to match.
    variables: &["var "],
    imports: &["import "],
    // Go standard testing: Test* functions (convention is practically exclusive to _test.go files)
    tests: &["func Test", "func Benchmark", "func Fuzz"],
    assertions: &[],
    test_suites: &[],
};
801
/// Symbol-detection line prefixes for Java sources.
///
/// Function and variable detection are disabled (empty slices): Java declarations start with a
/// type or modifier rather than a dedicated keyword.
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
    // JUnit 4 & 5, TestNG — annotations appear on their own line before the method
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "@TestFactory",
        "@TestTemplate",
    ],
    // Unqualified (statically imported) assertion calls.
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "assertAll(",
        "assertArrayEquals(",
        "assertIterableEquals(",
        "assertLinesMatch(",
    ],
    test_suites: &[],
};
848
/// Symbol-detection line prefixes for C# sources.
///
/// Function detection is disabled (empty slices); variables are detected only through `var`.
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    variables: &["var "],
    imports: &["using "],
    // MSTest, NUnit, xUnit — attributes on their own line before the method
    tests: &[
        "[TestMethod]",
        "[Test]",
        "[Fact]",
        "[Theory]",
        "[TestCase(",
        "[DataRow(",
        "[InlineData(",
        "[MemberData(",
    ],
    // `Assert.*` calls covering the MSTest, NUnit and xUnit spellings.
    assertions: &[
        "Assert.AreEqual(",
        "Assert.AreNotEqual(",
        "Assert.IsTrue(",
        "Assert.IsFalse(",
        "Assert.IsNull(",
        "Assert.IsNotNull(",
        "Assert.Equal(",
        "Assert.NotEqual(",
        "Assert.True(",
        "Assert.False(",
        "Assert.That(",
        "Assert.Contains(",
        "Assert.Throws(",
        "Assert.ThrowsAsync(",
        "Assert.IsInstanceOfType(",
    ],
    // Class-level fixture attributes (MSTest `[TestClass]`, NUnit `[TestFixture]`/`[SetUpFixture]`).
    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
};
907
// Test-definition line prefixes shared by C and C++: Google Test, Catch2/doctest, Boost.Test,
// CppUnit, Unity, Check, CMocka and CppUTest.
// NOTE(review): the list also contains suite/group-level macros (`BOOST_AUTO_TEST_SUITE(`,
// `CPPUNIT_TEST_SUITE(`, `TEST_GROUP(`, `TEST_GROUP_BASE(`); confirm how these interact with
// the separate test-suite category.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // Google Test
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2 / doctest
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity (embedded C)
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check (libcheck — embedded C)
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // CMocka (embedded C)
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
948
// Test assertion patterns shared by C and C++ (`SP_C.assertions` and `SP_CPP.assertions`).
// Entries are grouped by framework; every pattern ends with `(` so it names a macro or
// function call rather than a bare identifier.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // Google Test ASSERT_* (test-stopping failures)
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // Google Test EXPECT_* (non-stopping failures)
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2 / doctest assertions
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity assertions (embedded C)
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // CMocka assertions (embedded C)
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1022
// Test suite/group declaration patterns for C and C++ (`SP_C.test_suites` and
// `SP_CPP.test_suites`).
//
// Only macros that *open* a suite or group are listed.  Closing macros such as
// `CPPUNIT_TEST_SUITE_END(` (and Boost's `BOOST_AUTO_TEST_SUITE_END(`) are deliberately
// left out: a suite has exactly one opener, so matching the terminator as well would
// report two suites for every CppUnit fixture.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    // CppUTest
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    // Boost.Test
    "BOOST_AUTO_TEST_SUITE(",
    // CppUnit
    "CPPUNIT_TEST_SUITE(",
];
1031
// Symbol-detection patterns for C sources; referenced by the `Language::C` entry of
// `LANG_SCAN_TABLE`.
const SP_C: SymbolPatterns = SymbolPatterns {
    // C has no function keyword, so the keyword-based list stays empty.
    functions: &[],
    // Functions are instead detected by common return types / storage specifiers that
    // precede `(` with no `=`.
    functions_prefix_paren: &[
        "void ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "static ",
        "inline ",
        "const ",
        "extern ",
    ],
    // Aggregate type declarations, with and without `typedef`.
    classes: &[
        "struct ",
        "typedef struct ",
        "union ",
        "typedef union ",
        "typedef enum ",
    ],
    // No variable patterns are defined for C.
    variables: &[],
    imports: &["#include "],
    // Test, assertion and suite patterns are shared with C++.
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1062
// Symbol-detection patterns for C++ sources; referenced by the `Language::Cpp` entry of
// `LANG_SCAN_TABLE`.
const SP_CPP: SymbolPatterns = SymbolPatterns {
    // C++ specific function keyword-prefixes; return-type-led patterns use functions_prefix_paren.
    functions: &[
        "virtual ",  // virtual method declaration/definition
        "explicit ", // explicit constructor modifier
        "~",         // destructor (e.g. ~MyClass())
        "operator",  // operator overload (operator==, operator+, …)
    ],
    // Same return-type / specifier list as `SP_C`, plus `bool `, `auto ` and `constexpr `.
    functions_prefix_paren: &[
        "void ",
        "bool ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "auto ",
        "static ",
        "inline ",
        "constexpr ",
        "const ",
        "extern ",
    ],
    // `template<` (no space) is the dominant modern style alongside `template ` (with space).
    classes: &["class ", "struct ", "namespace ", "template ", "template<"],
    // No variable patterns are defined for C++.
    variables: &[],
    imports: &["#include "],
    // Test, assertion and suite patterns are shared with C.
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1096
// Symbol-detection patterns for shell scripts; referenced by the `Language::Shell` entry
// of `LANG_SCAN_TABLE`.
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    // Only the `function name` form is listed; the keyword-less `name() { … }` form has no
    // pattern here.
    functions: &["function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    // `. file` is the short form of `source file`.
    imports: &["source ", ". "],
    // No test, assertion or suite patterns are defined for shell.
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1107
// Symbol-detection patterns for PowerShell; referenced by the `Language::PowerShell` entry
// of `LANG_SCAN_TABLE`.
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    // Both capitalisations are listed explicitly.
    // NOTE(review): presumably because pattern matching is case-sensitive — confirm.
    functions: &["function ", "Function "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
    // Pester test framework
    tests: &["Describe ", "It ", "Context "],
    // No assertion or suite patterns are defined for PowerShell.
    assertions: &[],
    test_suites: &[],
};
1119
// Symbol-detection patterns for Kotlin; referenced by the `Language::Kotlin` entry of
// `LANG_SCAN_TABLE`.
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    // `fun` on its own plus the modifier combinations listed explicitly below.
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    functions_prefix_paren: &[],
    // Class-like declarations; note `companion object` has no trailing space.
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
    // JUnit 4/5, KotlinTest, Kotest
    // The quoted entries match test names written as string literals starting with
    // `should` / `it`.
    // NOTE(review): presumably the Kotest string-spec style — confirm.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "\"should ",
        "\"it ",
    ],
    // JUnit-style `assert*` calls followed by Kotest-style `should*` matchers.
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "shouldBe(",
        "shouldNotBe(",
        "shouldThrow(",
    ],
    // No suite patterns are defined for Kotlin.
    test_suites: &[],
};
1172
// Symbol-detection patterns for Swift sources.
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    // `func` on its own plus the modifier combinations listed explicitly below.
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    functions_prefix_paren: &[],
    // Nominal type declarations and extensions, bare and with access/finality modifiers.
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
    // XCTest: test functions are named test* by convention; Swift Testing: @Test attribute
    tests: &["func test", "func Test", "@Test"],
    // XCTest assertion functions, plus the Swift Testing `#expect(` macro.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
        "#expect(",
    ],
    // No suite patterns are defined for Swift.
    test_suites: &[],
};
1229
// Symbol-detection patterns for Ruby sources.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    // Includes the inline-visibility forms `private def` / `protected def`.
    functions: &["def ", "private def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    // RSpec / minitest
    tests: &["it ", "it(", "describe ", "context ", "test "],
    // No assertion or suite patterns are defined for Ruby.
    assertions: &[],
    test_suites: &[],
};
1241
// Symbol-detection patterns for Scala sources.
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    functions_prefix_paren: &[],
    // Classes, singleton objects and traits.
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    // ScalaTest / MUnit: FunSuite test("..."), FlatSpec it("..."), AnyWordSpec "..." should
    // NOTE(review): no entry below covers the AnyWordSpec `"..." should` form named above —
    // confirm whether that omission is intentional.
    tests: &["test(", "it(", "describe("],
    // No assertion or suite patterns are defined for Scala.
    assertions: &[],
    test_suites: &[],
};
1260
// Symbol-detection patterns for PHP sources.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    // `function` on its own plus the visibility / static / abstract / final combinations
    // listed explicitly below.
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    // Namespace imports (`use`) and file inclusion statements.
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    // PHPUnit: test methods whose name starts with `test`, or methods carrying the
    // `#[Test]` / `#[DataProvider(...)]` attributes.  The docblock `@test` annotation has no
    // pattern here.
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    // No assertion or suite patterns are defined for PHP.
    assertions: &[],
    test_suites: &[],
};
1301
// Symbol-detection patterns for Elixir; referenced by the `Language::Elixir` entry of
// `LANG_SCAN_TABLE`.
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    // Public and private (`p`-suffixed) function, macro and guard definitions.
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    functions_prefix_paren: &[],
    // Modules, protocols and protocol implementations are reported in the `classes` slot.
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    // ExUnit
    tests: &["test ", "describe "],
    // No assertion or suite patterns are defined for Elixir.
    assertions: &[],
    test_suites: &[],
};
1320
// Symbol-detection patterns for Erlang; referenced by the `Language::Erlang` entry of
// `LANG_SCAN_TABLE`.  Only module attributes are recognised: no function, variable or
// test patterns are defined.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // The `-module(...)` attribute is reported in the `classes` slot.
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1331
// Symbol-detection patterns for F#; referenced by the `Language::FSharp` entry of
// `LANG_SCAN_TABLE`.
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    // NOTE(review): `let ` is listed as a function pattern while `let mutable ` is listed
    // under `variables`; how the scanner resolves a line that fits both lists is not
    // visible here — confirm.
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    // NUnit / xUnit attributes on their own line; FsUnit uses [<Test>] / [<Fact>]
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    // No assertion or suite patterns are defined for F#.
    assertions: &[],
    test_suites: &[],
};
1349
// Symbol-detection patterns for Groovy; referenced by the `Language::Groovy` entry of
// `LANG_SCAN_TABLE`.
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    // Spock framework: feature methods; JUnit annotations
    // `def "` matches a method whose name is a string literal; the `given:` / `when:` /
    // `then:` / `expect:` entries are block labels.
    // NOTE(review): a single feature method may contain several of these labels — confirm
    // how the scanner counts them.
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    // No assertion or suite patterns are defined for Groovy.
    assertions: &[],
    test_suites: &[],
};
1361
// Symbol-detection patterns for Haskell; referenced by the `Language::Haskell` entry of
// `LANG_SCAN_TABLE`.  Only type-level declarations and imports are recognised: no
// function, variable or test patterns are defined.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // Type classes, data types, newtypes and type synonyms.
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1372
// Symbol-detection patterns for Lua; referenced by the `Language::Lua` entry of
// `LANG_SCAN_TABLE`.
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    functions_prefix_paren: &[],
    classes: &[],
    // NOTE(review): `local ` is also the start of the `local function ` pattern above; how
    // the scanner resolves a line that fits both lists is not visible here — confirm.
    variables: &["local "],
    // No import patterns are defined for Lua.
    imports: &[],
    // busted test framework
    tests: &["it(", "describe(", "pending("],
    // No assertion or suite patterns are defined for Lua.
    assertions: &[],
    test_suites: &[],
};
1384
// Symbol-detection patterns for Nim sources.
const SP_NIM: SymbolPatterns = SymbolPatterns {
    // Every routine-declaring keyword listed here is reported as a function, including
    // `template ` and `macro `.
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
    // unittest module
    tests: &["test "],
    // No assertion or suite patterns are defined for Nim.
    assertions: &[],
    test_suites: &[],
};
1404
// Symbol-detection patterns for Objective-C; referenced by the `Language::ObjectiveC`
// entry of `LANG_SCAN_TABLE`.
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    // Instance (`-`) and class (`+`) method declarations, written with a space before `(`.
    functions: &["- (", "+ ("],
    functions_prefix_paren: &[],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // XCTest: test methods start with - (void)test
    tests: &["- (void)test"],
    // XCTest assertion macros (the same names as in `SP_SWIFT`, without `#expect(`).
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    // No suite patterns are defined for Objective-C.
    test_suites: &[],
};
1427
// Symbol-detection patterns for OCaml sources.  No variable or test patterns are defined.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    // Every `let` / `let rec` binding is reported as a function.
    functions: &["let ", "let rec "],
    functions_prefix_paren: &[],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1438
// Symbol-detection patterns for Perl sources.  No test patterns are defined.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    functions_prefix_paren: &[],
    // `package` declarations are reported in the `classes` slot.
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1449
// Symbol-detection patterns for Clojure; referenced by the `Language::Clojure` entry of
// `LANG_SCAN_TABLE`.  Every pattern includes the opening parenthesis of the form.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    functions_prefix_paren: &[],
    // Records, protocols, types and interfaces are reported in the `classes` slot.
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    // clojure.test
    tests: &["(deftest ", "(testing "],
    // No assertion or suite patterns are defined for Clojure.
    assertions: &[],
    test_suites: &[],
};
1466
// Symbol-detection patterns for Julia; referenced by the `Language::Julia` entry of
// `LANG_SCAN_TABLE`.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    // Only the `function` / `macro` keyword forms are listed; the short assignment form
    // `f(x) = …` has no pattern here.
    functions: &["function ", "macro "],
    functions_prefix_paren: &[],
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    // Test.jl standard library
    tests: &["@test ", "@testset "],
    // No assertion or suite patterns are defined for Julia.
    assertions: &[],
    test_suites: &[],
};
1483
// Symbol-detection patterns for Dart; referenced by the `Language::Dart` entry of
// `LANG_SCAN_TABLE`.
const SP_DART: SymbolPatterns = SymbolPatterns {
    // No function patterns are defined for Dart.
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    // flutter_test / test package
    tests: &["test(", "testWidgets(", "group("],
    // No assertion or suite patterns are defined for Dart.
    assertions: &[],
    test_suites: &[],
};
1495
// Symbol-detection patterns for R sources.  Only imports and test constructs are
// recognised: no function, class or variable patterns are defined.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // testthat
    // NOTE(review): `expect_` is the prefix of testthat's expectation functions, yet it is
    // listed under `tests` while `assertions` is empty — confirm this is intentional.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
1507
// Symbol-detection patterns for SQL sources.  Each keyword sequence is listed in both
// all-lowercase and all-uppercase form; mixed-case spellings have no pattern here.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    // Stored functions and procedures, with and without `OR REPLACE`.
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    functions_prefix_paren: &[],
    // Tables, views and schemas are reported in the `classes` slot.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    // No import or test patterns are defined for SQL.
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1534
// Symbol-detection patterns for assembly; referenced by the `Language::Assembly` entry of
// `LANG_SCAN_TABLE`.  No class, variable or test patterns are defined.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    // NOTE(review): the `proc` directive usually follows the label (`name PROC`), so how
    // these patterns match depends on scanner behaviour not visible here — confirm.
    functions: &["proc ", "PROC "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    // Plain `include` in both cases, plus the `%`-prefixed form.
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1545
// Symbol-detection patterns for Zig sources.
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    // `fn` on its own plus the `pub` / `export` / `inline` combinations listed below.
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    functions_prefix_paren: &[],
    // No class patterns are defined for Zig.
    classes: &[],
    // Only `var` declarations are listed; `const` declarations have no pattern here.
    variables: &["var ", "pub var "],
    // No import patterns are defined for Zig.
    imports: &[],
    // Zig built-in test blocks
    // Covers named tests (`test "name"`) and `test{` written without a space; `test {`
    // with a space has no pattern here.
    tests: &["test \"", "test{"],
    // No assertion or suite patterns are defined for Zig.
    assertions: &[],
    test_suites: &[],
};
1563
1564/// Static (non-heap) language scanning parameters.  All fields are `'static` so this struct
1565/// can be stored in a `static` array.  The dynamic `skip_lines` set (used only for Python
1566/// docstring detection) is kept in `ScanConfig` and populated by the caller after lookup.
1567#[allow(clippy::struct_excessive_bools)]
1568#[derive(Clone, Copy)]
1569struct StaticLangConfig {
1570    line_comments: &'static [&'static str],
1571    block_comment: Option<(&'static str, &'static str)>,
1572    allow_single_quote_strings: bool,
1573    allow_double_quote_strings: bool,
1574    allow_triple_quote_strings: bool,
1575    allow_csharp_verbatim_strings: bool,
1576    symbol_patterns: SymbolPatterns,
1577    /// `true` for C, C++, and Objective-C (languages that have a C preprocessor).
1578    has_preprocessor: bool,
1579}
1580
/// Per-scan language configuration: the comment/string/symbol settings that also appear in
/// `StaticLangConfig`, plus the heap-allocated `skip_lines` set.
// The struct carries several independent `bool` flags, which this lint would otherwise flag.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Markers that introduce a line comment (e.g. `//`, `#`); empty when the language has none.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of a block comment, or `None` when the language has none.
    block_comment: Option<(&'static str, &'static str)>,
    /// Flag for single-quoted string literals.
    allow_single_quote_strings: bool,
    /// Flag for double-quoted string literals.
    allow_double_quote_strings: bool,
    /// Flag for triple-quoted string literals.
    allow_triple_quote_strings: bool,
    /// Flag for C#-style verbatim strings.
    allow_csharp_verbatim_strings: bool,
    /// Lines to skip.  Per the `StaticLangConfig` docs this is used only for Python
    /// docstring detection and is populated by the caller after the table lookup.
    /// NOTE(review): whether the indices are 0- or 1-based is not visible here — confirm.
    skip_lines: HashSet<usize>,
    /// Symbol-detection pattern table (`SP_*` constant) for the language.
    symbol_patterns: SymbolPatterns,
}
1593
/// Static language-scan configuration table — one entry per supported language.
/// Used by `language_scan_config` to avoid a 41-arm match.  All `SP_*` constants
/// referenced here are defined above in the same module.
1597static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
1598    (
1599        Language::C,
1600        StaticLangConfig {
1601            line_comments: &["//"],
1602            block_comment: Some(("/*", "*/")),
1603            allow_single_quote_strings: true,
1604            allow_double_quote_strings: true,
1605            allow_triple_quote_strings: false,
1606            allow_csharp_verbatim_strings: false,
1607            symbol_patterns: SP_C,
1608            has_preprocessor: true,
1609        },
1610    ),
1611    (
1612        Language::Cpp,
1613        StaticLangConfig {
1614            line_comments: &["//"],
1615            block_comment: Some(("/*", "*/")),
1616            allow_single_quote_strings: true,
1617            allow_double_quote_strings: true,
1618            allow_triple_quote_strings: false,
1619            allow_csharp_verbatim_strings: false,
1620            symbol_patterns: SP_CPP,
1621            has_preprocessor: true,
1622        },
1623    ),
1624    (
1625        Language::ObjectiveC,
1626        StaticLangConfig {
1627            line_comments: &["//"],
1628            block_comment: Some(("/*", "*/")),
1629            allow_single_quote_strings: true,
1630            allow_double_quote_strings: true,
1631            allow_triple_quote_strings: false,
1632            allow_csharp_verbatim_strings: false,
1633            symbol_patterns: SP_OBJECTIVEC,
1634            has_preprocessor: true,
1635        },
1636    ),
1637    (
1638        Language::CSharp,
1639        StaticLangConfig {
1640            line_comments: &["//"],
1641            block_comment: Some(("/*", "*/")),
1642            allow_single_quote_strings: true,
1643            allow_double_quote_strings: true,
1644            allow_triple_quote_strings: false,
1645            allow_csharp_verbatim_strings: true,
1646            symbol_patterns: SP_CSHARP,
1647            has_preprocessor: false,
1648        },
1649    ),
1650    (
1651        Language::Go,
1652        StaticLangConfig {
1653            line_comments: &["//"],
1654            block_comment: Some(("/*", "*/")),
1655            allow_single_quote_strings: true,
1656            allow_double_quote_strings: true,
1657            allow_triple_quote_strings: false,
1658            allow_csharp_verbatim_strings: false,
1659            symbol_patterns: SP_GO,
1660            has_preprocessor: false,
1661        },
1662    ),
1663    (
1664        Language::Java,
1665        StaticLangConfig {
1666            line_comments: &["//"],
1667            block_comment: Some(("/*", "*/")),
1668            allow_single_quote_strings: true,
1669            allow_double_quote_strings: true,
1670            allow_triple_quote_strings: false,
1671            allow_csharp_verbatim_strings: false,
1672            symbol_patterns: SP_JAVA,
1673            has_preprocessor: false,
1674        },
1675    ),
1676    (
1677        Language::JavaScript,
1678        StaticLangConfig {
1679            line_comments: &["//"],
1680            block_comment: Some(("/*", "*/")),
1681            allow_single_quote_strings: true,
1682            allow_double_quote_strings: true,
1683            allow_triple_quote_strings: false,
1684            allow_csharp_verbatim_strings: false,
1685            symbol_patterns: SP_JS,
1686            has_preprocessor: false,
1687        },
1688    ),
1689    (
1690        Language::Svelte,
1691        StaticLangConfig {
1692            line_comments: &["//"],
1693            block_comment: Some(("/*", "*/")),
1694            allow_single_quote_strings: true,
1695            allow_double_quote_strings: true,
1696            allow_triple_quote_strings: false,
1697            allow_csharp_verbatim_strings: false,
1698            symbol_patterns: SP_JS,
1699            has_preprocessor: false,
1700        },
1701    ),
1702    (
1703        Language::Vue,
1704        StaticLangConfig {
1705            line_comments: &["//"],
1706            block_comment: Some(("/*", "*/")),
1707            allow_single_quote_strings: true,
1708            allow_double_quote_strings: true,
1709            allow_triple_quote_strings: false,
1710            allow_csharp_verbatim_strings: false,
1711            symbol_patterns: SP_JS,
1712            has_preprocessor: false,
1713        },
1714    ),
1715    (
1716        Language::Rust,
1717        StaticLangConfig {
1718            line_comments: &["//"],
1719            block_comment: Some(("/*", "*/")),
1720            allow_single_quote_strings: false,
1721            allow_double_quote_strings: true,
1722            allow_triple_quote_strings: false,
1723            allow_csharp_verbatim_strings: false,
1724            symbol_patterns: SP_RUST,
1725            has_preprocessor: false,
1726        },
1727    ),
1728    (
1729        Language::Shell,
1730        StaticLangConfig {
1731            line_comments: &["#"],
1732            block_comment: None,
1733            allow_single_quote_strings: true,
1734            allow_double_quote_strings: true,
1735            allow_triple_quote_strings: false,
1736            allow_csharp_verbatim_strings: false,
1737            symbol_patterns: SP_SHELL,
1738            has_preprocessor: false,
1739        },
1740    ),
1741    (
1742        Language::PowerShell,
1743        StaticLangConfig {
1744            line_comments: &["#"],
1745            block_comment: Some(("<#", "#>")),
1746            allow_single_quote_strings: true,
1747            allow_double_quote_strings: true,
1748            allow_triple_quote_strings: false,
1749            allow_csharp_verbatim_strings: false,
1750            symbol_patterns: SP_POWERSHELL,
1751            has_preprocessor: false,
1752        },
1753    ),
1754    (
1755        Language::TypeScript,
1756        StaticLangConfig {
1757            line_comments: &["//"],
1758            block_comment: Some(("/*", "*/")),
1759            allow_single_quote_strings: true,
1760            allow_double_quote_strings: true,
1761            allow_triple_quote_strings: false,
1762            allow_csharp_verbatim_strings: false,
1763            symbol_patterns: SP_TS,
1764            has_preprocessor: false,
1765        },
1766    ),
1767    (
1768        Language::Python,
1769        StaticLangConfig {
1770            line_comments: &["#"],
1771            block_comment: None,
1772            allow_single_quote_strings: true,
1773            allow_double_quote_strings: true,
1774            allow_triple_quote_strings: true,
1775            allow_csharp_verbatim_strings: false,
1776            symbol_patterns: SP_PYTHON,
1777            has_preprocessor: false,
1778        },
1779    ),
1780    (
1781        Language::Assembly,
1782        StaticLangConfig {
1783            line_comments: &[";"],
1784            block_comment: None,
1785            allow_single_quote_strings: false,
1786            allow_double_quote_strings: false,
1787            allow_triple_quote_strings: false,
1788            allow_csharp_verbatim_strings: false,
1789            symbol_patterns: SP_ASSEMBLY,
1790            has_preprocessor: false,
1791        },
1792    ),
1793    (
1794        Language::Clojure,
1795        StaticLangConfig {
1796            line_comments: &[";"],
1797            block_comment: None,
1798            allow_single_quote_strings: false,
1799            allow_double_quote_strings: true,
1800            allow_triple_quote_strings: false,
1801            allow_csharp_verbatim_strings: false,
1802            symbol_patterns: SP_CLOJURE,
1803            has_preprocessor: false,
1804        },
1805    ),
1806    (
1807        Language::Css,
1808        StaticLangConfig {
1809            line_comments: &[],
1810            block_comment: Some(("/*", "*/")),
1811            allow_single_quote_strings: true,
1812            allow_double_quote_strings: true,
1813            allow_triple_quote_strings: false,
1814            allow_csharp_verbatim_strings: false,
1815            symbol_patterns: SP_NONE,
1816            has_preprocessor: false,
1817        },
1818    ),
1819    (
1820        Language::Dart,
1821        StaticLangConfig {
1822            line_comments: &["//"],
1823            block_comment: Some(("/*", "*/")),
1824            allow_single_quote_strings: true,
1825            allow_double_quote_strings: true,
1826            allow_triple_quote_strings: false,
1827            allow_csharp_verbatim_strings: false,
1828            symbol_patterns: SP_DART,
1829            has_preprocessor: false,
1830        },
1831    ),
1832    (
1833        Language::Dockerfile,
1834        StaticLangConfig {
1835            line_comments: &["#"],
1836            block_comment: None,
1837            allow_single_quote_strings: false,
1838            allow_double_quote_strings: false,
1839            allow_triple_quote_strings: false,
1840            allow_csharp_verbatim_strings: false,
1841            symbol_patterns: SP_NONE,
1842            has_preprocessor: false,
1843        },
1844    ),
1845    (
1846        Language::Elixir,
1847        StaticLangConfig {
1848            line_comments: &["#"],
1849            block_comment: None,
1850            allow_single_quote_strings: true,
1851            allow_double_quote_strings: true,
1852            allow_triple_quote_strings: false,
1853            allow_csharp_verbatim_strings: false,
1854            symbol_patterns: SP_ELIXIR,
1855            has_preprocessor: false,
1856        },
1857    ),
1858    (
1859        Language::Erlang,
1860        StaticLangConfig {
1861            line_comments: &["%"],
1862            block_comment: None,
1863            allow_single_quote_strings: false,
1864            allow_double_quote_strings: true,
1865            allow_triple_quote_strings: false,
1866            allow_csharp_verbatim_strings: false,
1867            symbol_patterns: SP_ERLANG,
1868            has_preprocessor: false,
1869        },
1870    ),
1871    (
1872        Language::FSharp,
1873        StaticLangConfig {
1874            line_comments: &["//"],
1875            block_comment: Some(("(*", "*)")),
1876            allow_single_quote_strings: false,
1877            allow_double_quote_strings: true,
1878            allow_triple_quote_strings: false,
1879            allow_csharp_verbatim_strings: false,
1880            symbol_patterns: SP_FSHARP,
1881            has_preprocessor: false,
1882        },
1883    ),
1884    (
1885        Language::Groovy,
1886        StaticLangConfig {
1887            line_comments: &["//"],
1888            block_comment: Some(("/*", "*/")),
1889            allow_single_quote_strings: true,
1890            allow_double_quote_strings: true,
1891            allow_triple_quote_strings: false,
1892            allow_csharp_verbatim_strings: false,
1893            symbol_patterns: SP_GROOVY,
1894            has_preprocessor: false,
1895        },
1896    ),
1897    (
1898        Language::Haskell,
1899        StaticLangConfig {
1900            line_comments: &["--"],
1901            block_comment: Some(("{-", "-}")),
1902            allow_single_quote_strings: true,
1903            allow_double_quote_strings: true,
1904            allow_triple_quote_strings: false,
1905            allow_csharp_verbatim_strings: false,
1906            symbol_patterns: SP_HASKELL,
1907            has_preprocessor: false,
1908        },
1909    ),
1910    (
1911        Language::Html,
1912        StaticLangConfig {
1913            line_comments: &[],
1914            block_comment: Some(("<!--", "-->")),
1915            allow_single_quote_strings: false,
1916            allow_double_quote_strings: false,
1917            allow_triple_quote_strings: false,
1918            allow_csharp_verbatim_strings: false,
1919            symbol_patterns: SP_NONE,
1920            has_preprocessor: false,
1921        },
1922    ),
1923    (
1924        Language::Julia,
1925        StaticLangConfig {
1926            line_comments: &["#"],
1927            block_comment: Some(("#=", "=#")),
1928            allow_single_quote_strings: false,
1929            allow_double_quote_strings: true,
1930            allow_triple_quote_strings: true,
1931            allow_csharp_verbatim_strings: false,
1932            symbol_patterns: SP_JULIA,
1933            has_preprocessor: false,
1934        },
1935    ),
1936    (
1937        Language::Kotlin,
1938        StaticLangConfig {
1939            line_comments: &["//"],
1940            block_comment: Some(("/*", "*/")),
1941            allow_single_quote_strings: true,
1942            allow_double_quote_strings: true,
1943            allow_triple_quote_strings: false,
1944            allow_csharp_verbatim_strings: false,
1945            symbol_patterns: SP_KOTLIN,
1946            has_preprocessor: false,
1947        },
1948    ),
1949    (
1950        Language::Lua,
1951        StaticLangConfig {
1952            line_comments: &["--"],
1953            block_comment: Some(("--[[", "]]")),
1954            allow_single_quote_strings: true,
1955            allow_double_quote_strings: true,
1956            allow_triple_quote_strings: false,
1957            allow_csharp_verbatim_strings: false,
1958            symbol_patterns: SP_LUA,
1959            has_preprocessor: false,
1960        },
1961    ),
1962    (
1963        Language::Makefile,
1964        StaticLangConfig {
1965            line_comments: &["#"],
1966            block_comment: None,
1967            allow_single_quote_strings: false,
1968            allow_double_quote_strings: false,
1969            allow_triple_quote_strings: false,
1970            allow_csharp_verbatim_strings: false,
1971            symbol_patterns: SP_NONE,
1972            has_preprocessor: false,
1973        },
1974    ),
1975    (
1976        Language::Nim,
1977        StaticLangConfig {
1978            line_comments: &["#"],
1979            block_comment: Some(("#[", "]#")),
1980            allow_single_quote_strings: true,
1981            allow_double_quote_strings: true,
1982            allow_triple_quote_strings: false,
1983            allow_csharp_verbatim_strings: false,
1984            symbol_patterns: SP_NIM,
1985            has_preprocessor: false,
1986        },
1987    ),
1988    (
1989        Language::Ocaml,
1990        StaticLangConfig {
1991            line_comments: &[],
1992            block_comment: Some(("(*", "*)")),
1993            allow_single_quote_strings: false,
1994            allow_double_quote_strings: true,
1995            allow_triple_quote_strings: false,
1996            allow_csharp_verbatim_strings: false,
1997            symbol_patterns: SP_OCAML,
1998            has_preprocessor: false,
1999        },
2000    ),
2001    (
2002        Language::Perl,
2003        StaticLangConfig {
2004            line_comments: &["#"],
2005            block_comment: None,
2006            allow_single_quote_strings: true,
2007            allow_double_quote_strings: true,
2008            allow_triple_quote_strings: false,
2009            allow_csharp_verbatim_strings: false,
2010            symbol_patterns: SP_PERL,
2011            has_preprocessor: false,
2012        },
2013    ),
2014    (
2015        Language::Php,
2016        StaticLangConfig {
2017            line_comments: &["//", "#"],
2018            block_comment: Some(("/*", "*/")),
2019            allow_single_quote_strings: true,
2020            allow_double_quote_strings: true,
2021            allow_triple_quote_strings: false,
2022            allow_csharp_verbatim_strings: false,
2023            symbol_patterns: SP_PHP,
2024            has_preprocessor: false,
2025        },
2026    ),
2027    (
2028        Language::R,
2029        StaticLangConfig {
2030            line_comments: &["#"],
2031            block_comment: None,
2032            allow_single_quote_strings: true,
2033            allow_double_quote_strings: true,
2034            allow_triple_quote_strings: false,
2035            allow_csharp_verbatim_strings: false,
2036            symbol_patterns: SP_R,
2037            has_preprocessor: false,
2038        },
2039    ),
2040    (
2041        Language::Ruby,
2042        StaticLangConfig {
2043            line_comments: &["#"],
2044            block_comment: None,
2045            allow_single_quote_strings: true,
2046            allow_double_quote_strings: true,
2047            allow_triple_quote_strings: false,
2048            allow_csharp_verbatim_strings: false,
2049            symbol_patterns: SP_RUBY,
2050            has_preprocessor: false,
2051        },
2052    ),
2053    (
2054        Language::Scala,
2055        StaticLangConfig {
2056            line_comments: &["//"],
2057            block_comment: Some(("/*", "*/")),
2058            allow_single_quote_strings: true,
2059            allow_double_quote_strings: true,
2060            allow_triple_quote_strings: false,
2061            allow_csharp_verbatim_strings: false,
2062            symbol_patterns: SP_SCALA,
2063            has_preprocessor: false,
2064        },
2065    ),
2066    (
2067        Language::Scss,
2068        StaticLangConfig {
2069            line_comments: &["//"],
2070            block_comment: Some(("/*", "*/")),
2071            allow_single_quote_strings: true,
2072            allow_double_quote_strings: true,
2073            allow_triple_quote_strings: false,
2074            allow_csharp_verbatim_strings: false,
2075            symbol_patterns: SP_NONE,
2076            has_preprocessor: false,
2077        },
2078    ),
2079    (
2080        Language::Sql,
2081        StaticLangConfig {
2082            line_comments: &["--"],
2083            block_comment: Some(("/*", "*/")),
2084            allow_single_quote_strings: true,
2085            allow_double_quote_strings: false,
2086            allow_triple_quote_strings: false,
2087            allow_csharp_verbatim_strings: false,
2088            symbol_patterns: SP_SQL,
2089            has_preprocessor: false,
2090        },
2091    ),
2092    (
2093        Language::Swift,
2094        StaticLangConfig {
2095            line_comments: &["//"],
2096            block_comment: Some(("/*", "*/")),
2097            allow_single_quote_strings: false,
2098            allow_double_quote_strings: true,
2099            allow_triple_quote_strings: false,
2100            allow_csharp_verbatim_strings: false,
2101            symbol_patterns: SP_SWIFT,
2102            has_preprocessor: false,
2103        },
2104    ),
2105    (
2106        Language::Xml,
2107        StaticLangConfig {
2108            line_comments: &[],
2109            block_comment: Some(("<!--", "-->")),
2110            allow_single_quote_strings: false,
2111            allow_double_quote_strings: false,
2112            allow_triple_quote_strings: false,
2113            allow_csharp_verbatim_strings: false,
2114            symbol_patterns: SP_NONE,
2115            has_preprocessor: false,
2116        },
2117    ),
2118    (
2119        Language::Zig,
2120        StaticLangConfig {
2121            line_comments: &["//"],
2122            block_comment: None,
2123            allow_single_quote_strings: true,
2124            allow_double_quote_strings: true,
2125            allow_triple_quote_strings: false,
2126            allow_csharp_verbatim_strings: false,
2127            symbol_patterns: SP_ZIG,
2128            has_preprocessor: false,
2129        },
2130    ),
2131];
2132
/// Per-call IEEE 1045-1992 flags derived from `AnalysisOptions` plus per-language properties.
/// Private to this crate; constructed inside `analyze_text`.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// True for C, C++, and Objective-C — languages with a C preprocessor.
    /// When set, pure-code lines whose trimmed text starts with `#` are counted
    /// as compiler-directive lines.
    has_preprocessor_directives: bool,
    /// Mirrors `AnalysisOptions::blank_in_block_comment_as_comment`.
    /// When set, blank lines inside a block comment are treated as comment lines.
    blank_in_block_comment_as_comment: bool,
    /// Mirrors `AnalysisOptions::collapse_continuation_lines`.
    /// When set, a line ending in `\` (outside any block comment or string) is merged
    /// with the lines that follow it before being classified.
    collapse_continuation_lines: bool,
}
2144
/// The kind of string literal the lexer is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Quote-delimited literal; the payload is the closing quote character (`'` or `"`).
    Single(char),
    /// Triple-quoted literal; the payload is the closing delimiter (`"""` or `'''`).
    Triple(&'static str),
    /// Verbatim string opened with `@"`, in which `""` stands for an escaped quote.
    VerbatimDouble,
}
2151
/// Facts gathered about one physical line (or a merged continuation sequence);
/// `classify_line` turns them into exactly one line-category count.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// A code character or any part of a string literal was seen on the line.
    has_code: bool,
    /// A line-comment prefix was seen on the line.
    has_single_comment: bool,
    /// Some part of the line lies inside a block comment.
    has_multi_comment: bool,
    /// The line is a docstring line; `classify_line` checks this flag before all others.
    has_docstring: bool,
}
2160
2161/// Process one character while the lexer is inside a string literal.
2162///
2163/// Returns `(new_string_state, advance)` where `advance` is the number of chars to skip.
2164fn process_string_char(
2165    state: StringState,
2166    chars: &[char],
2167    i: usize,
2168) -> (Option<StringState>, usize) {
2169    match state {
2170        StringState::Single(delim) => {
2171            if chars[i] == '\\' {
2172                return (Some(state), 2); // skip escaped character
2173            }
2174            if chars[i] == delim {
2175                (None, 1)
2176            } else {
2177                (Some(state), 1)
2178            }
2179        }
2180        StringState::Triple(delim) => {
2181            if starts_with(chars, i, delim) {
2182                (None, delim.len())
2183            } else {
2184                (Some(state), 1)
2185            }
2186        }
2187        StringState::VerbatimDouble => {
2188            if starts_with(chars, i, "\"\"") {
2189                return (Some(state), 2); // escaped quote-quote inside verbatim string
2190            }
2191            if chars[i] == '"' {
2192                (None, 1)
2193            } else {
2194                (Some(state), 1)
2195            }
2196        }
2197    }
2198}
2199
2200/// Process one character while the lexer is inside a block comment.
2201///
2202/// Returns `(still_in_block_comment, advance)`.
2203fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2204    if starts_with(chars, i, close) {
2205        (false, close.len())
2206    } else {
2207        (true, 1)
2208    }
2209}
2210
2211/// Attempt to begin a new string literal at position `i`.
2212///
2213/// Returns `Some((new_state, advance))` when a string opener is detected, else `None`.
2214fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2215    if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2216        return Some((StringState::VerbatimDouble, 2));
2217    }
2218    if config.allow_triple_quote_strings {
2219        if starts_with(chars, i, "\"\"\"") {
2220            return Some((StringState::Triple("\"\"\""), 3));
2221        }
2222        if starts_with(chars, i, "'''") {
2223            return Some((StringState::Triple("'''"), 3));
2224        }
2225    }
2226    if config.allow_single_quote_strings && chars[i] == '\'' {
2227        return Some((StringState::Single('\''), 1));
2228    }
2229    if config.allow_double_quote_strings && chars[i] == '"' {
2230        return Some((StringState::Single('"'), 1));
2231    }
2232    None
2233}
2234
2235/// Advance past one character position while inside a block comment.
2236///
2237/// Updates `in_block_comment` if the closing delimiter is found and returns the
2238/// number of characters consumed. Returns 0 when no block-comment config is set
2239/// (preserving the caller's `continue`-without-advance behaviour for that impossible state).
2240fn step_through_block_comment(
2241    chars: &[char],
2242    i: usize,
2243    block_comment: Option<(&'static str, &'static str)>,
2244    in_block_comment: &mut bool,
2245) -> usize {
2246    if let Some((_, close)) = block_comment {
2247        let (still_in, advance) = process_block_comment_char(chars, i, close);
2248        *in_block_comment = still_in;
2249        return advance;
2250    }
2251    0
2252}
2253
2254/// If the character at `i` starts a block comment, return the length of the opening
2255/// delimiter so the caller can advance past it. Returns `None` if no match.
2256fn try_open_block_comment(
2257    chars: &[char],
2258    i: usize,
2259    block_comment: Option<(&'static str, &'static str)>,
2260) -> Option<usize> {
2261    let (open, _) = block_comment?;
2262    starts_with(chars, i, open).then_some(open.len())
2263}
2264
2265/// Scan a single physical line and update `facts`, `in_block_comment`, and `string_state`.
2266///
2267/// Returns `true` when the caller should break out of the per-line loop early (line comment hit).
2268fn scan_line(
2269    chars: &[char],
2270    config: &ScanConfig,
2271    facts: &mut LineFacts,
2272    in_block_comment: &mut bool,
2273    string_state: &mut Option<StringState>,
2274) {
2275    let mut i = 0usize;
2276    while i < chars.len() {
2277        // Inside a string literal โ€” advance until the closing delimiter.
2278        if let Some(state) = *string_state {
2279            facts.has_code = true;
2280            let (new_state, advance) = process_string_char(state, chars, i);
2281            *string_state = new_state;
2282            i += advance;
2283            continue;
2284        }
2285
2286        // Inside a block comment โ€” advance until the closing delimiter.
2287        if *in_block_comment {
2288            facts.has_multi_comment = true;
2289            i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
2290            continue;
2291        }
2292
2293        // Whitespace outside any string/comment โ€” skip.
2294        if chars[i].is_whitespace() {
2295            i += 1;
2296            continue;
2297        }
2298
2299        // Attempt to open a string literal.
2300        if let Some((new_state, advance)) = try_open_string(chars, i, config) {
2301            facts.has_code = true;
2302            *string_state = Some(new_state);
2303            i += advance;
2304            continue;
2305        }
2306
2307        // Attempt to open a block comment.
2308        if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
2309            facts.has_multi_comment = true;
2310            *in_block_comment = true;
2311            i += advance;
2312            continue;
2313        }
2314
2315        // Line comment โ€” rest of the line is a comment; stop scanning.
2316        if config
2317            .line_comments
2318            .iter()
2319            .any(|prefix| starts_with(chars, i, prefix))
2320        {
2321            facts.has_single_comment = true;
2322            break;
2323        }
2324
2325        // Plain code character.
2326        facts.has_code = true;
2327        i += 1;
2328    }
2329}
2330
2331/// Apply IEEE 1045-1992 ยง4.2 preprocessor-directive tracking and continuation-line merging,
2332/// then emit the finalized `LineFacts` for this physical line.
2333///
2334/// Returns `None` when the line is part of a continuation sequence and should be deferred.
2335fn finalize_line_facts(
2336    facts: LineFacts,
2337    trimmed: &str,
2338    raw: &mut RawLineCounts,
2339    ieee: IeeeFlags,
2340    in_block_comment: bool,
2341    string_state: Option<StringState>,
2342    pending_continuation: &mut Option<LineFacts>,
2343) -> Option<LineFacts> {
2344    // IEEE 1045-1992 ยง4.2: track preprocessor/compiler directive lines (C/C++/ObjC).
2345    // A directive line is a pure code line (no comment on the same physical line) whose
2346    // trimmed content starts with '#'.
2347    if ieee.has_preprocessor_directives
2348        && facts.has_code
2349        && !facts.has_single_comment
2350        && !facts.has_multi_comment
2351        && trimmed.starts_with('#')
2352    {
2353        raw.compiler_directive_lines += 1;
2354    }
2355
2356    // IEEE 1045-1992 continuation-line handling.
2357    // A line is a continuation starter when it ends with '\' outside any comment or string.
2358    let is_continuation = ieee.collapse_continuation_lines
2359        && !in_block_comment
2360        && string_state.is_none()
2361        && trimmed.ends_with('\\');
2362
2363    if is_continuation {
2364        let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2365        pending.has_code |= facts.has_code;
2366        pending.has_single_comment |= facts.has_single_comment;
2367        pending.has_multi_comment |= facts.has_multi_comment;
2368        pending.has_docstring |= facts.has_docstring;
2369        return None; // defer classification until the sequence ends
2370    }
2371
2372    // Merge any accumulated continuation facts into the final line.
2373    let emit = if let Some(pending) = pending_continuation.take() {
2374        LineFacts {
2375            has_code: pending.has_code | facts.has_code,
2376            has_single_comment: pending.has_single_comment | facts.has_single_comment,
2377            has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2378            has_docstring: pending.has_docstring | facts.has_docstring,
2379        }
2380    } else {
2381        facts
2382    };
2383    Some(emit)
2384}
2385
2386/// Scan and classify one physical line, updating all running state in place.
2387///
2388/// Pre-classified lines (present in `config.skip_lines`) are counted as docstring-comment
2389/// lines and returned early without further analysis.
2390#[allow(clippy::needless_pass_by_value)]
2391#[allow(clippy::too_many_arguments)]
2392#[allow(clippy::many_single_char_names)] // destructuring return from count_symbols; names match field roles
2393fn process_physical_line(
2394    line: &str,
2395    line_idx: usize,
2396    config: &ScanConfig,
2397    raw: &mut RawLineCounts,
2398    in_block_comment: &mut bool,
2399    string_state: &mut Option<StringState>,
2400    pending_continuation: &mut Option<LineFacts>,
2401    ieee: IeeeFlags,
2402) {
2403    raw.total_physical_lines += 1;
2404
2405    if config.skip_lines.contains(&line_idx) {
2406        raw.docstring_comment_lines += 1;
2407        return;
2408    }
2409
2410    let trimmed = line.trim();
2411    let mut facts = LineFacts::default();
2412
2413    // IEEE 1045-1992: blank lines inside block comments are comment lines by default.
2414    // When blank_in_block_comment_as_comment is false, blank lines keep their blank
2415    // classification even while inside a block comment.
2416    if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
2417        facts.has_multi_comment = true;
2418    }
2419
2420    let chars: Vec<char> = line.chars().collect();
2421    scan_line(&chars, config, &mut facts, in_block_comment, string_state);
2422
2423    let Some(emit) = finalize_line_facts(
2424        facts,
2425        trimmed,
2426        raw,
2427        ieee,
2428        *in_block_comment,
2429        *string_state,
2430        pending_continuation,
2431    ) else {
2432        return;
2433    };
2434
2435    classify_line(raw, &emit, trimmed);
2436
2437    if emit.has_code {
2438        let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
2439        raw.functions += f;
2440        raw.classes += c;
2441        raw.variables += v;
2442        raw.imports += i;
2443        raw.test_count += t;
2444        raw.test_assertion_count += a;
2445        raw.test_suite_count += s;
2446    }
2447}
2448
2449#[allow(clippy::needless_pass_by_value)]
2450fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
2451    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2452    let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2453
2454    let mut raw = RawLineCounts::default();
2455    let mut warnings = Vec::new();
2456
2457    let mut in_block_comment = false;
2458    let mut string_state: Option<StringState> = None;
2459    // IEEE continuation-line state: accumulates facts across a backslash-continued sequence.
2460    let mut pending_continuation: Option<LineFacts> = None;
2461
2462    for (line_idx, line) in lines.iter().enumerate() {
2463        process_physical_line(
2464            line,
2465            line_idx,
2466            &config,
2467            &mut raw,
2468            &mut in_block_comment,
2469            &mut string_state,
2470            &mut pending_continuation,
2471            ieee,
2472        );
2473    }
2474
2475    // Flush any pending continuation that reaches end-of-file without a closing line.
2476    if let Some(pending) = pending_continuation.take() {
2477        classify_line(&mut raw, &pending, "");
2478    }
2479
2480    if in_block_comment {
2481        warnings.push("unclosed block comment detected; result is best effort".into());
2482    }
2483    if string_state.is_some() {
2484        warnings.push("unclosed string literal detected; result is best effort".into());
2485    }
2486
2487    RawFileAnalysis {
2488        raw,
2489        parse_mode: if warnings.is_empty() {
2490            ParseMode::Lexical
2491        } else {
2492            ParseMode::LexicalBestEffort
2493        },
2494        warnings,
2495    }
2496}
2497
2498const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
2499    if facts.has_docstring {
2500        raw.docstring_comment_lines += 1;
2501    } else if !facts.has_code
2502        && !facts.has_single_comment
2503        && !facts.has_multi_comment
2504        && trimmed.is_empty()
2505    {
2506        raw.blank_only_lines += 1;
2507    } else if facts.has_code && facts.has_single_comment {
2508        raw.mixed_code_single_comment_lines += 1;
2509    } else if facts.has_code && facts.has_multi_comment {
2510        raw.mixed_code_multi_comment_lines += 1;
2511    } else if facts.has_code {
2512        raw.code_only_lines += 1;
2513    } else if facts.has_single_comment {
2514        raw.single_comment_only_lines += 1;
2515    } else if facts.has_multi_comment {
2516        raw.multi_comment_only_lines += 1;
2517    } else if trimmed.is_empty() {
2518        raw.blank_only_lines += 1;
2519    } else {
2520        raw.skipped_unknown_lines += 1;
2521    }
2522}
2523
2524fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
2525    let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
2526    // For return-type-led languages (C/C++): match prefix AND `(` present AND no `=` sits
2527    // between the prefix start and the first `(` (guards against `void* p = malloc(n)`).
2528    let fn_pp = if patterns.functions_prefix_paren.is_empty() {
2529        0
2530    } else if let Some(paren_pos) = trimmed.find('(') {
2531        if !trimmed[..paren_pos].contains('=') {
2532            hit(patterns.functions_prefix_paren)
2533        } else {
2534            0
2535        }
2536    } else {
2537        0
2538    };
2539    (
2540        hit(patterns.functions) | fn_pp,
2541        hit(patterns.classes),
2542        hit(patterns.variables),
2543        hit(patterns.imports),
2544        hit(patterns.tests),
2545        hit(patterns.assertions),
2546        hit(patterns.test_suites),
2547    )
2548}
2549
/// Return `true` when the characters of `needle` appear in `chars` starting at `index`.
///
/// Returns `false` when `index` lies beyond the end of `chars` or when `needle` would
/// run past the end. An empty `needle` matches at any `index <= chars.len()`.
///
/// The comparison walks both sequences in lockstep and allocates nothing; this
/// function is called for every character position the scanner visits.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    chars.get(index..).is_some_and(|tail| {
        let mut remaining = tail.iter();
        needle
            .chars()
            .all(|expected| remaining.next() == Some(&expected))
    })
}
2554
/// One entry of the scope stack used while locating Python docstrings.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation (number of leading whitespace characters) of the scope's body;
    /// 0 for the module-level entry.
    indent: usize,
    /// True until the first significant line of the scope has been examined; only
    /// that line may open the scope's docstring.
    expect_docstring: bool,
}
2560
2561/// Update `contexts` to pop any scopes that the current `indent` has outdented past.
2562fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
2563    while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
2564        contexts.pop();
2565    }
2566}
2567
2568/// Handle `pending_block_indent` transition: push a new docstring-expecting context when we
2569/// detect the first indented line of a new block, or cancel the pending state otherwise.
2570fn py_handle_pending_indent(
2571    pending_block_indent: &mut Option<usize>,
2572    contexts: &mut Vec<PyContext>,
2573    indent: usize,
2574    trimmed: &str,
2575) {
2576    let Some(base_indent) = *pending_block_indent else {
2577        return;
2578    };
2579    if indent > base_indent {
2580        contexts.push(PyContext {
2581            indent,
2582            expect_docstring: true,
2583        });
2584        *pending_block_indent = None;
2585    } else if !trimmed.starts_with('@') {
2586        *pending_block_indent = None;
2587    }
2588}
2589
2590/// Check whether the current line is a docstring opener in the current context.
2591///
2592/// If it is, records the line, adjusts `ctx.expect_docstring`, and optionally sets
2593/// `active_docstring` for multi-line docstrings. Returns `true` when the caller should
2594/// `continue` to the next line.
2595fn py_try_record_docstring(
2596    ctx: &mut PyContext,
2597    trimmed: &str,
2598    idx: usize,
2599    docstring_lines: &mut HashSet<usize>,
2600    active_docstring: &mut Option<(&'static str, usize)>,
2601) -> bool {
2602    if !ctx.expect_docstring {
2603        return false;
2604    }
2605    if let Some(delim) = docstring_delimiter(trimmed) {
2606        docstring_lines.insert(idx);
2607        ctx.expect_docstring = false;
2608        if !closes_triple_docstring(trimmed, delim, true) {
2609            *active_docstring = Some((delim, idx));
2610        }
2611        return true;
2612    }
2613    ctx.expect_docstring = false;
2614    false
2615}
2616
2617/// Advance through an active multi-line docstring: marks the current line and clears
2618/// `active_docstring` when the closing delimiter is found. Returns `true` when the caller
2619/// should `continue` to the next line (i.e. we were inside a docstring).
2620fn track_active_docstring(
2621    active_docstring: &mut Option<(&'static str, usize)>,
2622    docstring_lines: &mut HashSet<usize>,
2623    idx: usize,
2624    trimmed: &str,
2625) -> bool {
2626    let Some((delim, start_line)) = *active_docstring else {
2627        return false;
2628    };
2629    docstring_lines.insert(idx);
2630    if closes_triple_docstring(trimmed, delim, idx == start_line) {
2631        *active_docstring = None;
2632    }
2633    true
2634}
2635
2636/// Attempt to record a docstring opener using the top of the context stack.
2637/// Returns `true` when the caller should `continue` to the next line.
2638fn try_record_docstring_if_context(
2639    contexts: &mut [PyContext],
2640    trimmed: &str,
2641    idx: usize,
2642    docstring_lines: &mut HashSet<usize>,
2643    active_docstring: &mut Option<(&'static str, usize)>,
2644) -> bool {
2645    let Some(ctx) = contexts.last_mut() else {
2646        return false;
2647    };
2648    py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
2649}
2650
/// Mark the tail of the file as docstring when a docstring is still open at end-of-file.
///
/// If `active_docstring` is `Some`, every index from its opening line up to (but not
/// including) `num_lines` is inserted into `docstring_lines`. Does nothing otherwise.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, start_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(start_line..num_lines);
}
2663
2664fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
2665    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2666    let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2667
2668    let mut docstring_lines = HashSet::new();
2669    let mut contexts = vec![PyContext {
2670        indent: 0,
2671        expect_docstring: true,
2672    }];
2673    let mut pending_block_indent: Option<usize> = None;
2674    let mut active_docstring: Option<(&'static str, usize)> = None;
2675
2676    for (idx, line) in lines.iter().enumerate() {
2677        let trimmed = line.trim();
2678        let indent = leading_indent(line);
2679
2680        if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
2681            continue;
2682        }
2683
2684        // Blank lines and comment lines don't affect docstring detection.
2685        if trimmed.is_empty() || trimmed.starts_with('#') {
2686            continue;
2687        }
2688
2689        py_pop_outdented_contexts(&mut contexts, indent);
2690        py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
2691
2692        if try_record_docstring_if_context(
2693            &mut contexts,
2694            trimmed,
2695            idx,
2696            &mut docstring_lines,
2697            &mut active_docstring,
2698        ) {
2699            continue;
2700        }
2701
2702        if is_python_block_header(trimmed) {
2703            pending_block_indent = Some(indent);
2704        }
2705    }
2706
2707    mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
2708
2709    docstring_lines
2710}
2711
/// Count the whitespace characters at the start of `line`.
///
/// Each character counts as one, so a tab contributes 1. A line consisting only of
/// whitespace yields its full character count.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .position(|c| !c.is_whitespace())
        .unwrap_or_else(|| line.chars().count())
}
2715
/// Return `true` when a trimmed line is a single-line `def`, `async def`, or `class`
/// header, i.e. it starts with one of those keywords followed by a space and ends
/// with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];

    trimmed.ends_with(':')
        && HEADER_KEYWORDS
            .iter()
            .any(|keyword| trimmed.starts_with(keyword))
}
2722
/// Return the triple-quote delimiter a trimmed line starts with, if any.
///
/// Any run of string-prefix letters (`r`, `u`, `b`, `f` in either case) at the start of
/// the line is skipped first. The result is `"""` or `'''` when the remainder starts
/// with that delimiter, and `None` otherwise.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    const TRIPLE_QUOTES: [&str; 2] = ["\"\"\"", "'''"];

    let after_prefix = trimmed
        .trim_start_matches(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F'));

    TRIPLE_QUOTES
        .iter()
        .copied()
        .find(|quotes| after_prefix.starts_with(*quotes))
}
2744
/// Return `true` when `trimmed` contains the delimiter that closes a triple-quoted
/// docstring.
///
/// On the line that opened the docstring (`same_line_as_start`) the first occurrence
/// of `delim` is the opener, so two non-overlapping occurrences are required; on any
/// later line one occurrence is enough.
///
/// An empty `delim` can never close anything and yields `false`.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    if delim.is_empty() {
        return false;
    }
    let required = if same_line_as_start { 2 } else { 1 };
    // `matches` yields non-overlapping occurrences; stop counting once enough are found.
    trimmed.matches(delim).take(required).count() == required
}
2759
2760/// Tree-sitter-backed adapters (compiled only when the `tree-sitter` feature is enabled).
2761///
2762/// When parsing succeeds the result is used directly; on any failure the caller falls back
2763/// to the lexical state machine.
2764#[cfg(feature = "tree-sitter")]
2765pub mod ts {
2766    use tree_sitter::Node;
2767
2768    use super::{ParseMode, RawFileAnalysis, RawLineCounts};
2769
2770    /// Classify every line of `text` using a tree-sitter grammar.
2771    ///
2772    /// `comment_node_kinds` โ€” node type names that represent comments in this grammar
2773    /// `docstring_stmt_kind` โ€” optional parent node type whose direct `string` child is a docstring
2774    fn analyze_lines(
2775        text: &str,
2776        ts_language: &tree_sitter::Language,
2777        comment_node_kinds: &[&str],
2778        docstring_stmt_kind: Option<&str>,
2779    ) -> Option<RawFileAnalysis> {
2780        let mut parser = tree_sitter::Parser::new();
2781        parser.set_language(ts_language).ok()?;
2782        let tree = parser.parse(text, None)?;
2783
2784        let lines: Vec<&str> = text.split_terminator('\n').collect();
2785        let n = lines.len();
2786
2787        let mut has_code = vec![false; n];
2788        let mut has_comment = vec![false; n];
2789        let mut comment_is_block = vec![false; n];
2790        let mut has_docstring = vec![false; n];
2791
2792        // Walk every node in the tree and mark line arrays.
2793        let mut ctx = VisitCtx {
2794            source: text.as_bytes(),
2795            comment_kinds: comment_node_kinds,
2796            docstring_stmt_kind,
2797            has_code: &mut has_code,
2798            has_comment: &mut has_comment,
2799            comment_is_block: &mut comment_is_block,
2800            has_docstring: &mut has_docstring,
2801        };
2802        visit(tree.root_node(), &mut ctx);
2803
2804        let mut raw = RawLineCounts::default();
2805        classify_ts_lines(
2806            &lines,
2807            &has_code,
2808            &has_comment,
2809            &comment_is_block,
2810            &has_docstring,
2811            &mut raw,
2812        );
2813
2814        Some(RawFileAnalysis {
2815            raw,
2816            parse_mode: ParseMode::TreeSitter,
2817            warnings: Vec::new(),
2818        })
2819    }
2820
    /// Flags describing what kinds of content appear on a single line.
    ///
    /// One value is assembled per physical line from the per-row tables and handed to
    /// `classify_ts_line`, which turns it into exactly one counter increment.
    // Four bools are the natural representation for these four independent properties.
    #[allow(clippy::struct_excessive_bools)]
    #[derive(Clone, Copy)]
    struct TsLineFlags {
        /// The line was marked as containing code.
        has_code: bool,
        /// The line was marked as containing a comment.
        has_comment: bool,
        /// The comment on this line is a block comment rather than a line comment.
        comment_is_block: bool,
        /// The line was marked as part of a docstring.
        has_docstring: bool,
    }
2831
2832    /// Classify a single tree-sitter-annotated line and accumulate into `raw`.
2833    const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
2834        if trimmed.is_empty() {
2835            raw.blank_only_lines += 1;
2836        } else if flags.has_docstring && !flags.has_code {
2837            raw.docstring_comment_lines += 1;
2838        } else if flags.has_code && flags.has_comment {
2839            // Classify the mixed line as single or multi based on what kind of comment is on it.
2840            if flags.comment_is_block {
2841                raw.mixed_code_multi_comment_lines += 1;
2842            } else {
2843                raw.mixed_code_single_comment_lines += 1;
2844            }
2845        } else if flags.has_comment {
2846            if flags.comment_is_block {
2847                raw.multi_comment_only_lines += 1;
2848            } else {
2849                raw.single_comment_only_lines += 1;
2850            }
2851        } else {
2852            raw.code_only_lines += 1;
2853        }
2854    }
2855
2856    /// Classify each tree-sitter-annotated line and accumulate counts into `raw`.
2857    fn classify_ts_lines(
2858        lines: &[&str],
2859        has_code: &[bool],
2860        has_comment: &[bool],
2861        comment_is_block: &[bool],
2862        has_docstring: &[bool],
2863        raw: &mut RawLineCounts,
2864    ) {
2865        for i in 0..lines.len() {
2866            raw.total_physical_lines += 1;
2867            classify_ts_line(
2868                lines[i].trim(),
2869                TsLineFlags {
2870                    has_code: has_code[i],
2871                    has_comment: has_comment[i],
2872                    comment_is_block: comment_is_block[i],
2873                    has_docstring: has_docstring[i],
2874                },
2875                raw,
2876            );
2877        }
2878    }
2879
2880    struct VisitCtx<'a> {
2881        source: &'a [u8],
2882        comment_kinds: &'a [&'a str],
2883        docstring_stmt_kind: Option<&'a str>,
2884        has_code: &'a mut Vec<bool>,
2885        has_comment: &'a mut Vec<bool>,
2886        comment_is_block: &'a mut Vec<bool>,
2887        has_docstring: &'a mut Vec<bool>,
2888    }
2889
2890    /// Mark all rows of a comment node and detect whether it is a block comment.
2891    fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
2892        let start_row = node.start_position().row;
2893        let end_row = node.end_position().row;
2894        let first_two = node
2895            .utf8_text(ctx.source)
2896            .unwrap_or("")
2897            .get(..2)
2898            .unwrap_or("");
2899        let is_block = first_two == "/*" || first_two == "<#";
2900        for row in start_row..=end_row {
2901            if row < ctx.has_comment.len() {
2902                ctx.has_comment[row] = true;
2903                if is_block {
2904                    ctx.comment_is_block[row] = true;
2905                }
2906            }
2907        }
2908    }
2909
2910    /// If `node` is an `expression_statement` whose sole named child is a string literal,
2911    /// mark those rows as docstring and return `true`.
2912    fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
2913        let Some(stmt_kind) = ctx.docstring_stmt_kind else {
2914            return false;
2915        };
2916        if kind != stmt_kind || node.named_child_count() != 1 {
2917            return false;
2918        }
2919        let Some(child) = node.named_child(0) else {
2920            return false;
2921        };
2922        if child.kind() != "string" {
2923            return false;
2924        }
2925        let child_start = child.start_position().row;
2926        let child_end = child.end_position().row;
2927        for row in child_start..=child_end {
2928            if row < ctx.has_docstring.len() {
2929                ctx.has_docstring[row] = true;
2930            }
2931        }
2932        true
2933    }
2934
2935    /// Mark all rows of a leaf (non-comment, non-extra) node as code.
2936    fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
2937        let start_row = node.start_position().row;
2938        let end_row = node.end_position().row;
2939        for row in start_row..=end_row {
2940            if row < ctx.has_code.len() {
2941                ctx.has_code[row] = true;
2942            }
2943        }
2944    }
2945
2946    #[allow(clippy::too_many_lines)]
2947    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
2948        // NOSONAR
2949        let kind = node.kind();
2950
2951        // Comment node โ€” mark rows as comment, detect block vs. line comment.
2952        if ctx.comment_kinds.contains(&kind) {
2953            visit_comment_node(node, ctx);
2954            return;
2955        }
2956
2957        // Python docstring: expression_statement whose only named child is a string literal.
2958        if visit_maybe_docstring(node, kind, ctx) {
2959            return;
2960        }
2961
2962        // Leaf non-comment node: mark as code.
2963        if node.child_count() == 0 && !node.is_extra() {
2964            visit_leaf_code(node, ctx);
2965            return;
2966        }
2967
2968        for i in 0..node.child_count() {
2969            #[allow(clippy::cast_possible_truncation)]
2970            // child_count bounded by tree-sitter u32 capacity
2971            if let Some(child) = node.child(i as u32) {
2972                visit(child, ctx);
2973            }
2974        }
2975    }
2976
2977    /// Parse C or C++ source with tree-sitter-c.
2978    #[must_use]
2979    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
2980        let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
2981        analyze_lines(text, &lang, &["comment"], None)
2982    }
2983
2984    /// Parse Python source with tree-sitter-python.
2985    #[must_use]
2986    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
2987        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
2988        analyze_lines(text, &lang, &["comment"], Some("expression_statement"))
2989    }
2990}
2991
2992#[cfg(test)]
2993mod tests {
2994    use super::*;
2995
2996    #[test]
2997    fn python_docstrings_are_separated() {
2998        let input = r#""""module docs"""
2999
3000
3001def fn_a():
3002    """function docs"""
3003    value = 1  # trailing comment
3004    return value
3005"#;
3006
3007        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
3008        assert_eq!(result.raw.docstring_comment_lines, 2);
3009        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
3010        assert_eq!(result.raw.code_only_lines, 2);
3011    }
3012
3013    #[test]
3014    fn c_style_mixed_lines_are_captured() {
3015        let input = "int x = 1; // note\n/* block */\n";
3016        let result = analyze_text(Language::C, input, AnalysisOptions::default());
3017        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
3018        assert_eq!(result.raw.multi_comment_only_lines, 1);
3019    }
3020
3021    #[test]
3022    fn detect_language_by_shebang() {
3023        let language = detect_language(
3024            Path::new("script"),
3025            Some("#!/usr/bin/env bash"),
3026            &BTreeMap::new(),
3027            true,
3028        );
3029        assert_eq!(language, Some(Language::Shell));
3030    }
3031}