Skip to main content

sloc_languages/
lib.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright (C) 2026 Nima Shafie <nimzshafie@gmail.com>
3
4use std::collections::{BTreeMap, BTreeSet, HashSet};
5use std::path::Path;
6
7use serde::{Deserialize, Serialize};
8
/// A source language recognised by this crate.
///
/// Serde serializes each variant under its `snake_case` name (for example `CSharp` becomes
/// `"c_sharp"` and `JavaScript` becomes `"java_script"`), as requested by the `rename_all`
/// attribute below.
///
/// The derived `Ord` follows declaration order, so the order of the variants determines the
/// iteration order of ordered collections such as `BTreeSet<Language>`. Reordering variants
/// is therefore an observable change.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    // --- Extended language support ---
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
}
55
56impl Language {
57    #[must_use]
58    pub const fn display_name(&self) -> &'static str {
59        match self {
60            Self::C => "C",
61            Self::Cpp => "C++",
62            Self::CSharp => "C#",
63            Self::Go => "Go",
64            Self::Java => "Java",
65            Self::JavaScript => "JavaScript",
66            Self::Python => "Python",
67            Self::Rust => "Rust",
68            Self::Shell => "Shell",
69            Self::PowerShell => "PowerShell",
70            Self::TypeScript => "TypeScript",
71            Self::Assembly => "Assembly",
72            Self::Clojure => "Clojure",
73            Self::Css => "CSS",
74            Self::Dart => "Dart",
75            Self::Dockerfile => "Dockerfile",
76            Self::Elixir => "Elixir",
77            Self::Erlang => "Erlang",
78            Self::FSharp => "F#",
79            Self::Groovy => "Groovy",
80            Self::Haskell => "Haskell",
81            Self::Html => "HTML",
82            Self::Julia => "Julia",
83            Self::Kotlin => "Kotlin",
84            Self::Lua => "Lua",
85            Self::Makefile => "Makefile",
86            Self::Nim => "Nim",
87            Self::ObjectiveC => "Objective-C",
88            Self::Ocaml => "OCaml",
89            Self::Perl => "Perl",
90            Self::Php => "PHP",
91            Self::R => "R",
92            Self::Ruby => "Ruby",
93            Self::Scala => "Scala",
94            Self::Scss => "SCSS",
95            Self::Sql => "SQL",
96            Self::Svelte => "Svelte",
97            Self::Swift => "Swift",
98            Self::Vue => "Vue",
99            Self::Xml => "XML",
100            Self::Zig => "Zig",
101        }
102    }
103
104    #[must_use]
105    pub const fn as_slug(&self) -> &'static str {
106        match self {
107            Self::C => "c",
108            Self::Cpp => "cpp",
109            Self::CSharp => "csharp",
110            Self::Go => "go",
111            Self::Java => "java",
112            Self::JavaScript => "javascript",
113            Self::Python => "python",
114            Self::Rust => "rust",
115            Self::Shell => "shell",
116            Self::PowerShell => "powershell",
117            Self::TypeScript => "typescript",
118            Self::Assembly => "assembly",
119            Self::Clojure => "clojure",
120            Self::Css => "css",
121            Self::Dart => "dart",
122            Self::Dockerfile => "dockerfile",
123            Self::Elixir => "elixir",
124            Self::Erlang => "erlang",
125            Self::FSharp => "fsharp",
126            Self::Groovy => "groovy",
127            Self::Haskell => "haskell",
128            Self::Html => "html",
129            Self::Julia => "julia",
130            Self::Kotlin => "kotlin",
131            Self::Lua => "lua",
132            Self::Makefile => "makefile",
133            Self::Nim => "nim",
134            Self::ObjectiveC => "objectivec",
135            Self::Ocaml => "ocaml",
136            Self::Perl => "perl",
137            Self::Php => "php",
138            Self::R => "r",
139            Self::Ruby => "ruby",
140            Self::Scala => "scala",
141            Self::Scss => "scss",
142            Self::Sql => "sql",
143            Self::Svelte => "svelte",
144            Self::Swift => "swift",
145            Self::Vue => "vue",
146            Self::Xml => "xml",
147            Self::Zig => "zig",
148        }
149    }
150
151    #[must_use]
152    pub fn from_name(name: &str) -> Option<Self> {
153        match name.trim().to_ascii_lowercase().as_str() {
154            "c" => Some(Self::C),
155            "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
156            "csharp" | "c#" | "cs" => Some(Self::CSharp),
157            "go" | "golang" => Some(Self::Go),
158            "java" => Some(Self::Java),
159            "javascript" | "js" => Some(Self::JavaScript),
160            "python" | "py" => Some(Self::Python),
161            "rust" | "rs" => Some(Self::Rust),
162            "shell" | "sh" | "bash" => Some(Self::Shell),
163            "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
164            "typescript" | "ts" => Some(Self::TypeScript),
165            "assembly" | "asm" => Some(Self::Assembly),
166            "clojure" | "clj" => Some(Self::Clojure),
167            "css" => Some(Self::Css),
168            "dart" => Some(Self::Dart),
169            "dockerfile" | "docker" => Some(Self::Dockerfile),
170            "elixir" | "ex" => Some(Self::Elixir),
171            "erlang" | "erl" => Some(Self::Erlang),
172            "fsharp" | "f#" | "fs" => Some(Self::FSharp),
173            "groovy" => Some(Self::Groovy),
174            "haskell" | "hs" => Some(Self::Haskell),
175            "html" | "htm" => Some(Self::Html),
176            "julia" | "jl" => Some(Self::Julia),
177            "kotlin" | "kt" => Some(Self::Kotlin),
178            "lua" => Some(Self::Lua),
179            "makefile" | "make" | "mk" => Some(Self::Makefile),
180            "nim" => Some(Self::Nim),
181            "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
182            "ocaml" | "ml" => Some(Self::Ocaml),
183            "perl" | "pl" => Some(Self::Perl),
184            "php" => Some(Self::Php),
185            "r" => Some(Self::R),
186            "ruby" | "rb" => Some(Self::Ruby),
187            "scala" => Some(Self::Scala),
188            "scss" | "sass" => Some(Self::Scss),
189            "sql" => Some(Self::Sql),
190            "svelte" => Some(Self::Svelte),
191            "swift" => Some(Self::Swift),
192            "vue" => Some(Self::Vue),
193            "xml" => Some(Self::Xml),
194            "zig" => Some(Self::Zig),
195            _ => None,
196        }
197    }
198}
199
/// Raw per-file counters.
///
/// `Default` yields all-zero counters. Fields marked `#[serde(default)]` may be missing from
/// serialized input and then deserialize as zero; the remaining fields carry no such
/// attribute.
///
/// NOTE(review): the first nine counters are undocumented here; their names suggest per-line
/// categories, but whether they are mutually exclusive is not visible in this part of the
/// file — confirm against the scanner.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    pub total_physical_lines: u64,
    pub blank_only_lines: u64,
    pub code_only_lines: u64,
    pub single_comment_only_lines: u64,
    pub multi_comment_only_lines: u64,
    pub mixed_code_single_comment_lines: u64,
    pub mixed_code_multi_comment_lines: u64,
    pub docstring_comment_lines: u64,
    pub skipped_unknown_lines: u64,
    /// Best-effort count of function/method definition lines detected lexically.
    #[serde(default)]
    pub functions: u64,
    /// Best-effort count of class/struct/trait/type definition lines detected lexically.
    #[serde(default)]
    pub classes: u64,
    /// Best-effort count of variable declaration lines detected lexically.
    #[serde(default)]
    pub variables: u64,
    /// Best-effort count of import/use/include statement lines detected lexically.
    #[serde(default)]
    pub imports: u64,
    /// Lines consisting solely of preprocessor/compiler directives (e.g. `#include`, `#define`
    /// in C/C++/Objective-C). Always a subset of `code_only_lines`. Controlled by
    /// `AnalysisConfig::count_compiler_directives`. IEEE 1045-1992 §4.2.
    #[serde(default)]
    pub compiler_directive_lines: u64,
    /// Best-effort count of test case / test function definition lines detected lexically
    /// (`GTest`, Catch2, `PyTest`, `JUnit`, etc.). Always a subset of `code_only_lines`.
    #[serde(default)]
    pub test_count: u64,
    /// Best-effort count of test assertion call lines detected lexically
    /// (`ASSERT_EQ`, `EXPECT_TRUE`, `assertEquals`, `Assert.AreEqual`, `assert_eq!`, etc.).
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Best-effort count of test suite / fixture / group declaration lines detected lexically
    /// (`TEST_GROUP`, `BOOST_AUTO_TEST_SUITE`, `[TestClass]`, `[TestFixture]`, etc.).
    #[serde(default)]
    pub test_suite_count: u64,
}
241
/// Identifies which analysis strategy produced a [`RawFileAnalysis`].
///
/// Serialized by serde in `snake_case` (`"lexical"`, `"lexical_best_effort"`,
/// `"tree_sitter"`).
///
/// NOTE(review): the exact criteria separating `Lexical` from `LexicalBestEffort` are not
/// visible in this part of the file; `TreeSitter` presumably marks results from the
/// `tree-sitter` fast path in `analyze_text` — confirm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    Lexical,
    LexicalBestEffort,
    TreeSitter,
}
249
/// Result of analysing one file's text; this is the return type of `analyze_text`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// The line and symbol counters gathered for the file.
    pub raw: RawLineCounts,
    /// The strategy that produced `raw`.
    pub parse_mode: ParseMode,
    /// Free-form warning messages.
    /// NOTE(review): what triggers a warning is decided by the analyzers, which are not
    /// visible in this part of the file.
    pub warnings: Vec<String>,
}
256
/// IEEE 1045-1992 counting options passed from `sloc-core` (built from `AnalysisConfig`).
///
/// `analyze_text` accepts this struct so that the caller can control behaviour that the
/// standard defines as configurable parameters rather than fixed conventions.
///
/// The `Default` implementation sets `blank_in_block_comment_as_comment` to `true` and
/// `collapse_continuation_lines` to `false`.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// When `true` (IEEE 1045-1992 default), blank lines inside block comments count as
    /// comment lines rather than blank lines.
    pub blank_in_block_comment_as_comment: bool,
    /// When `true`, backslash-continued physical lines are collapsed into a single logical
    /// line for SLOC counting purposes (IEEE logical SLOC mode).
    pub collapse_continuation_lines: bool,
}
270
271impl Default for AnalysisOptions {
272    fn default() -> Self {
273        Self {
274            blank_in_block_comment_as_comment: true,
275            collapse_continuation_lines: false,
276        }
277    }
278}
279
280#[must_use]
281pub fn supported_languages() -> BTreeSet<Language> {
282    [
283        Language::Assembly,
284        Language::C,
285        Language::Clojure,
286        Language::Cpp,
287        Language::CSharp,
288        Language::Css,
289        Language::Dart,
290        Language::Dockerfile,
291        Language::Elixir,
292        Language::Erlang,
293        Language::FSharp,
294        Language::Go,
295        Language::Groovy,
296        Language::Haskell,
297        Language::Html,
298        Language::Java,
299        Language::JavaScript,
300        Language::Julia,
301        Language::Kotlin,
302        Language::Lua,
303        Language::Makefile,
304        Language::Nim,
305        Language::ObjectiveC,
306        Language::Ocaml,
307        Language::Perl,
308        Language::Php,
309        Language::PowerShell,
310        Language::Python,
311        Language::R,
312        Language::Ruby,
313        Language::Rust,
314        Language::Scala,
315        Language::Scss,
316        Language::Shell,
317        Language::Sql,
318        Language::Svelte,
319        Language::Swift,
320        Language::TypeScript,
321        Language::Vue,
322        Language::Xml,
323        Language::Zig,
324    ]
325    .into_iter()
326    .collect()
327}
328
329/// Detect language from a shebang line (e.g. `#!/usr/bin/env python3`).
330fn detect_by_shebang(line: &str) -> Option<Language> {
331    let lower = line.to_ascii_lowercase();
332    if !lower.starts_with("#!") {
333        return None;
334    }
335    if lower.contains("python") {
336        return Some(Language::Python);
337    }
338    if lower.contains("pwsh") || lower.contains("powershell") {
339        return Some(Language::PowerShell);
340    }
341    if lower.contains("bash")
342        || lower.contains("/sh")
343        || lower.contains("zsh")
344        || lower.contains("ksh")
345    {
346        return Some(Language::Shell);
347    }
348    if lower.contains("ruby") {
349        return Some(Language::Ruby);
350    }
351    if lower.contains("perl") {
352        return Some(Language::Perl);
353    }
354    if lower.contains("php") {
355        return Some(Language::Php);
356    }
357    if lower.contains("node") || lower.contains("nodejs") {
358        return Some(Language::JavaScript);
359    }
360    None
361}
362
363/// Detect language purely from a (lowercased) file extension.
364fn detect_by_extension(ext: &str) -> Option<Language> {
365    // Static table avoids a large match statement; each extension maps 1-to-1 to a language.
366    static EXT_MAP: &[(&str, Language)] = &[
367        ("c", Language::C),
368        ("h", Language::C),
369        ("cc", Language::Cpp),
370        ("cp", Language::Cpp),
371        ("cpp", Language::Cpp),
372        ("cxx", Language::Cpp),
373        ("hh", Language::Cpp),
374        ("hpp", Language::Cpp),
375        ("hxx", Language::Cpp),
376        ("cs", Language::CSharp),
377        ("go", Language::Go),
378        ("java", Language::Java),
379        ("js", Language::JavaScript),
380        ("mjs", Language::JavaScript),
381        ("cjs", Language::JavaScript),
382        ("py", Language::Python),
383        ("rs", Language::Rust),
384        ("sh", Language::Shell),
385        ("bash", Language::Shell),
386        ("zsh", Language::Shell),
387        ("ksh", Language::Shell),
388        ("ps1", Language::PowerShell),
389        ("psm1", Language::PowerShell),
390        ("psd1", Language::PowerShell),
391        ("ts", Language::TypeScript),
392        ("mts", Language::TypeScript),
393        ("cts", Language::TypeScript),
394        ("asm", Language::Assembly),
395        ("s", Language::Assembly),
396        ("clj", Language::Clojure),
397        ("cljs", Language::Clojure),
398        ("cljc", Language::Clojure),
399        ("edn", Language::Clojure),
400        ("css", Language::Css),
401        ("dart", Language::Dart),
402        ("ex", Language::Elixir),
403        ("exs", Language::Elixir),
404        ("erl", Language::Erlang),
405        ("hrl", Language::Erlang),
406        ("fs", Language::FSharp),
407        ("fsi", Language::FSharp),
408        ("fsx", Language::FSharp),
409        ("groovy", Language::Groovy),
410        ("gradle", Language::Groovy),
411        ("hs", Language::Haskell),
412        ("lhs", Language::Haskell),
413        ("html", Language::Html),
414        ("htm", Language::Html),
415        ("xhtml", Language::Html),
416        ("jl", Language::Julia),
417        ("kt", Language::Kotlin),
418        ("kts", Language::Kotlin),
419        ("lua", Language::Lua),
420        ("mk", Language::Makefile),
421        ("nim", Language::Nim),
422        ("nims", Language::Nim),
423        ("m", Language::ObjectiveC),
424        ("mm", Language::ObjectiveC),
425        ("ml", Language::Ocaml),
426        ("mli", Language::Ocaml),
427        ("pl", Language::Perl),
428        ("pm", Language::Perl),
429        ("t", Language::Perl),
430        ("php", Language::Php),
431        ("php3", Language::Php),
432        ("php4", Language::Php),
433        ("php5", Language::Php),
434        ("php7", Language::Php),
435        ("phtml", Language::Php),
436        ("r", Language::R),
437        ("rb", Language::Ruby),
438        ("rake", Language::Ruby),
439        ("scala", Language::Scala),
440        ("sc", Language::Scala),
441        ("scss", Language::Scss),
442        ("sass", Language::Scss),
443        ("sql", Language::Sql),
444        ("svelte", Language::Svelte),
445        ("swift", Language::Swift),
446        ("vue", Language::Vue),
447        ("xml", Language::Xml),
448        ("xsd", Language::Xml),
449        ("xsl", Language::Xml),
450        ("xslt", Language::Xml),
451        ("svg", Language::Xml),
452        ("zig", Language::Zig),
453    ];
454    EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
455}
456
457/// Detect language from an exact filename (no extension) or well-known filename patterns.
458fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
459    // Dockerfile: exact name or Dockerfile.* variant
460    if filename == "Dockerfile"
461        || filename.starts_with("Dockerfile.")
462        || filename_lower == "dockerfile"
463    {
464        return Some(Language::Dockerfile);
465    }
466    // Makefile variants
467    if matches!(
468        filename,
469        "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
470    ) {
471        return Some(Language::Makefile);
472    }
473    // Ruby ecosystem files that have no extension
474    if matches!(
475        filename,
476        "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
477    ) {
478        return Some(Language::Ruby);
479    }
480    None
481}
482
483#[must_use]
484#[allow(clippy::too_many_lines)]
485pub fn detect_language(
486    path: &Path,
487    first_line: Option<&str>,
488    extension_overrides: &BTreeMap<String, String>,
489    shebang_detection: bool,
490) -> Option<Language> {
491    let extension = path
492        .extension()
493        .and_then(|ext| ext.to_str())
494        .map(str::to_ascii_lowercase);
495
496    // Extension override check (user-configured mappings win over everything)
497    if let Some(ext) = extension.as_ref() {
498        if let Some(override_name) = extension_overrides.get(ext.as_str()) {
499            if let Some(lang) = Language::from_name(override_name) {
500                return Some(lang);
501            }
502        }
503    }
504
505    // Filename-based detection for files that have no extension or use exact names
506    let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
507    let filename_lower = filename.to_ascii_lowercase();
508
509    if let Some(lang) = detect_by_filename(filename, &filename_lower) {
510        return Some(lang);
511    }
512
513    // Extension-based detection
514    if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
515        return Some(lang);
516    }
517
518    // Shebang detection (last resort โ€” only for extensionless scripts)
519    if shebang_detection {
520        if let Some(line) = first_line {
521            if let Some(lang) = detect_by_shebang(line) {
522                return Some(lang);
523            }
524        }
525    }
526
527    None
528}
529
530#[must_use]
531pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
532    // tree-sitter fast-paths (compiled out when feature is disabled)
533    #[cfg(feature = "tree-sitter")]
534    {
535        match language {
536            Language::C | Language::Cpp => {
537                if let Some(result) = ts::analyze_c(text) {
538                    return result;
539                }
540            }
541            Language::Python => {
542                if let Some(result) = ts::analyze_python(text) {
543                    return result;
544                }
545            }
546            _ => {}
547        }
548    }
549
550    let (mut config, has_preprocessor) = language_scan_config(language);
551
552    // Python docstring lines are computed from the text and cannot be a static constant.
553    if language == Language::Python {
554        config.skip_lines = detect_python_docstring_lines(text);
555    }
556
557    // C, C++, and Objective-C have a preprocessor whose directive lines are tracked separately
558    // per IEEE 1045-1992 ยง4.2; every other language uses base flags.
559    let flags = IeeeFlags {
560        has_preprocessor_directives: has_preprocessor,
561        blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
562        collapse_continuation_lines: options.collapse_continuation_lines,
563    };
564    analyze_generic(text, config, flags)
565}
566
567/// Returns the lexical scan configuration for `language` and whether it uses a C preprocessor.
568/// All fields are static constants except `skip_lines`, which is always empty here; callers that
569/// need non-empty skip sets (currently only Python) must populate the field after this call.
570///
571/// The implementation delegates to `LANG_SCAN_TABLE` (a static `&[(Language, StaticLangConfig)]`)
572/// defined below the `SP_*` symbol-pattern constants.  Each language appears exactly once in the
573/// table, so the linear scan is O(|languages|) but avoids a 41-arm `match` statement.
574fn language_scan_config(language: Language) -> (ScanConfig, bool) {
575    let cfg = LANG_SCAN_TABLE
576        .iter()
577        .find_map(|&(l, c)| (l == language).then_some(c))
578        .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
579    (
580        ScanConfig {
581            line_comments: cfg.line_comments,
582            block_comment: cfg.block_comment,
583            allow_single_quote_strings: cfg.allow_single_quote_strings,
584            allow_double_quote_strings: cfg.allow_double_quote_strings,
585            allow_triple_quote_strings: cfg.allow_triple_quote_strings,
586            allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
587            skip_lines: HashSet::new(),
588            symbol_patterns: cfg.symbol_patterns,
589        },
590        cfg.has_preprocessor,
591    )
592}
593
/// Per-language keyword prefixes used for best-effort structural symbol detection.
/// Each slice lists line prefixes (after leading whitespace is stripped) that indicate
/// a definition of that category. Empty slice = detection disabled for that category.
///
/// All slices are `'static`, so values of this type can be built in `const` context and
/// copied freely.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Line prefixes that indicate a function or method definition.
    functions: &'static [&'static str],
    /// Line prefixes that indicate a class/struct/trait/type definition.
    classes: &'static [&'static str],
    /// Line prefixes that indicate a variable declaration.
    variables: &'static [&'static str],
    /// Line prefixes that indicate an import/use/include statement.
    imports: &'static [&'static str],
    /// Line prefixes (after stripping leading whitespace) that indicate a test case or test
    /// function definition. Matched against code lines only, same as other symbol categories.
    tests: &'static [&'static str],
    /// Line prefixes that indicate a test assertion call (`ASSERT_EQ`, `assertEquals`,
    /// `assert_eq!`, `Assert.AreEqual`, etc.). Matched against code lines only.
    assertions: &'static [&'static str],
    /// Line prefixes that indicate a test suite / fixture / group declaration
    /// (`TEST_GROUP`, `BOOST_AUTO_TEST_SUITE`, `[TestClass]`, `[TestFixture]`, etc.).
    test_suites: &'static [&'static str],
}
613
614impl SymbolPatterns {
615    const fn none() -> Self {
616        Self {
617            functions: &[],
618            classes: &[],
619            variables: &[],
620            imports: &[],
621            tests: &[],
622            assertions: &[],
623            test_suites: &[],
624        }
625    }
626}
627
/// Pattern set with every category empty: no symbol detection of any kind.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();
629
/// Symbol-detection line prefixes for Rust.
///
/// Visibility and qualifier combinations are enumerated explicitly because matching is by
/// line prefix; combinations that are not listed (for example `pub(super) struct `) are not
/// recognised.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    // `impl` blocks and `type` aliases are counted in this category alongside
    // struct/enum/trait definitions.
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    // NOTE(review): any line starting with "let mut " also starts with "let "; confirm the
    // matcher counts a line once per category rather than once per matching prefix.
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
    // Built-in #[test], tokio/actix async test attributes, rstest
    tests: &[
        "#[test]",
        "#[tokio::test]",
        "#[actix_web::test]",
        "#[rstest]",
        "#[test_case",
    ],
    assertions: &[
        "assert_eq!(",
        "assert_ne!(",
        "assert!(",
        "assert_matches!(",
        "assert_err!(",
        "assert_ok!(",
    ],
    // No suite-level construct is detected for Rust.
    test_suites: &[],
};
684
/// Symbol-detection line prefixes for Python.
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    classes: &["class "],
    // Variable detection is disabled for Python (empty slice).
    variables: &[],
    imports: &["import ", "from "],
    // pytest: test_ prefix functions and Test* classes; unittest: test_ methods
    tests: &["def test_", "async def test_", "class Test"],
    // unittest-style `self.assert*` calls only.
    assertions: &[
        "self.assertEqual(",
        "self.assertNotEqual(",
        "self.assertTrue(",
        "self.assertFalse(",
        "self.assertIsNone(",
        "self.assertIsNotNone(",
        "self.assertIn(",
        "self.assertNotIn(",
        "self.assertRaises(",
        "self.assertAlmostEqual(",
    ],
    test_suites: &[],
};
706
/// Symbol-detection line prefixes for JavaScript.
///
/// Only declaration forms that begin the line with a keyword are listed; functions assigned
/// to variables are matched by the `variables` prefixes, not by `functions`.
const SP_JS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // Jest/Mocha/Jasmine: describe/it/test block openers
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
737
/// Symbol-detection line prefixes for TypeScript.
///
/// Same as the JavaScript set, with `abstract`/`declare` classes and interfaces added to
/// the `classes` category.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // Jest/Mocha/Jasmine/Vitest: describe/it/test block openers
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
778
/// Symbol-detection line prefixes for Go.
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    classes: &["type "],
    variables: &["var "],
    imports: &["import "],
    // Go standard testing: Test* functions (convention is practically exclusive to _test.go files)
    tests: &["func Test", "func Benchmark", "func Fuzz"],
    // Assertion and suite detection are disabled for Go (empty slices).
    assertions: &[],
    test_suites: &[],
};
789
/// Symbol-detection line prefixes for Java.
///
/// Function and variable detection are disabled (empty slices).
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
    // JUnit 4 & 5, TestNG — annotations appear on their own line before the method
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "@TestFactory",
        "@TestTemplate",
    ],
    // Unqualified assertion calls only (line must start with the method name).
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "assertAll(",
        "assertArrayEquals(",
        "assertIterableEquals(",
        "assertLinesMatch(",
    ],
    test_suites: &[],
};
835
/// Symbol-detection line prefixes for C#.
///
/// Function detection is disabled (empty slice).
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    variables: &["var "],
    imports: &["using "],
    // MSTest, NUnit, xUnit — attributes on their own line before the method
    tests: &[
        "[TestMethod]",
        "[Test]",
        "[Fact]",
        "[Theory]",
        "[TestCase(",
        "[DataRow(",
        "[InlineData(",
        "[MemberData(",
    ],
    assertions: &[
        "Assert.AreEqual(",
        "Assert.AreNotEqual(",
        "Assert.IsTrue(",
        "Assert.IsFalse(",
        "Assert.IsNull(",
        "Assert.IsNotNull(",
        "Assert.Equal(",
        "Assert.NotEqual(",
        "Assert.True(",
        "Assert.False(",
        "Assert.That(",
        "Assert.Contains(",
        "Assert.Throws(",
        "Assert.ThrowsAsync(",
        "Assert.IsInstanceOfType(",
    ],
    // Class-level test attributes.
    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
};
893
// Test-definition line prefixes for C and C++: GTest, Catch2/doctest, Boost.Test, CppUnit,
// Unity, Check, CMocka and CppUTest.
//
// NOTE(review): "TEST_GROUP(", "TEST_GROUP_BASE(", "BOOST_AUTO_TEST_SUITE(" and
// "CPPUNIT_TEST_SUITE(" are also listed in SUITE_PATTERNS_C_CPP, so such a line can match
// both the test and the suite category — confirm that is intended.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // Google Test
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2 / doctest
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity (embedded C)
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check (libcheck — embedded C)
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // CMocka (embedded C)
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
934
// Test assertion line prefixes shared by C and C++, grouped by framework. Each entry ends
// with "(" so that a macro whose name merely extends another one (e.g. "REQUIRE_FALSE(" vs
// "REQUIRE(") is only matched by its own entry.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // Google Test ASSERT_* (test-stopping failures)
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // Google Test EXPECT_* (non-stopping failures)
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2 / doctest assertions
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity assertions (embedded C)
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // CMocka assertions (embedded C)
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1008
// Test suite/group declaration patterns for C and C++ (`SP_C` and `SP_CPP` both point their
// `test_suites` field at this slice). Covers CppUTest groups, Boost.Test suites and CppUnit
// suites.
// NOTE(review): CppUnit suites are written as `CPPUNIT_TEST_SUITE(...)` ...
// `CPPUNIT_TEST_SUITE_END()`, so listing the END macro presumably makes one suite match
// twice; Boost's `BOOST_AUTO_TEST_SUITE_END(` is not listed. Confirm this is intended.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1017
// Symbol patterns for C (referenced by the `Language::C` entry of `LANG_SCAN_TABLE`).
// No function or variable patterns are defined; `classes` lists struct/union declarations
// and their `typedef` forms, plus `typedef enum`. Test, assertion and suite patterns are
// the tables shared with C++.
const SP_C: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "struct ",
        "typedef struct ",
        "union ",
        "typedef union ",
        "typedef enum ",
    ],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1033
// Symbol patterns for C++ (referenced by the `Language::Cpp` entry of `LANG_SCAN_TABLE`).
// No function or variable patterns are defined; `classes` also lists `namespace ` and
// `template ` introducers. Test, assertion and suite patterns are the tables shared with C.
const SP_CPP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "struct ", "namespace ", "template "],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1043
// Symbol patterns for shell scripts. Only the `function ` keyword form is listed under
// `functions`; no test-related patterns are defined.
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function "],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    // `source file` and its POSIX dot-command spelling `. file`.
    imports: &["source ", ". "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1053
// Symbol patterns for PowerShell. The `function` keyword is listed in two casings
// (`function ` and `Function `); no variable or assertion patterns are defined.
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "Function "],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
    // Pester test framework: `Describe` / `Context` / `It` blocks.
    tests: &["Describe ", "It ", "Context "],
    assertions: &[],
    test_suites: &[],
};
1064
// Symbol patterns for Kotlin. Function, class and variable patterns enumerate the bare
// keyword plus a fixed set of modifier-prefixed spellings; modifier combinations that are
// not listed here have no pattern of their own.
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        // No trailing space, unlike the other entries.
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
    // JUnit 4/5 annotations, plus string-literal openers (`"should `, `"it `) — presumably
    // for KotlinTest/Kotest spec-style test names; confirm against the matcher.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "\"should ",
        "\"it ",
    ],
    // JUnit-style assert* calls followed by Kotest-style should* matchers.
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "shouldBe(",
        "shouldNotBe(",
        "shouldThrow(",
    ],
    test_suites: &[],
};
1116
// Symbol patterns for Swift. Each category lists the bare keyword plus a fixed set of
// modifier-prefixed spellings; combinations that are not listed have no pattern of their own.
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
    // XCTest: test functions are named test* by convention; Swift Testing: @Test attribute
    tests: &["func test", "func Test", "@Test"],
    // XCTest XCTAssert* calls, plus the Swift Testing `#expect(` macro.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
        "#expect(",
    ],
    test_suites: &[],
};
1172
// Symbol patterns for Ruby. No variable, assertion or suite patterns are defined.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def "],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    // RSpec (`it`, `describe`, `context`) / minitest (`test`)
    tests: &["it ", "it(", "describe ", "context ", "test "],
    assertions: &[],
    test_suites: &[],
};
1183
// Symbol patterns for Scala. No assertion or suite patterns are defined.
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    // ScalaTest / MUnit: FunSuite-style `test("...")` and spec-style `it("...")` /
    // `describe("...")` calls.
    // NOTE(review): WordSpec-style `"..." should` declarations have no pattern in this list.
    tests: &["test(", "it(", "describe("],
    assertions: &[],
    test_suites: &[],
};
1201
// Symbol patterns for PHP. Function patterns enumerate the bare keyword plus visibility /
// `static` / `abstract` / `final` prefixed spellings. No variable, assertion or suite
// patterns are defined.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    // PHPUnit: methods whose name starts with `test`, plus the `#[Test]` and
    // `#[DataProvider(` attributes.
    // NOTE(review): the docblock `@test` annotation is not among these patterns.
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    assertions: &[],
    test_suites: &[],
};
1241
// Symbol patterns for Elixir. The public/private `def*` forms are listed under `functions`
// and module-level definitions (`defmodule`, `defprotocol`, `defimpl`) under `classes`.
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    // ExUnit
    tests: &["test ", "describe "],
    assertions: &[],
    test_suites: &[],
};
1259
// Symbol patterns for Erlang. Only module attributes are matched: `-module(` under
// `classes` and the `-import(` / `-include(` / `-include_lib(` attributes under `imports`.
// No function, variable or test-related patterns are defined.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1269
// Symbol patterns for F#.
// NOTE(review): `let mutable ` (variables) also begins with `let ` (functions); which
// category such a line lands in depends on the matcher, which is not visible here — confirm.
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    // NUnit / xUnit attributes on their own line; FsUnit uses [<Test>] / [<Fact>]
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    assertions: &[],
    test_suites: &[],
};
1286
// Symbol patterns for Groovy. No variable, assertion or suite patterns are defined.
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    // Spock framework: string-named feature methods (`def "..."`) and the block labels
    // `given:` / `when:` / `then:` / `expect:`; JUnit `@Test` annotation.
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    assertions: &[],
    test_suites: &[],
};
1297
// Symbol patterns for Haskell. Only type-level declarations (`class`, `data`, `newtype`,
// `type`) and `import` lines have patterns; no function, variable or test-related
// patterns are defined.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1307
// Symbol patterns for Lua. No class or import patterns are defined.
// NOTE(review): `local function ` (functions) also begins with `local ` (variables); which
// category such a line lands in depends on the matcher, which is not visible here — confirm.
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    classes: &[],
    variables: &["local "],
    imports: &[],
    // busted test framework
    tests: &["it(", "describe(", "pending("],
    assertions: &[],
    test_suites: &[],
};
1318
// Symbol patterns for Nim. All routine kinds (`proc`, `func`, `method`, `iterator`,
// `converter`, `template`, `macro`) are listed under `functions`.
const SP_NIM: SymbolPatterns = SymbolPatterns {
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
    // unittest module
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
};
1337
// Symbol patterns for Objective-C (referenced by the `Language::ObjectiveC` entry of
// `LANG_SCAN_TABLE`). Methods are recognised by their `- (` / `+ (` openers.
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    functions: &["- (", "+ ("],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // XCTest: test methods start with - (void)test
    tests: &["- (void)test"],
    // XCTest XCTAssert* calls.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
};
1359
// Symbol patterns for OCaml. Every `let ` / `let rec ` binding pattern is listed under
// `functions`; no variable or test-related patterns are defined.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    functions: &["let ", "let rec "],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1369
// Symbol patterns for Perl. `package ` declarations are listed under `classes`; no
// test-related patterns are defined.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1379
// Symbol patterns for Clojure. Every pattern includes the opening parenthesis of the form
// it introduces.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    // clojure.test
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
};
1395
// Symbol patterns for Julia. Only `const ` bindings are listed under `variables`.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    // Test.jl standard library: `@test` and `@testset` macros (both listed under `tests`;
    // `assertions` is empty).
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
};
1411
// Symbol patterns for Dart. No function patterns are defined.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    // flutter_test / test package
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
};
1422
// Symbol patterns for R. Only imports and test patterns are defined; no function, class or
// variable patterns.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // testthat
    // NOTE(review): the `expect_` prefix (testthat expectations) is listed here under
    // `tests` while `assertions` is empty — confirm that this categorisation is intended.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
1433
// Symbol patterns for SQL. Each statement is listed in all-lowercase and all-uppercase
// spellings only; mixed-case spellings have no pattern of their own. Routines are listed
// under `functions`, and tables, views and schemas under `classes`.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1459
// Symbol patterns for assembly sources. Only procedure and include directives have
// patterns, each in lowercase and uppercase spellings (plus the `%include ` form).
// NOTE(review): in MASM-style syntax the keyword follows the label (`name PROC`), so
// whether `proc ` / `PROC ` match such lines depends on the matcher — confirm.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    functions: &["proc ", "PROC "],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1469
1470const SP_ZIG: SymbolPatterns = SymbolPatterns {
1471    functions: &[
1472        "fn ",
1473        "pub fn ",
1474        "export fn ",
1475        "inline fn ",
1476        "pub inline fn ",
1477    ],
1478    classes: &[],
1479    variables: &["var ", "pub var "],
1480    imports: &[],
1481    // Zig built-in test blocks
1482    tests: &["test \"", "test{"],
1483    assertions: &[],
1484    test_suites: &[],
1485};
1486
/// Static (non-heap) language scanning parameters.  All fields are `'static` so this struct
/// can be stored in a `static` array.  The dynamic `skip_lines` set (used only for Python
/// docstring detection) is kept in `ScanConfig` and populated by the caller after lookup.
// The bools are independent per-language switches (the table sets them in varying
// combinations), hence the lint exemption.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Line-comment introducers for the language (may be empty, or hold several entries).
    line_comments: &'static [&'static str],
    /// Block-comment `(open, close)` delimiter pair, or `None` when no pair is configured.
    block_comment: Option<(&'static str, &'static str)>,
    /// Presumably whether `'...'` is treated as a string literal — confirm against the scanner.
    allow_single_quote_strings: bool,
    /// Presumably whether `"..."` is treated as a string literal — confirm against the scanner.
    allow_double_quote_strings: bool,
    /// Presumably whether triple-quoted strings are recognised — confirm against the scanner.
    allow_triple_quote_strings: bool,
    /// Presumably enables C# verbatim (`@"..."`) string handling — confirm against the scanner.
    allow_csharp_verbatim_strings: bool,
    /// Symbol-detection pattern table (one of the `SP_*` constants).
    symbol_patterns: SymbolPatterns,
    /// `true` for C, C++, and Objective-C (languages that have a C preprocessor).
    has_preprocessor: bool,
}
1503
/// Scan configuration carrying the same comment, string and symbol-pattern fields as
/// `StaticLangConfig` (but no `has_preprocessor` flag), plus the heap-allocated
/// `skip_lines` set.
// The bools are independent per-language switches, hence the lint exemption.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Line-comment introducers for the language.
    line_comments: &'static [&'static str],
    /// Block-comment `(open, close)` delimiter pair, or `None` when no pair is configured.
    block_comment: Option<(&'static str, &'static str)>,
    /// Presumably whether `'...'` is treated as a string literal — confirm against the scanner.
    allow_single_quote_strings: bool,
    /// Presumably whether `"..."` is treated as a string literal — confirm against the scanner.
    allow_double_quote_strings: bool,
    /// Presumably whether triple-quoted strings are recognised — confirm against the scanner.
    allow_triple_quote_strings: bool,
    /// Presumably enables C# verbatim (`@"..."`) string handling — confirm against the scanner.
    allow_csharp_verbatim_strings: bool,
    /// Lines to skip; per the `StaticLangConfig` docs this is used only for Python docstring
    /// detection and is filled in by the caller. Index base (0 or 1) is not visible here —
    /// TODO confirm.
    skip_lines: HashSet<usize>,
    /// Symbol-detection pattern table (one of the `SP_*` constants).
    symbol_patterns: SymbolPatterns,
}
1516
/// Static language-scan configuration table — one entry per supported language.
1518/// Used by `language_scan_config` to avoid a 41-arm match.  All `SP_*` constants
1519/// referenced here are defined above in the same module.
1520static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
1521    (
1522        Language::C,
1523        StaticLangConfig {
1524            line_comments: &["//"],
1525            block_comment: Some(("/*", "*/")),
1526            allow_single_quote_strings: true,
1527            allow_double_quote_strings: true,
1528            allow_triple_quote_strings: false,
1529            allow_csharp_verbatim_strings: false,
1530            symbol_patterns: SP_C,
1531            has_preprocessor: true,
1532        },
1533    ),
1534    (
1535        Language::Cpp,
1536        StaticLangConfig {
1537            line_comments: &["//"],
1538            block_comment: Some(("/*", "*/")),
1539            allow_single_quote_strings: true,
1540            allow_double_quote_strings: true,
1541            allow_triple_quote_strings: false,
1542            allow_csharp_verbatim_strings: false,
1543            symbol_patterns: SP_CPP,
1544            has_preprocessor: true,
1545        },
1546    ),
1547    (
1548        Language::ObjectiveC,
1549        StaticLangConfig {
1550            line_comments: &["//"],
1551            block_comment: Some(("/*", "*/")),
1552            allow_single_quote_strings: true,
1553            allow_double_quote_strings: true,
1554            allow_triple_quote_strings: false,
1555            allow_csharp_verbatim_strings: false,
1556            symbol_patterns: SP_OBJECTIVEC,
1557            has_preprocessor: true,
1558        },
1559    ),
1560    (
1561        Language::CSharp,
1562        StaticLangConfig {
1563            line_comments: &["//"],
1564            block_comment: Some(("/*", "*/")),
1565            allow_single_quote_strings: true,
1566            allow_double_quote_strings: true,
1567            allow_triple_quote_strings: false,
1568            allow_csharp_verbatim_strings: true,
1569            symbol_patterns: SP_CSHARP,
1570            has_preprocessor: false,
1571        },
1572    ),
1573    (
1574        Language::Go,
1575        StaticLangConfig {
1576            line_comments: &["//"],
1577            block_comment: Some(("/*", "*/")),
1578            allow_single_quote_strings: true,
1579            allow_double_quote_strings: true,
1580            allow_triple_quote_strings: false,
1581            allow_csharp_verbatim_strings: false,
1582            symbol_patterns: SP_GO,
1583            has_preprocessor: false,
1584        },
1585    ),
1586    (
1587        Language::Java,
1588        StaticLangConfig {
1589            line_comments: &["//"],
1590            block_comment: Some(("/*", "*/")),
1591            allow_single_quote_strings: true,
1592            allow_double_quote_strings: true,
1593            allow_triple_quote_strings: false,
1594            allow_csharp_verbatim_strings: false,
1595            symbol_patterns: SP_JAVA,
1596            has_preprocessor: false,
1597        },
1598    ),
1599    (
1600        Language::JavaScript,
1601        StaticLangConfig {
1602            line_comments: &["//"],
1603            block_comment: Some(("/*", "*/")),
1604            allow_single_quote_strings: true,
1605            allow_double_quote_strings: true,
1606            allow_triple_quote_strings: false,
1607            allow_csharp_verbatim_strings: false,
1608            symbol_patterns: SP_JS,
1609            has_preprocessor: false,
1610        },
1611    ),
1612    (
1613        Language::Svelte,
1614        StaticLangConfig {
1615            line_comments: &["//"],
1616            block_comment: Some(("/*", "*/")),
1617            allow_single_quote_strings: true,
1618            allow_double_quote_strings: true,
1619            allow_triple_quote_strings: false,
1620            allow_csharp_verbatim_strings: false,
1621            symbol_patterns: SP_JS,
1622            has_preprocessor: false,
1623        },
1624    ),
1625    (
1626        Language::Vue,
1627        StaticLangConfig {
1628            line_comments: &["//"],
1629            block_comment: Some(("/*", "*/")),
1630            allow_single_quote_strings: true,
1631            allow_double_quote_strings: true,
1632            allow_triple_quote_strings: false,
1633            allow_csharp_verbatim_strings: false,
1634            symbol_patterns: SP_JS,
1635            has_preprocessor: false,
1636        },
1637    ),
1638    (
1639        Language::Rust,
1640        StaticLangConfig {
1641            line_comments: &["//"],
1642            block_comment: Some(("/*", "*/")),
1643            allow_single_quote_strings: false,
1644            allow_double_quote_strings: true,
1645            allow_triple_quote_strings: false,
1646            allow_csharp_verbatim_strings: false,
1647            symbol_patterns: SP_RUST,
1648            has_preprocessor: false,
1649        },
1650    ),
1651    (
1652        Language::Shell,
1653        StaticLangConfig {
1654            line_comments: &["#"],
1655            block_comment: None,
1656            allow_single_quote_strings: true,
1657            allow_double_quote_strings: true,
1658            allow_triple_quote_strings: false,
1659            allow_csharp_verbatim_strings: false,
1660            symbol_patterns: SP_SHELL,
1661            has_preprocessor: false,
1662        },
1663    ),
1664    (
1665        Language::PowerShell,
1666        StaticLangConfig {
1667            line_comments: &["#"],
1668            block_comment: Some(("<#", "#>")),
1669            allow_single_quote_strings: true,
1670            allow_double_quote_strings: true,
1671            allow_triple_quote_strings: false,
1672            allow_csharp_verbatim_strings: false,
1673            symbol_patterns: SP_POWERSHELL,
1674            has_preprocessor: false,
1675        },
1676    ),
1677    (
1678        Language::TypeScript,
1679        StaticLangConfig {
1680            line_comments: &["//"],
1681            block_comment: Some(("/*", "*/")),
1682            allow_single_quote_strings: true,
1683            allow_double_quote_strings: true,
1684            allow_triple_quote_strings: false,
1685            allow_csharp_verbatim_strings: false,
1686            symbol_patterns: SP_TS,
1687            has_preprocessor: false,
1688        },
1689    ),
1690    (
1691        Language::Python,
1692        StaticLangConfig {
1693            line_comments: &["#"],
1694            block_comment: None,
1695            allow_single_quote_strings: true,
1696            allow_double_quote_strings: true,
1697            allow_triple_quote_strings: true,
1698            allow_csharp_verbatim_strings: false,
1699            symbol_patterns: SP_PYTHON,
1700            has_preprocessor: false,
1701        },
1702    ),
1703    (
1704        Language::Assembly,
1705        StaticLangConfig {
1706            line_comments: &[";"],
1707            block_comment: None,
1708            allow_single_quote_strings: false,
1709            allow_double_quote_strings: false,
1710            allow_triple_quote_strings: false,
1711            allow_csharp_verbatim_strings: false,
1712            symbol_patterns: SP_ASSEMBLY,
1713            has_preprocessor: false,
1714        },
1715    ),
1716    (
1717        Language::Clojure,
1718        StaticLangConfig {
1719            line_comments: &[";"],
1720            block_comment: None,
1721            allow_single_quote_strings: false,
1722            allow_double_quote_strings: true,
1723            allow_triple_quote_strings: false,
1724            allow_csharp_verbatim_strings: false,
1725            symbol_patterns: SP_CLOJURE,
1726            has_preprocessor: false,
1727        },
1728    ),
1729    (
1730        Language::Css,
1731        StaticLangConfig {
1732            line_comments: &[],
1733            block_comment: Some(("/*", "*/")),
1734            allow_single_quote_strings: true,
1735            allow_double_quote_strings: true,
1736            allow_triple_quote_strings: false,
1737            allow_csharp_verbatim_strings: false,
1738            symbol_patterns: SP_NONE,
1739            has_preprocessor: false,
1740        },
1741    ),
1742    (
1743        Language::Dart,
1744        StaticLangConfig {
1745            line_comments: &["//"],
1746            block_comment: Some(("/*", "*/")),
1747            allow_single_quote_strings: true,
1748            allow_double_quote_strings: true,
1749            allow_triple_quote_strings: false,
1750            allow_csharp_verbatim_strings: false,
1751            symbol_patterns: SP_DART,
1752            has_preprocessor: false,
1753        },
1754    ),
1755    (
1756        Language::Dockerfile,
1757        StaticLangConfig {
1758            line_comments: &["#"],
1759            block_comment: None,
1760            allow_single_quote_strings: false,
1761            allow_double_quote_strings: false,
1762            allow_triple_quote_strings: false,
1763            allow_csharp_verbatim_strings: false,
1764            symbol_patterns: SP_NONE,
1765            has_preprocessor: false,
1766        },
1767    ),
1768    (
1769        Language::Elixir,
1770        StaticLangConfig {
1771            line_comments: &["#"],
1772            block_comment: None,
1773            allow_single_quote_strings: true,
1774            allow_double_quote_strings: true,
1775            allow_triple_quote_strings: false,
1776            allow_csharp_verbatim_strings: false,
1777            symbol_patterns: SP_ELIXIR,
1778            has_preprocessor: false,
1779        },
1780    ),
1781    (
1782        Language::Erlang,
1783        StaticLangConfig {
1784            line_comments: &["%"],
1785            block_comment: None,
1786            allow_single_quote_strings: false,
1787            allow_double_quote_strings: true,
1788            allow_triple_quote_strings: false,
1789            allow_csharp_verbatim_strings: false,
1790            symbol_patterns: SP_ERLANG,
1791            has_preprocessor: false,
1792        },
1793    ),
1794    (
1795        Language::FSharp,
1796        StaticLangConfig {
1797            line_comments: &["//"],
1798            block_comment: Some(("(*", "*)")),
1799            allow_single_quote_strings: false,
1800            allow_double_quote_strings: true,
1801            allow_triple_quote_strings: false,
1802            allow_csharp_verbatim_strings: false,
1803            symbol_patterns: SP_FSHARP,
1804            has_preprocessor: false,
1805        },
1806    ),
1807    (
1808        Language::Groovy,
1809        StaticLangConfig {
1810            line_comments: &["//"],
1811            block_comment: Some(("/*", "*/")),
1812            allow_single_quote_strings: true,
1813            allow_double_quote_strings: true,
1814            allow_triple_quote_strings: false,
1815            allow_csharp_verbatim_strings: false,
1816            symbol_patterns: SP_GROOVY,
1817            has_preprocessor: false,
1818        },
1819    ),
1820    (
1821        Language::Haskell,
1822        StaticLangConfig {
1823            line_comments: &["--"],
1824            block_comment: Some(("{-", "-}")),
1825            allow_single_quote_strings: true,
1826            allow_double_quote_strings: true,
1827            allow_triple_quote_strings: false,
1828            allow_csharp_verbatim_strings: false,
1829            symbol_patterns: SP_HASKELL,
1830            has_preprocessor: false,
1831        },
1832    ),
1833    (
1834        Language::Html,
1835        StaticLangConfig {
1836            line_comments: &[],
1837            block_comment: Some(("<!--", "-->")),
1838            allow_single_quote_strings: false,
1839            allow_double_quote_strings: false,
1840            allow_triple_quote_strings: false,
1841            allow_csharp_verbatim_strings: false,
1842            symbol_patterns: SP_NONE,
1843            has_preprocessor: false,
1844        },
1845    ),
1846    (
1847        Language::Julia,
1848        StaticLangConfig {
1849            line_comments: &["#"],
1850            block_comment: Some(("#=", "=#")),
1851            allow_single_quote_strings: false,
1852            allow_double_quote_strings: true,
1853            allow_triple_quote_strings: true,
1854            allow_csharp_verbatim_strings: false,
1855            symbol_patterns: SP_JULIA,
1856            has_preprocessor: false,
1857        },
1858    ),
1859    (
1860        Language::Kotlin,
1861        StaticLangConfig {
1862            line_comments: &["//"],
1863            block_comment: Some(("/*", "*/")),
1864            allow_single_quote_strings: true,
1865            allow_double_quote_strings: true,
1866            allow_triple_quote_strings: false,
1867            allow_csharp_verbatim_strings: false,
1868            symbol_patterns: SP_KOTLIN,
1869            has_preprocessor: false,
1870        },
1871    ),
1872    (
1873        Language::Lua,
1874        StaticLangConfig {
1875            line_comments: &["--"],
1876            block_comment: Some(("--[[", "]]")),
1877            allow_single_quote_strings: true,
1878            allow_double_quote_strings: true,
1879            allow_triple_quote_strings: false,
1880            allow_csharp_verbatim_strings: false,
1881            symbol_patterns: SP_LUA,
1882            has_preprocessor: false,
1883        },
1884    ),
1885    (
1886        Language::Makefile,
1887        StaticLangConfig {
1888            line_comments: &["#"],
1889            block_comment: None,
1890            allow_single_quote_strings: false,
1891            allow_double_quote_strings: false,
1892            allow_triple_quote_strings: false,
1893            allow_csharp_verbatim_strings: false,
1894            symbol_patterns: SP_NONE,
1895            has_preprocessor: false,
1896        },
1897    ),
1898    (
1899        Language::Nim,
1900        StaticLangConfig {
1901            line_comments: &["#"],
1902            block_comment: Some(("#[", "]#")),
1903            allow_single_quote_strings: true,
1904            allow_double_quote_strings: true,
1905            allow_triple_quote_strings: false,
1906            allow_csharp_verbatim_strings: false,
1907            symbol_patterns: SP_NIM,
1908            has_preprocessor: false,
1909        },
1910    ),
1911    (
1912        Language::Ocaml,
1913        StaticLangConfig {
1914            line_comments: &[],
1915            block_comment: Some(("(*", "*)")),
1916            allow_single_quote_strings: false,
1917            allow_double_quote_strings: true,
1918            allow_triple_quote_strings: false,
1919            allow_csharp_verbatim_strings: false,
1920            symbol_patterns: SP_OCAML,
1921            has_preprocessor: false,
1922        },
1923    ),
1924    (
1925        Language::Perl,
1926        StaticLangConfig {
1927            line_comments: &["#"],
1928            block_comment: None,
1929            allow_single_quote_strings: true,
1930            allow_double_quote_strings: true,
1931            allow_triple_quote_strings: false,
1932            allow_csharp_verbatim_strings: false,
1933            symbol_patterns: SP_PERL,
1934            has_preprocessor: false,
1935        },
1936    ),
1937    (
1938        Language::Php,
1939        StaticLangConfig {
1940            line_comments: &["//", "#"],
1941            block_comment: Some(("/*", "*/")),
1942            allow_single_quote_strings: true,
1943            allow_double_quote_strings: true,
1944            allow_triple_quote_strings: false,
1945            allow_csharp_verbatim_strings: false,
1946            symbol_patterns: SP_PHP,
1947            has_preprocessor: false,
1948        },
1949    ),
1950    (
1951        Language::R,
1952        StaticLangConfig {
1953            line_comments: &["#"],
1954            block_comment: None,
1955            allow_single_quote_strings: true,
1956            allow_double_quote_strings: true,
1957            allow_triple_quote_strings: false,
1958            allow_csharp_verbatim_strings: false,
1959            symbol_patterns: SP_R,
1960            has_preprocessor: false,
1961        },
1962    ),
1963    (
1964        Language::Ruby,
1965        StaticLangConfig {
1966            line_comments: &["#"],
1967            block_comment: None,
1968            allow_single_quote_strings: true,
1969            allow_double_quote_strings: true,
1970            allow_triple_quote_strings: false,
1971            allow_csharp_verbatim_strings: false,
1972            symbol_patterns: SP_RUBY,
1973            has_preprocessor: false,
1974        },
1975    ),
1976    (
1977        Language::Scala,
1978        StaticLangConfig {
1979            line_comments: &["//"],
1980            block_comment: Some(("/*", "*/")),
1981            allow_single_quote_strings: true,
1982            allow_double_quote_strings: true,
1983            allow_triple_quote_strings: false,
1984            allow_csharp_verbatim_strings: false,
1985            symbol_patterns: SP_SCALA,
1986            has_preprocessor: false,
1987        },
1988    ),
1989    (
1990        Language::Scss,
1991        StaticLangConfig {
1992            line_comments: &["//"],
1993            block_comment: Some(("/*", "*/")),
1994            allow_single_quote_strings: true,
1995            allow_double_quote_strings: true,
1996            allow_triple_quote_strings: false,
1997            allow_csharp_verbatim_strings: false,
1998            symbol_patterns: SP_NONE,
1999            has_preprocessor: false,
2000        },
2001    ),
2002    (
2003        Language::Sql,
2004        StaticLangConfig {
2005            line_comments: &["--"],
2006            block_comment: Some(("/*", "*/")),
2007            allow_single_quote_strings: true,
2008            allow_double_quote_strings: false,
2009            allow_triple_quote_strings: false,
2010            allow_csharp_verbatim_strings: false,
2011            symbol_patterns: SP_SQL,
2012            has_preprocessor: false,
2013        },
2014    ),
2015    (
2016        Language::Swift,
2017        StaticLangConfig {
2018            line_comments: &["//"],
2019            block_comment: Some(("/*", "*/")),
2020            allow_single_quote_strings: false,
2021            allow_double_quote_strings: true,
2022            allow_triple_quote_strings: false,
2023            allow_csharp_verbatim_strings: false,
2024            symbol_patterns: SP_SWIFT,
2025            has_preprocessor: false,
2026        },
2027    ),
2028    (
2029        Language::Xml,
2030        StaticLangConfig {
2031            line_comments: &[],
2032            block_comment: Some(("<!--", "-->")),
2033            allow_single_quote_strings: false,
2034            allow_double_quote_strings: false,
2035            allow_triple_quote_strings: false,
2036            allow_csharp_verbatim_strings: false,
2037            symbol_patterns: SP_NONE,
2038            has_preprocessor: false,
2039        },
2040    ),
2041    (
2042        Language::Zig,
2043        StaticLangConfig {
2044            line_comments: &["//"],
2045            block_comment: None,
2046            allow_single_quote_strings: true,
2047            allow_double_quote_strings: true,
2048            allow_triple_quote_strings: false,
2049            allow_csharp_verbatim_strings: false,
2050            symbol_patterns: SP_ZIG,
2051            has_preprocessor: false,
2052        },
2053    ),
2054];
2055
/// IEEE 1045-1992 counting switches in effect for one analysis call.
///
/// Combines the caller-selected `AnalysisOptions` toggles with one per-language property.
/// Private to this crate; per the original notes it is constructed inside `analyze_text`.
#[derive(Clone, Copy, Debug)]
struct IeeeFlags {
    /// True for C, C++, and Objective-C — languages with a C preprocessor.
    has_preprocessor_directives: bool,
    /// Mirrors `AnalysisOptions::blank_in_block_comment_as_comment`.
    blank_in_block_comment_as_comment: bool,
    /// Mirrors `AnalysisOptions::collapse_continuation_lines`.
    collapse_continuation_lines: bool,
}
2067
/// The kind of string literal the lexer is currently inside.
#[derive(Clone, Copy, Debug)]
enum StringState {
    /// Ordinary quoted string; the payload is the quote character that closes it.
    Single(char),
    /// Triple-quoted string; the payload is the three-character closing delimiter.
    Triple(&'static str),
    /// `@"…"` verbatim string, in which `""` stands for an embedded quote.
    VerbatimDouble,
}
2074
/// What the lexer observed on one physical line (or on a merged continuation sequence).
// Four independent yes/no observations; separate bools are the natural representation.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// At least one code character (string literals count as code).
    has_code: bool,
    /// A line-comment prefix was encountered.
    has_single_comment: bool,
    /// Some part of the line lies inside a block comment.
    has_multi_comment: bool,
    /// The line was identified as docstring content.
    has_docstring: bool,
}
2083
2084/// Process one character while the lexer is inside a string literal.
2085///
2086/// Returns `(new_string_state, advance)` where `advance` is the number of chars to skip.
2087fn process_string_char(
2088    state: StringState,
2089    chars: &[char],
2090    i: usize,
2091) -> (Option<StringState>, usize) {
2092    match state {
2093        StringState::Single(delim) => {
2094            if chars[i] == '\\' {
2095                return (Some(state), 2); // skip escaped character
2096            }
2097            if chars[i] == delim {
2098                (None, 1)
2099            } else {
2100                (Some(state), 1)
2101            }
2102        }
2103        StringState::Triple(delim) => {
2104            if starts_with(chars, i, delim) {
2105                (None, delim.len())
2106            } else {
2107                (Some(state), 1)
2108            }
2109        }
2110        StringState::VerbatimDouble => {
2111            if starts_with(chars, i, "\"\"") {
2112                return (Some(state), 2); // escaped quote-quote inside verbatim string
2113            }
2114            if chars[i] == '"' {
2115                (None, 1)
2116            } else {
2117                (Some(state), 1)
2118            }
2119        }
2120    }
2121}
2122
2123/// Process one character while the lexer is inside a block comment.
2124///
2125/// Returns `(still_in_block_comment, advance)`.
2126fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2127    if starts_with(chars, i, close) {
2128        (false, close.len())
2129    } else {
2130        (true, 1)
2131    }
2132}
2133
2134/// Attempt to begin a new string literal at position `i`.
2135///
2136/// Returns `Some((new_state, advance))` when a string opener is detected, else `None`.
2137fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2138    if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2139        return Some((StringState::VerbatimDouble, 2));
2140    }
2141    if config.allow_triple_quote_strings {
2142        if starts_with(chars, i, "\"\"\"") {
2143            return Some((StringState::Triple("\"\"\""), 3));
2144        }
2145        if starts_with(chars, i, "'''") {
2146            return Some((StringState::Triple("'''"), 3));
2147        }
2148    }
2149    if config.allow_single_quote_strings && chars[i] == '\'' {
2150        return Some((StringState::Single('\''), 1));
2151    }
2152    if config.allow_double_quote_strings && chars[i] == '"' {
2153        return Some((StringState::Single('"'), 1));
2154    }
2155    None
2156}
2157
2158/// Advance past one character position while inside a block comment.
2159///
2160/// Updates `in_block_comment` if the closing delimiter is found and returns the
2161/// number of characters consumed. Returns 0 when no block-comment config is set
2162/// (preserving the caller's `continue`-without-advance behaviour for that impossible state).
2163fn step_through_block_comment(
2164    chars: &[char],
2165    i: usize,
2166    block_comment: Option<(&'static str, &'static str)>,
2167    in_block_comment: &mut bool,
2168) -> usize {
2169    if let Some((_, close)) = block_comment {
2170        let (still_in, advance) = process_block_comment_char(chars, i, close);
2171        *in_block_comment = still_in;
2172        return advance;
2173    }
2174    0
2175}
2176
2177/// If the character at `i` starts a block comment, return the length of the opening
2178/// delimiter so the caller can advance past it. Returns `None` if no match.
2179fn try_open_block_comment(
2180    chars: &[char],
2181    i: usize,
2182    block_comment: Option<(&'static str, &'static str)>,
2183) -> Option<usize> {
2184    let (open, _) = block_comment?;
2185    starts_with(chars, i, open).then_some(open.len())
2186}
2187
2188/// Scan a single physical line and update `facts`, `in_block_comment`, and `string_state`.
2189///
2190/// Returns `true` when the caller should break out of the per-line loop early (line comment hit).
2191fn scan_line(
2192    chars: &[char],
2193    config: &ScanConfig,
2194    facts: &mut LineFacts,
2195    in_block_comment: &mut bool,
2196    string_state: &mut Option<StringState>,
2197) {
2198    let mut i = 0usize;
2199    while i < chars.len() {
2200        // Inside a string literal โ€” advance until the closing delimiter.
2201        if let Some(state) = *string_state {
2202            facts.has_code = true;
2203            let (new_state, advance) = process_string_char(state, chars, i);
2204            *string_state = new_state;
2205            i += advance;
2206            continue;
2207        }
2208
2209        // Inside a block comment โ€” advance until the closing delimiter.
2210        if *in_block_comment {
2211            facts.has_multi_comment = true;
2212            i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
2213            continue;
2214        }
2215
2216        // Whitespace outside any string/comment โ€” skip.
2217        if chars[i].is_whitespace() {
2218            i += 1;
2219            continue;
2220        }
2221
2222        // Attempt to open a string literal.
2223        if let Some((new_state, advance)) = try_open_string(chars, i, config) {
2224            facts.has_code = true;
2225            *string_state = Some(new_state);
2226            i += advance;
2227            continue;
2228        }
2229
2230        // Attempt to open a block comment.
2231        if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
2232            facts.has_multi_comment = true;
2233            *in_block_comment = true;
2234            i += advance;
2235            continue;
2236        }
2237
2238        // Line comment โ€” rest of the line is a comment; stop scanning.
2239        if config
2240            .line_comments
2241            .iter()
2242            .any(|prefix| starts_with(chars, i, prefix))
2243        {
2244            facts.has_single_comment = true;
2245            break;
2246        }
2247
2248        // Plain code character.
2249        facts.has_code = true;
2250        i += 1;
2251    }
2252}
2253
2254/// Apply IEEE 1045-1992 ยง4.2 preprocessor-directive tracking and continuation-line merging,
2255/// then emit the finalized `LineFacts` for this physical line.
2256///
2257/// Returns `None` when the line is part of a continuation sequence and should be deferred.
2258fn finalize_line_facts(
2259    facts: LineFacts,
2260    trimmed: &str,
2261    raw: &mut RawLineCounts,
2262    ieee: IeeeFlags,
2263    in_block_comment: bool,
2264    string_state: Option<StringState>,
2265    pending_continuation: &mut Option<LineFacts>,
2266) -> Option<LineFacts> {
2267    // IEEE 1045-1992 ยง4.2: track preprocessor/compiler directive lines (C/C++/ObjC).
2268    // A directive line is a pure code line (no comment on the same physical line) whose
2269    // trimmed content starts with '#'.
2270    if ieee.has_preprocessor_directives
2271        && facts.has_code
2272        && !facts.has_single_comment
2273        && !facts.has_multi_comment
2274        && trimmed.starts_with('#')
2275    {
2276        raw.compiler_directive_lines += 1;
2277    }
2278
2279    // IEEE 1045-1992 continuation-line handling.
2280    // A line is a continuation starter when it ends with '\' outside any comment or string.
2281    let is_continuation = ieee.collapse_continuation_lines
2282        && !in_block_comment
2283        && string_state.is_none()
2284        && trimmed.ends_with('\\');
2285
2286    if is_continuation {
2287        let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2288        pending.has_code |= facts.has_code;
2289        pending.has_single_comment |= facts.has_single_comment;
2290        pending.has_multi_comment |= facts.has_multi_comment;
2291        pending.has_docstring |= facts.has_docstring;
2292        return None; // defer classification until the sequence ends
2293    }
2294
2295    // Merge any accumulated continuation facts into the final line.
2296    let emit = if let Some(pending) = pending_continuation.take() {
2297        LineFacts {
2298            has_code: pending.has_code | facts.has_code,
2299            has_single_comment: pending.has_single_comment | facts.has_single_comment,
2300            has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2301            has_docstring: pending.has_docstring | facts.has_docstring,
2302        }
2303    } else {
2304        facts
2305    };
2306    Some(emit)
2307}
2308
2309/// Scan and classify one physical line, updating all running state in place.
2310///
2311/// Pre-classified lines (present in `config.skip_lines`) are counted as docstring-comment
2312/// lines and returned early without further analysis.
2313#[allow(clippy::needless_pass_by_value)]
2314#[allow(clippy::too_many_arguments)]
2315#[allow(clippy::many_single_char_names)] // destructuring return from count_symbols; names match field roles
2316fn process_physical_line(
2317    line: &str,
2318    line_idx: usize,
2319    config: &ScanConfig,
2320    raw: &mut RawLineCounts,
2321    in_block_comment: &mut bool,
2322    string_state: &mut Option<StringState>,
2323    pending_continuation: &mut Option<LineFacts>,
2324    ieee: IeeeFlags,
2325) {
2326    raw.total_physical_lines += 1;
2327
2328    if config.skip_lines.contains(&line_idx) {
2329        raw.docstring_comment_lines += 1;
2330        return;
2331    }
2332
2333    let trimmed = line.trim();
2334    let mut facts = LineFacts::default();
2335
2336    // IEEE 1045-1992: blank lines inside block comments are comment lines by default.
2337    // When blank_in_block_comment_as_comment is false, blank lines keep their blank
2338    // classification even while inside a block comment.
2339    if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
2340        facts.has_multi_comment = true;
2341    }
2342
2343    let chars: Vec<char> = line.chars().collect();
2344    scan_line(&chars, config, &mut facts, in_block_comment, string_state);
2345
2346    let Some(emit) = finalize_line_facts(
2347        facts,
2348        trimmed,
2349        raw,
2350        ieee,
2351        *in_block_comment,
2352        *string_state,
2353        pending_continuation,
2354    ) else {
2355        return;
2356    };
2357
2358    classify_line(raw, &emit, trimmed);
2359
2360    if emit.has_code {
2361        let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
2362        raw.functions += f;
2363        raw.classes += c;
2364        raw.variables += v;
2365        raw.imports += i;
2366        raw.test_count += t;
2367        raw.test_assertion_count += a;
2368        raw.test_suite_count += s;
2369    }
2370}
2371
2372#[allow(clippy::needless_pass_by_value)]
2373fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
2374    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2375    let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2376
2377    let mut raw = RawLineCounts::default();
2378    let mut warnings = Vec::new();
2379
2380    let mut in_block_comment = false;
2381    let mut string_state: Option<StringState> = None;
2382    // IEEE continuation-line state: accumulates facts across a backslash-continued sequence.
2383    let mut pending_continuation: Option<LineFacts> = None;
2384
2385    for (line_idx, line) in lines.iter().enumerate() {
2386        process_physical_line(
2387            line,
2388            line_idx,
2389            &config,
2390            &mut raw,
2391            &mut in_block_comment,
2392            &mut string_state,
2393            &mut pending_continuation,
2394            ieee,
2395        );
2396    }
2397
2398    // Flush any pending continuation that reaches end-of-file without a closing line.
2399    if let Some(pending) = pending_continuation.take() {
2400        classify_line(&mut raw, &pending, "");
2401    }
2402
2403    if in_block_comment {
2404        warnings.push("unclosed block comment detected; result is best effort".into());
2405    }
2406    if string_state.is_some() {
2407        warnings.push("unclosed string literal detected; result is best effort".into());
2408    }
2409
2410    RawFileAnalysis {
2411        raw,
2412        parse_mode: if warnings.is_empty() {
2413            ParseMode::Lexical
2414        } else {
2415            ParseMode::LexicalBestEffort
2416        },
2417        warnings,
2418    }
2419}
2420
2421const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
2422    if facts.has_docstring {
2423        raw.docstring_comment_lines += 1;
2424    } else if !facts.has_code
2425        && !facts.has_single_comment
2426        && !facts.has_multi_comment
2427        && trimmed.is_empty()
2428    {
2429        raw.blank_only_lines += 1;
2430    } else if facts.has_code && facts.has_single_comment {
2431        raw.mixed_code_single_comment_lines += 1;
2432    } else if facts.has_code && facts.has_multi_comment {
2433        raw.mixed_code_multi_comment_lines += 1;
2434    } else if facts.has_code {
2435        raw.code_only_lines += 1;
2436    } else if facts.has_single_comment {
2437        raw.single_comment_only_lines += 1;
2438    } else if facts.has_multi_comment {
2439        raw.multi_comment_only_lines += 1;
2440    } else if trimmed.is_empty() {
2441        raw.blank_only_lines += 1;
2442    } else {
2443        raw.skipped_unknown_lines += 1;
2444    }
2445}
2446
2447fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
2448    let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
2449    (
2450        hit(patterns.functions),
2451        hit(patterns.classes),
2452        hit(patterns.variables),
2453        hit(patterns.imports),
2454        hit(patterns.tests),
2455        hit(patterns.assertions),
2456        hit(patterns.test_suites),
2457    )
2458}
2459
/// Return `true` when the characters of `needle` appear in `chars` starting at `index`.
///
/// The comparison walks both sequences in lock-step without allocating; this helper is
/// called for nearly every character the lexer visits. An `index` past the end of
/// `chars` yields `false`. An empty `needle` matches at any `index <= chars.len()`.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    chars.get(index..).is_some_and(|tail| {
        let mut remaining = tail.iter();
        // `next()` returning `None` (tail exhausted) fails the comparison, so a needle
        // that would run past the end of the line never matches.
        needle
            .chars()
            .all(|expected| remaining.next() == Some(&expected))
    })
}
2464
/// One entry of the indentation-scope stack used for Python docstring detection.
#[derive(Clone, Debug)]
struct PyContext {
    /// Indentation width (in characters) of the scope's body.
    indent: usize,
    /// True until the first statement of the scope has been inspected.
    expect_docstring: bool,
}
2470
2471/// Update `contexts` to pop any scopes that the current `indent` has outdented past.
2472fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
2473    while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
2474        contexts.pop();
2475    }
2476}
2477
2478/// Handle `pending_block_indent` transition: push a new docstring-expecting context when we
2479/// detect the first indented line of a new block, or cancel the pending state otherwise.
2480fn py_handle_pending_indent(
2481    pending_block_indent: &mut Option<usize>,
2482    contexts: &mut Vec<PyContext>,
2483    indent: usize,
2484    trimmed: &str,
2485) {
2486    let Some(base_indent) = *pending_block_indent else {
2487        return;
2488    };
2489    if indent > base_indent {
2490        contexts.push(PyContext {
2491            indent,
2492            expect_docstring: true,
2493        });
2494        *pending_block_indent = None;
2495    } else if !trimmed.starts_with('@') {
2496        *pending_block_indent = None;
2497    }
2498}
2499
2500/// Check whether the current line is a docstring opener in the current context.
2501///
2502/// If it is, records the line, adjusts `ctx.expect_docstring`, and optionally sets
2503/// `active_docstring` for multi-line docstrings. Returns `true` when the caller should
2504/// `continue` to the next line.
2505fn py_try_record_docstring(
2506    ctx: &mut PyContext,
2507    trimmed: &str,
2508    idx: usize,
2509    docstring_lines: &mut HashSet<usize>,
2510    active_docstring: &mut Option<(&'static str, usize)>,
2511) -> bool {
2512    if !ctx.expect_docstring {
2513        return false;
2514    }
2515    if let Some(delim) = docstring_delimiter(trimmed) {
2516        docstring_lines.insert(idx);
2517        ctx.expect_docstring = false;
2518        if !closes_triple_docstring(trimmed, delim, true) {
2519            *active_docstring = Some((delim, idx));
2520        }
2521        return true;
2522    }
2523    ctx.expect_docstring = false;
2524    false
2525}
2526
2527/// Advance through an active multi-line docstring: marks the current line and clears
2528/// `active_docstring` when the closing delimiter is found. Returns `true` when the caller
2529/// should `continue` to the next line (i.e. we were inside a docstring).
2530fn track_active_docstring(
2531    active_docstring: &mut Option<(&'static str, usize)>,
2532    docstring_lines: &mut HashSet<usize>,
2533    idx: usize,
2534    trimmed: &str,
2535) -> bool {
2536    let Some((delim, start_line)) = *active_docstring else {
2537        return false;
2538    };
2539    docstring_lines.insert(idx);
2540    if closes_triple_docstring(trimmed, delim, idx == start_line) {
2541        *active_docstring = None;
2542    }
2543    true
2544}
2545
2546/// Attempt to record a docstring opener using the top of the context stack.
2547/// Returns `true` when the caller should `continue` to the next line.
2548fn try_record_docstring_if_context(
2549    contexts: &mut [PyContext],
2550    trimmed: &str,
2551    idx: usize,
2552    docstring_lines: &mut HashSet<usize>,
2553    active_docstring: &mut Option<(&'static str, usize)>,
2554) -> bool {
2555    let Some(ctx) = contexts.last_mut() else {
2556        return false;
2557    };
2558    py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
2559}
2560
/// Mark every line from the start of a still-open docstring to the end of the file.
///
/// Does nothing when `active_docstring` is `None`, i.e. all docstrings were closed.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, start_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(start_line..num_lines);
}
2573
2574fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
2575    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2576    let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2577
2578    let mut docstring_lines = HashSet::new();
2579    let mut contexts = vec![PyContext {
2580        indent: 0,
2581        expect_docstring: true,
2582    }];
2583    let mut pending_block_indent: Option<usize> = None;
2584    let mut active_docstring: Option<(&'static str, usize)> = None;
2585
2586    for (idx, line) in lines.iter().enumerate() {
2587        let trimmed = line.trim();
2588        let indent = leading_indent(line);
2589
2590        if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
2591            continue;
2592        }
2593
2594        // Blank lines and comment lines don't affect docstring detection.
2595        if trimmed.is_empty() || trimmed.starts_with('#') {
2596            continue;
2597        }
2598
2599        py_pop_outdented_contexts(&mut contexts, indent);
2600        py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
2601
2602        if try_record_docstring_if_context(
2603            &mut contexts,
2604            trimmed,
2605            idx,
2606            &mut docstring_lines,
2607            &mut active_docstring,
2608        ) {
2609            continue;
2610        }
2611
2612        if is_python_block_header(trimmed) {
2613            pending_block_indent = Some(indent);
2614        }
2615    }
2616
2617    mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
2618
2619    docstring_lines
2620}
2621
/// Count the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, so a tab contributes 1, not a tab stop.
fn leading_indent(line: &str) -> usize {
    let mut width = 0usize;
    for ch in line.chars() {
        if !ch.is_whitespace() {
            break;
        }
        width += 1;
    }
    width
}
2625
/// Return `true` when `trimmed` looks like a single-line `def`, `async def`, or `class`
/// header, i.e. it starts with one of those keywords and ends with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];

    if !trimmed.ends_with(':') {
        return false;
    }
    HEADER_KEYWORDS
        .iter()
        .any(|keyword| trimmed.starts_with(*keyword))
}
2632
/// Return the triple-quote delimiter that `trimmed` opens with, if any.
///
/// Any run of string-prefix letters (`r`, `u`, `b`, `f`, in either case) before the
/// quotes is skipped. `"""` is checked before `'''`.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    let after_prefix = trimmed.trim_start_matches(|c: char| {
        matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F')
    });

    ["\"\"\"", "'''"]
        .iter()
        .copied()
        .find(|delim| after_prefix.starts_with(*delim))
}
2654
/// Decide whether a triple-quoted docstring is closed on this line.
///
/// Counts non-overlapping occurrences of `delim` in `trimmed`. On the line that opened
/// the docstring (`same_line_as_start`), two occurrences are needed — the opener and the
/// closer — while on any later line one occurrence is enough.
///
/// An empty `delim` can never close anything and returns `false`; the earlier
/// hand-written search loop did not terminate for that input. Counting stops as soon
/// as enough occurrences have been seen.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    if delim.is_empty() {
        return false;
    }
    let required = if same_line_as_start { 2 } else { 1 };
    // `take(required)` bounds the scan; reaching `required` items means "at least that many".
    trimmed.matches(delim).take(required).count() == required
}
2669
2670/// Tree-sitter-backed adapters (compiled only when the `tree-sitter` feature is enabled).
2671///
2672/// When parsing succeeds the result is used directly; on any failure the caller falls back
2673/// to the lexical state machine.
2674#[cfg(feature = "tree-sitter")]
2675pub mod ts {
2676    use tree_sitter::Node;
2677
2678    use super::{ParseMode, RawFileAnalysis, RawLineCounts};
2679
2680    /// Classify every line of `text` using a tree-sitter grammar.
2681    ///
2682    /// `comment_node_kinds` โ€” node type names that represent comments in this grammar
2683    /// `docstring_stmt_kind` โ€” optional parent node type whose direct `string` child is a docstring
2684    fn analyze_lines(
2685        text: &str,
2686        ts_language: &tree_sitter::Language,
2687        comment_node_kinds: &[&str],
2688        docstring_stmt_kind: Option<&str>,
2689    ) -> Option<RawFileAnalysis> {
2690        let mut parser = tree_sitter::Parser::new();
2691        parser.set_language(ts_language).ok()?;
2692        let tree = parser.parse(text, None)?;
2693
2694        let lines: Vec<&str> = text.split_terminator('\n').collect();
2695        let n = lines.len();
2696
2697        let mut has_code = vec![false; n];
2698        let mut has_comment = vec![false; n];
2699        let mut comment_is_block = vec![false; n];
2700        let mut has_docstring = vec![false; n];
2701
2702        // Walk every node in the tree and mark line arrays.
2703        let mut ctx = VisitCtx {
2704            source: text.as_bytes(),
2705            comment_kinds: comment_node_kinds,
2706            docstring_stmt_kind,
2707            has_code: &mut has_code,
2708            has_comment: &mut has_comment,
2709            comment_is_block: &mut comment_is_block,
2710            has_docstring: &mut has_docstring,
2711        };
2712        visit(tree.root_node(), &mut ctx);
2713
2714        let mut raw = RawLineCounts::default();
2715        classify_ts_lines(
2716            &lines,
2717            &has_code,
2718            &has_comment,
2719            &comment_is_block,
2720            &has_docstring,
2721            &mut raw,
2722        );
2723
2724        Some(RawFileAnalysis {
2725            raw,
2726            parse_mode: ParseMode::TreeSitter,
2727            warnings: Vec::new(),
2728        })
2729    }
2730
2731    /// Flags describing what kinds of content appear on a single line.
2732    // Four bools are the natural representation for these four independent properties.
2733    #[allow(clippy::struct_excessive_bools)]
2734    #[derive(Clone, Copy)]
2735    struct TsLineFlags {
2736        has_code: bool,
2737        has_comment: bool,
2738        comment_is_block: bool,
2739        has_docstring: bool,
2740    }
2741
2742    /// Classify a single tree-sitter-annotated line and accumulate into `raw`.
2743    const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
2744        if trimmed.is_empty() {
2745            raw.blank_only_lines += 1;
2746        } else if flags.has_docstring && !flags.has_code {
2747            raw.docstring_comment_lines += 1;
2748        } else if flags.has_code && flags.has_comment {
2749            // Classify the mixed line as single or multi based on what kind of comment is on it.
2750            if flags.comment_is_block {
2751                raw.mixed_code_multi_comment_lines += 1;
2752            } else {
2753                raw.mixed_code_single_comment_lines += 1;
2754            }
2755        } else if flags.has_comment {
2756            if flags.comment_is_block {
2757                raw.multi_comment_only_lines += 1;
2758            } else {
2759                raw.single_comment_only_lines += 1;
2760            }
2761        } else {
2762            raw.code_only_lines += 1;
2763        }
2764    }
2765
2766    /// Classify each tree-sitter-annotated line and accumulate counts into `raw`.
2767    fn classify_ts_lines(
2768        lines: &[&str],
2769        has_code: &[bool],
2770        has_comment: &[bool],
2771        comment_is_block: &[bool],
2772        has_docstring: &[bool],
2773        raw: &mut RawLineCounts,
2774    ) {
2775        for i in 0..lines.len() {
2776            raw.total_physical_lines += 1;
2777            classify_ts_line(
2778                lines[i].trim(),
2779                TsLineFlags {
2780                    has_code: has_code[i],
2781                    has_comment: has_comment[i],
2782                    comment_is_block: comment_is_block[i],
2783                    has_docstring: has_docstring[i],
2784                },
2785                raw,
2786            );
2787        }
2788    }
2789
2790    struct VisitCtx<'a> {
2791        source: &'a [u8],
2792        comment_kinds: &'a [&'a str],
2793        docstring_stmt_kind: Option<&'a str>,
2794        has_code: &'a mut Vec<bool>,
2795        has_comment: &'a mut Vec<bool>,
2796        comment_is_block: &'a mut Vec<bool>,
2797        has_docstring: &'a mut Vec<bool>,
2798    }
2799
2800    /// Mark all rows of a comment node and detect whether it is a block comment.
2801    fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
2802        let start_row = node.start_position().row;
2803        let end_row = node.end_position().row;
2804        let first_two = node
2805            .utf8_text(ctx.source)
2806            .unwrap_or("")
2807            .get(..2)
2808            .unwrap_or("");
2809        let is_block = first_two == "/*" || first_two == "<#";
2810        for row in start_row..=end_row {
2811            if row < ctx.has_comment.len() {
2812                ctx.has_comment[row] = true;
2813                if is_block {
2814                    ctx.comment_is_block[row] = true;
2815                }
2816            }
2817        }
2818    }
2819
2820    /// If `node` is an `expression_statement` whose sole named child is a string literal,
2821    /// mark those rows as docstring and return `true`.
2822    fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
2823        let Some(stmt_kind) = ctx.docstring_stmt_kind else {
2824            return false;
2825        };
2826        if kind != stmt_kind || node.named_child_count() != 1 {
2827            return false;
2828        }
2829        let Some(child) = node.named_child(0) else {
2830            return false;
2831        };
2832        if child.kind() != "string" {
2833            return false;
2834        }
2835        let child_start = child.start_position().row;
2836        let child_end = child.end_position().row;
2837        for row in child_start..=child_end {
2838            if row < ctx.has_docstring.len() {
2839                ctx.has_docstring[row] = true;
2840            }
2841        }
2842        true
2843    }
2844
2845    /// Mark all rows of a leaf (non-comment, non-extra) node as code.
2846    fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
2847        let start_row = node.start_position().row;
2848        let end_row = node.end_position().row;
2849        for row in start_row..=end_row {
2850            if row < ctx.has_code.len() {
2851                ctx.has_code[row] = true;
2852            }
2853        }
2854    }
2855
2856    #[allow(clippy::too_many_lines)]
2857    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
2858        // NOSONAR
2859        let kind = node.kind();
2860
2861        // Comment node โ€” mark rows as comment, detect block vs. line comment.
2862        if ctx.comment_kinds.contains(&kind) {
2863            visit_comment_node(node, ctx);
2864            return;
2865        }
2866
2867        // Python docstring: expression_statement whose only named child is a string literal.
2868        if visit_maybe_docstring(node, kind, ctx) {
2869            return;
2870        }
2871
2872        // Leaf non-comment node: mark as code.
2873        if node.child_count() == 0 && !node.is_extra() {
2874            visit_leaf_code(node, ctx);
2875            return;
2876        }
2877
2878        for i in 0..node.child_count() {
2879            #[allow(clippy::cast_possible_truncation)]
2880            // child_count bounded by tree-sitter u32 capacity
2881            if let Some(child) = node.child(i as u32) {
2882                visit(child, ctx);
2883            }
2884        }
2885    }
2886
2887    /// Parse C or C++ source with tree-sitter-c.
2888    #[must_use]
2889    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
2890        let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
2891        analyze_lines(text, &lang, &["comment"], None)
2892    }
2893
2894    /// Parse Python source with tree-sitter-python.
2895    #[must_use]
2896    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
2897        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
2898        analyze_lines(text, &lang, &["comment"], Some("expression_statement"))
2899    }
2900}
2901
#[cfg(test)]
mod tests {
    use super::*;

    /// Module- and function-level docstrings are counted apart from code, and a
    /// trailing `#` comment turns its line into a mixed code/comment line.
    #[test]
    fn python_docstrings_are_separated() {
        let source = r#""""module docs"""


def fn_a():
    """function docs"""
    value = 1  # trailing comment
    return value
"#;

        let analysis = analyze_text(Language::Python, source, AnalysisOptions::default());
        let counts = &analysis.raw;
        assert_eq!(counts.docstring_comment_lines, 2);
        assert_eq!(counts.mixed_code_single_comment_lines, 1);
        assert_eq!(counts.code_only_lines, 2);
    }

    /// A `//` comment after code yields a mixed line; a lone `/* ... */` line is
    /// counted as a block-comment-only line.
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let source = "int x = 1; // note\n/* block */\n";

        let analysis = analyze_text(Language::C, source, AnalysisOptions::default());
        let counts = &analysis.raw;
        assert_eq!(counts.mixed_code_single_comment_lines, 1);
        assert_eq!(counts.multi_comment_only_lines, 1);
    }

    /// An extension-less path is resolved to Shell from its bash shebang line.
    #[test]
    fn detect_language_by_shebang() {
        let no_overrides = BTreeMap::new();
        let detected = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &no_overrides,
            true,
        );
        assert_eq!(detected, Some(Language::Shell));
    }
}