1pub mod style;
5pub use style::{IndentStyle, StyleAnalysis, StyleGuideScore, StyleSignal};
6
7use std::collections::{BTreeMap, BTreeSet, HashSet};
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
/// Every source, markup and configuration language this module can name.
///
/// With `#[serde(rename_all = "snake_case")]` the variants serialize as
/// snake_case strings (for example `CSharp` becomes `"c_sharp"`). The derived
/// `Ord` follows declaration order, which is therefore also the iteration
/// order of any `BTreeSet<Language>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
    Solidity,
    Protobuf,
    Hcl,
    GraphQl,
    Ada,
    Vhdl,
    Verilog,
    Tcl,
    Pascal,
    VisualBasic,
    Lisp,
    Fortran,
    Nix,
    Crystal,
    D,
    Glsl,
    Cmake,
    Elm,
    Awk,
}
80
81impl Language {
82 #[must_use]
83 pub const fn display_name(&self) -> &'static str {
84 match self {
85 Self::C => "C",
86 Self::Cpp => "C++",
87 Self::CSharp => "C#",
88 Self::Go => "Go",
89 Self::Java => "Java",
90 Self::JavaScript => "JavaScript",
91 Self::Python => "Python",
92 Self::Rust => "Rust",
93 Self::Shell => "Shell",
94 Self::PowerShell => "PowerShell",
95 Self::TypeScript => "TypeScript",
96 Self::Assembly => "Assembly",
97 Self::Clojure => "Clojure",
98 Self::Css => "CSS",
99 Self::Dart => "Dart",
100 Self::Dockerfile => "Dockerfile",
101 Self::Elixir => "Elixir",
102 Self::Erlang => "Erlang",
103 Self::FSharp => "F#",
104 Self::Groovy => "Groovy",
105 Self::Haskell => "Haskell",
106 Self::Html => "HTML",
107 Self::Julia => "Julia",
108 Self::Kotlin => "Kotlin",
109 Self::Lua => "Lua",
110 Self::Makefile => "Makefile",
111 Self::Nim => "Nim",
112 Self::ObjectiveC => "Objective-C",
113 Self::Ocaml => "OCaml",
114 Self::Perl => "Perl",
115 Self::Php => "PHP",
116 Self::R => "R",
117 Self::Ruby => "Ruby",
118 Self::Scala => "Scala",
119 Self::Scss => "SCSS",
120 Self::Sql => "SQL",
121 Self::Svelte => "Svelte",
122 Self::Swift => "Swift",
123 Self::Vue => "Vue",
124 Self::Xml => "XML",
125 Self::Zig => "Zig",
126 Self::Solidity => "Solidity",
127 Self::Protobuf => "Protocol Buffers",
128 Self::Hcl => "HCL/Terraform",
129 Self::GraphQl => "GraphQL",
130 Self::Ada => "Ada",
131 Self::Vhdl => "VHDL",
132 Self::Verilog => "Verilog/SystemVerilog",
133 Self::Tcl => "Tcl",
134 Self::Pascal => "Pascal/Delphi",
135 Self::VisualBasic => "Visual Basic",
136 Self::Lisp => "Lisp/Scheme",
137 Self::Fortran => "Fortran",
138 Self::Nix => "Nix",
139 Self::Crystal => "Crystal",
140 Self::D => "D",
141 Self::Glsl => "GLSL/HLSL",
142 Self::Cmake => "CMake",
143 Self::Elm => "Elm",
144 Self::Awk => "Awk",
145 }
146 }
147
148 #[must_use]
149 pub const fn as_slug(&self) -> &'static str {
150 match self {
151 Self::C => "c",
152 Self::Cpp => "cpp",
153 Self::CSharp => "csharp",
154 Self::Go => "go",
155 Self::Java => "java",
156 Self::JavaScript => "javascript",
157 Self::Python => "python",
158 Self::Rust => "rust",
159 Self::Shell => "shell",
160 Self::PowerShell => "powershell",
161 Self::TypeScript => "typescript",
162 Self::Assembly => "assembly",
163 Self::Clojure => "clojure",
164 Self::Css => "css",
165 Self::Dart => "dart",
166 Self::Dockerfile => "dockerfile",
167 Self::Elixir => "elixir",
168 Self::Erlang => "erlang",
169 Self::FSharp => "fsharp",
170 Self::Groovy => "groovy",
171 Self::Haskell => "haskell",
172 Self::Html => "html",
173 Self::Julia => "julia",
174 Self::Kotlin => "kotlin",
175 Self::Lua => "lua",
176 Self::Makefile => "makefile",
177 Self::Nim => "nim",
178 Self::ObjectiveC => "objectivec",
179 Self::Ocaml => "ocaml",
180 Self::Perl => "perl",
181 Self::Php => "php",
182 Self::R => "r",
183 Self::Ruby => "ruby",
184 Self::Scala => "scala",
185 Self::Scss => "scss",
186 Self::Sql => "sql",
187 Self::Svelte => "svelte",
188 Self::Swift => "swift",
189 Self::Vue => "vue",
190 Self::Xml => "xml",
191 Self::Zig => "zig",
192 Self::Solidity => "solidity",
193 Self::Protobuf => "protobuf",
194 Self::Hcl => "hcl",
195 Self::GraphQl => "graphql",
196 Self::Ada => "ada",
197 Self::Vhdl => "vhdl",
198 Self::Verilog => "verilog",
199 Self::Tcl => "tcl",
200 Self::Pascal => "pascal",
201 Self::VisualBasic => "visualbasic",
202 Self::Lisp => "lisp",
203 Self::Fortran => "fortran",
204 Self::Nix => "nix",
205 Self::Crystal => "crystal",
206 Self::D => "d",
207 Self::Glsl => "glsl",
208 Self::Cmake => "cmake",
209 Self::Elm => "elm",
210 Self::Awk => "awk",
211 }
212 }
213
214 #[must_use]
215 pub fn from_name(name: &str) -> Option<Self> {
216 match name.trim().to_ascii_lowercase().as_str() {
217 "c" => Some(Self::C),
218 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
219 "csharp" | "c#" | "cs" => Some(Self::CSharp),
220 "go" | "golang" => Some(Self::Go),
221 "java" => Some(Self::Java),
222 "javascript" | "js" => Some(Self::JavaScript),
223 "python" | "py" => Some(Self::Python),
224 "rust" | "rs" => Some(Self::Rust),
225 "shell" | "sh" | "bash" => Some(Self::Shell),
226 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
227 "typescript" | "ts" => Some(Self::TypeScript),
228 "assembly" | "asm" => Some(Self::Assembly),
229 "clojure" | "clj" => Some(Self::Clojure),
230 "css" => Some(Self::Css),
231 "dart" => Some(Self::Dart),
232 "dockerfile" | "docker" => Some(Self::Dockerfile),
233 "elixir" | "ex" => Some(Self::Elixir),
234 "erlang" | "erl" => Some(Self::Erlang),
235 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
236 "groovy" => Some(Self::Groovy),
237 "haskell" | "hs" => Some(Self::Haskell),
238 "html" | "htm" => Some(Self::Html),
239 "julia" | "jl" => Some(Self::Julia),
240 "kotlin" | "kt" => Some(Self::Kotlin),
241 "lua" => Some(Self::Lua),
242 "makefile" | "make" | "mk" => Some(Self::Makefile),
243 "nim" => Some(Self::Nim),
244 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
245 "ocaml" | "ml" => Some(Self::Ocaml),
246 "perl" | "pl" => Some(Self::Perl),
247 "php" => Some(Self::Php),
248 "r" => Some(Self::R),
249 "ruby" | "rb" => Some(Self::Ruby),
250 "scala" => Some(Self::Scala),
251 "scss" | "sass" => Some(Self::Scss),
252 "sql" => Some(Self::Sql),
253 "svelte" => Some(Self::Svelte),
254 "swift" => Some(Self::Swift),
255 "vue" => Some(Self::Vue),
256 "xml" => Some(Self::Xml),
257 "zig" => Some(Self::Zig),
258 "solidity" | "sol" => Some(Self::Solidity),
259 "protobuf" | "proto" | "protocolbuffers" => Some(Self::Protobuf),
260 "hcl" | "terraform" | "tf" => Some(Self::Hcl),
261 "graphql" | "gql" => Some(Self::GraphQl),
262 "ada" => Some(Self::Ada),
263 "vhdl" => Some(Self::Vhdl),
264 "verilog" | "systemverilog" | "sv" => Some(Self::Verilog),
265 "tcl" => Some(Self::Tcl),
266 "pascal" | "delphi" | "pas" => Some(Self::Pascal),
267 "visualbasic" | "vb" | "vbnet" | "vb.net" => Some(Self::VisualBasic),
268 "lisp" | "scheme" | "racket" | "clisp" | "elisp" => Some(Self::Lisp),
269 "fortran" | "f90" | "f95" => Some(Self::Fortran),
270 "nix" => Some(Self::Nix),
271 "crystal" | "cr" => Some(Self::Crystal),
272 "d" | "dlang" => Some(Self::D),
273 "glsl" | "hlsl" | "shader" | "wgsl" => Some(Self::Glsl),
274 "cmake" => Some(Self::Cmake),
275 "elm" => Some(Self::Elm),
276 "awk" => Some(Self::Awk),
277 _ => None,
278 }
279 }
280}
281
/// Raw counters collected for one analysed file.
///
/// NOTE(review): the per-field notes below are inferred from the field names;
/// the code that fills these counters in is outside this excerpt.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    // Line-classification buckets (presumably one bucket per physical line — confirm
    // against the scanner).
    pub total_physical_lines: u64,
    pub blank_only_lines: u64,
    pub code_only_lines: u64,
    pub single_comment_only_lines: u64,
    pub multi_comment_only_lines: u64,
    pub mixed_code_single_comment_lines: u64,
    pub mixed_code_multi_comment_lines: u64,
    pub docstring_comment_lines: u64,
    pub skipped_unknown_lines: u64,
    // Every field below carries `#[serde(default)]`, so input that lacks the key
    // deserializes to the type's default (0) instead of failing.
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub compiler_directive_lines: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
    #[serde(default)]
    pub cyclomatic_complexity: u32,
    // Omitted from serialized output when `None`; absent on input means `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lsloc: Option<u32>,
    // `#[serde(skip)]`: never serialized, and reset to an empty vector on deserialize.
    #[serde(skip)]
    pub code_line_hashes: Vec<u64>,
}
335
/// Identifies which analysis backend produced a [`RawFileAnalysis`].
///
/// Serialized as `snake_case` strings (`"lexical"`, `"lexical_best_effort"`,
/// `"tree_sitter"`).
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    Lexical,
    LexicalBestEffort,
    TreeSitter,
}
343
/// Result of analysing a single file's text: the counters, the backend that
/// produced them, any warnings, and optional style information.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    pub raw: RawLineCounts,
    pub parse_mode: ParseMode,
    pub warnings: Vec<String>,
    // Left out of serialized output when `None`; a missing key deserializes to `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
353
/// Switches accepted by `analyze_text`.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    // Forwarded unchanged to the generic scanner's flags by `analyze_text`.
    pub blank_in_block_comment_as_comment: bool,
    // Forwarded unchanged to the generic scanner's flags by `analyze_text`.
    pub collapse_continuation_lines: bool,
    // When false, `analyze_text` never runs style analysis.
    pub enable_style: bool,
    // Restricts which languages receive style analysis when `enable_style` is true.
    pub style_lang_scope: StyleLangScope,
}
373
/// Which languages are eligible for style analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StyleLangScope {
    // Every language is eligible.
    All,
    // Only C, C++ and Objective-C are eligible (see `should_style_analyse`).
    CFamilyOnly,
}
380
/// How logical source lines (LSLOC) are to be counted for a language.
///
/// Assigned per language by `language_complexity_config`.
/// NOTE(review): the variant notes below are inferred from the variant names;
/// the counting code itself is outside this excerpt.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LslocStrategy {
    // Presumably counts statement-terminating semicolons.
    Semicolons,
    // Presumably counts newlines that do not continue the previous line.
    NonContinuationNewlines,
    // No LSLOC figure is produced for the language (presumably leaves `lsloc` as `None`).
    Unsupported,
}
393
394impl Default for AnalysisOptions {
395 fn default() -> Self {
396 Self {
397 blank_in_block_comment_as_comment: true,
398 collapse_continuation_lines: false,
399 enable_style: true,
400 style_lang_scope: StyleLangScope::All,
401 }
402 }
403}
404
405#[must_use]
406pub fn supported_languages() -> BTreeSet<Language> {
407 [
408 Language::Assembly,
409 Language::C,
410 Language::Clojure,
411 Language::Cpp,
412 Language::CSharp,
413 Language::Css,
414 Language::Dart,
415 Language::Dockerfile,
416 Language::Elixir,
417 Language::Erlang,
418 Language::FSharp,
419 Language::Go,
420 Language::Groovy,
421 Language::Haskell,
422 Language::Html,
423 Language::Java,
424 Language::JavaScript,
425 Language::Julia,
426 Language::Kotlin,
427 Language::Lua,
428 Language::Makefile,
429 Language::Nim,
430 Language::ObjectiveC,
431 Language::Ocaml,
432 Language::Perl,
433 Language::Php,
434 Language::PowerShell,
435 Language::Python,
436 Language::R,
437 Language::Ruby,
438 Language::Rust,
439 Language::Scala,
440 Language::Scss,
441 Language::Shell,
442 Language::Sql,
443 Language::Svelte,
444 Language::Swift,
445 Language::TypeScript,
446 Language::Vue,
447 Language::Xml,
448 Language::Zig,
449 Language::Solidity,
450 Language::Protobuf,
451 Language::Hcl,
452 Language::GraphQl,
453 Language::Ada,
454 Language::Vhdl,
455 Language::Verilog,
456 Language::Tcl,
457 Language::Pascal,
458 Language::VisualBasic,
459 Language::Lisp,
460 Language::Fortran,
461 Language::Nix,
462 Language::Crystal,
463 Language::D,
464 Language::Glsl,
465 Language::Cmake,
466 Language::Elm,
467 Language::Awk,
468 ]
469 .into_iter()
470 .collect()
471}
472
473fn detect_by_shebang(line: &str) -> Option<Language> {
475 let lower = line.to_ascii_lowercase();
476 if !lower.starts_with("#!") {
477 return None;
478 }
479 if lower.contains("python") {
480 return Some(Language::Python);
481 }
482 if lower.contains("pwsh") || lower.contains("powershell") {
483 return Some(Language::PowerShell);
484 }
485 if lower.contains("bash")
486 || lower.contains("/sh")
487 || lower.contains("zsh")
488 || lower.contains("ksh")
489 {
490 return Some(Language::Shell);
491 }
492 if lower.contains("ruby") {
493 return Some(Language::Ruby);
494 }
495 if lower.contains("perl") {
496 return Some(Language::Perl);
497 }
498 if lower.contains("php") {
499 return Some(Language::Php);
500 }
501 if lower.contains("node") || lower.contains("nodejs") {
502 return Some(Language::JavaScript);
503 }
504 None
505}
506
507#[allow(clippy::too_many_lines)]
509fn detect_by_extension(ext: &str) -> Option<Language> {
510 static EXT_MAP: &[(&str, Language)] = &[
512 ("c", Language::C),
513 ("h", Language::C),
514 ("cc", Language::Cpp),
515 ("cp", Language::Cpp),
516 ("cpp", Language::Cpp),
517 ("cxx", Language::Cpp),
518 ("hh", Language::Cpp),
519 ("hpp", Language::Cpp),
520 ("hxx", Language::Cpp),
521 ("cs", Language::CSharp),
522 ("go", Language::Go),
523 ("java", Language::Java),
524 ("js", Language::JavaScript),
525 ("mjs", Language::JavaScript),
526 ("cjs", Language::JavaScript),
527 ("py", Language::Python),
528 ("rs", Language::Rust),
529 ("sh", Language::Shell),
530 ("bash", Language::Shell),
531 ("zsh", Language::Shell),
532 ("ksh", Language::Shell),
533 ("ps1", Language::PowerShell),
534 ("psm1", Language::PowerShell),
535 ("psd1", Language::PowerShell),
536 ("ts", Language::TypeScript),
537 ("mts", Language::TypeScript),
538 ("cts", Language::TypeScript),
539 ("tsx", Language::TypeScript),
540 ("jsx", Language::JavaScript),
541 ("asm", Language::Assembly),
542 ("s", Language::Assembly),
543 ("clj", Language::Clojure),
544 ("cljs", Language::Clojure),
545 ("cljc", Language::Clojure),
546 ("edn", Language::Clojure),
547 ("css", Language::Css),
548 ("dart", Language::Dart),
549 ("ex", Language::Elixir),
550 ("exs", Language::Elixir),
551 ("erl", Language::Erlang),
552 ("hrl", Language::Erlang),
553 ("fs", Language::FSharp),
554 ("fsi", Language::FSharp),
555 ("fsx", Language::FSharp),
556 ("groovy", Language::Groovy),
557 ("gradle", Language::Groovy),
558 ("hs", Language::Haskell),
559 ("lhs", Language::Haskell),
560 ("html", Language::Html),
561 ("htm", Language::Html),
562 ("xhtml", Language::Html),
563 ("jl", Language::Julia),
564 ("kt", Language::Kotlin),
565 ("kts", Language::Kotlin),
566 ("lua", Language::Lua),
567 ("mk", Language::Makefile),
568 ("nim", Language::Nim),
569 ("nims", Language::Nim),
570 ("m", Language::ObjectiveC),
571 ("mm", Language::ObjectiveC),
572 ("ml", Language::Ocaml),
573 ("mli", Language::Ocaml),
574 ("pl", Language::Perl),
575 ("pm", Language::Perl),
576 ("t", Language::Perl),
577 ("php", Language::Php),
578 ("php3", Language::Php),
579 ("php4", Language::Php),
580 ("php5", Language::Php),
581 ("php7", Language::Php),
582 ("phtml", Language::Php),
583 ("r", Language::R),
584 ("rb", Language::Ruby),
585 ("rake", Language::Ruby),
586 ("scala", Language::Scala),
587 ("sc", Language::Scala),
588 ("scss", Language::Scss),
589 ("sass", Language::Scss),
590 ("sql", Language::Sql),
591 ("svelte", Language::Svelte),
592 ("swift", Language::Swift),
593 ("vue", Language::Vue),
594 ("xml", Language::Xml),
595 ("xsd", Language::Xml),
596 ("xsl", Language::Xml),
597 ("xslt", Language::Xml),
598 ("svg", Language::Xml),
599 ("zig", Language::Zig),
600 ("sol", Language::Solidity),
601 ("proto", Language::Protobuf),
602 ("tf", Language::Hcl),
603 ("tfvars", Language::Hcl),
604 ("hcl", Language::Hcl),
605 ("graphql", Language::GraphQl),
606 ("gql", Language::GraphQl),
607 ("adb", Language::Ada),
608 ("ads", Language::Ada),
609 ("ada", Language::Ada),
610 ("vhd", Language::Vhdl),
611 ("vhdl", Language::Vhdl),
612 ("v", Language::Verilog),
613 ("sv", Language::Verilog),
614 ("svh", Language::Verilog),
615 ("vh", Language::Verilog),
616 ("tcl", Language::Tcl),
617 ("pas", Language::Pascal),
618 ("dpr", Language::Pascal),
619 ("vb", Language::VisualBasic),
620 ("bas", Language::VisualBasic),
621 ("lisp", Language::Lisp),
622 ("lsp", Language::Lisp),
623 ("el", Language::Lisp),
624 ("scm", Language::Lisp),
625 ("ss", Language::Lisp),
626 ("rkt", Language::Lisp),
627 ("f90", Language::Fortran),
628 ("f95", Language::Fortran),
629 ("f03", Language::Fortran),
630 ("f08", Language::Fortran),
631 ("f", Language::Fortran),
632 ("for", Language::Fortran),
633 ("nix", Language::Nix),
634 ("cr", Language::Crystal),
635 ("d", Language::D),
636 ("glsl", Language::Glsl),
637 ("vert", Language::Glsl),
638 ("frag", Language::Glsl),
639 ("comp", Language::Glsl),
640 ("geom", Language::Glsl),
641 ("tesc", Language::Glsl),
642 ("tese", Language::Glsl),
643 ("hlsl", Language::Glsl),
644 ("wgsl", Language::Glsl),
645 ("cmake", Language::Cmake),
646 ("elm", Language::Elm),
647 ("awk", Language::Awk),
648 ];
649 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
650}
651
652fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
654 if filename == "Dockerfile"
656 || filename.starts_with("Dockerfile.")
657 || filename_lower == "dockerfile"
658 {
659 return Some(Language::Dockerfile);
660 }
661 if matches!(
663 filename,
664 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
665 ) {
666 return Some(Language::Makefile);
667 }
668 if matches!(
670 filename,
671 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
672 ) {
673 return Some(Language::Ruby);
674 }
675 if filename == "CMakeLists.txt" || filename_lower == "cmakelists.txt" {
678 return Some(Language::Cmake);
679 }
680 None
681}
682
683#[must_use]
684#[allow(clippy::too_many_lines)]
685pub fn detect_language(
686 path: &Path,
687 first_line: Option<&str>,
688 extension_overrides: &BTreeMap<String, String>,
689 shebang_detection: bool,
690) -> Option<Language> {
691 let extension = path
692 .extension()
693 .and_then(|ext| ext.to_str())
694 .map(str::to_ascii_lowercase);
695
696 if let Some(ext) = extension.as_ref() {
698 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
699 if let Some(lang) = Language::from_name(override_name) {
700 return Some(lang);
701 }
702 }
703 }
704
705 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
707 let filename_lower = filename.to_ascii_lowercase();
708
709 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
710 return Some(lang);
711 }
712
713 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
715 return Some(lang);
716 }
717
718 if shebang_detection {
720 if let Some(line) = first_line {
721 if let Some(lang) = detect_by_shebang(line) {
722 return Some(lang);
723 }
724 }
725 }
726
727 None
728}
729
730#[must_use]
731pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
732 #[cfg(feature = "tree-sitter")]
734 {
735 match language {
736 Language::C | Language::Cpp => {
737 if let Some(mut result) = ts::analyze_c(text) {
738 if options.enable_style
739 && should_style_analyse(language, options.style_lang_scope)
740 {
741 result.style_analysis = style::analyze_style(language, text);
742 }
743 return result;
744 }
745 }
746 Language::Python => {
747 if let Some(result) = ts::analyze_python(text) {
748 return result;
749 }
750 }
751 _ => {}
752 }
753 }
754
755 let (mut config, has_preprocessor) = language_scan_config(language);
756
757 if language == Language::Python {
759 config.skip_lines = detect_python_docstring_lines(text);
760 }
761
762 let flags = IeeeFlags {
765 has_preprocessor_directives: has_preprocessor,
766 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
767 collapse_continuation_lines: options.collapse_continuation_lines,
768 };
769 let mut result = analyze_generic(text, config, flags);
770 if options.enable_style && should_style_analyse(language, options.style_lang_scope) {
771 result.style_analysis = style::analyze_style(language, text);
772 }
773 result
774}
775
776const fn should_style_analyse(language: Language, scope: StyleLangScope) -> bool {
778 match scope {
779 StyleLangScope::CFamilyOnly => {
780 matches!(language, Language::C | Language::Cpp | Language::ObjectiveC)
781 }
782 StyleLangScope::All => true,
783 }
784}
785
786fn language_scan_config(language: Language) -> (ScanConfig, bool) {
794 let cfg = LANG_SCAN_TABLE
795 .iter()
796 .find_map(|&(l, c)| (l == language).then_some(c))
797 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
798 let (branch_keywords, lsloc_strategy) = language_complexity_config(language);
799 (
800 ScanConfig {
801 line_comments: cfg.line_comments,
802 block_comment: cfg.block_comment,
803 allow_single_quote_strings: cfg.allow_single_quote_strings,
804 allow_double_quote_strings: cfg.allow_double_quote_strings,
805 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
806 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
807 skip_lines: HashSet::new(),
808 symbol_patterns: cfg.symbol_patterns,
809 branch_keywords,
810 lsloc_strategy,
811 },
812 cfg.has_preprocessor,
813 )
814}
815
// Branch keyword and operator tables, one per language or language family.
// `language_complexity_config` picks one of these for each language and it ends
// up in `ScanConfig::branch_keywords`.
// NOTE(review): presumably these feed the cyclomatic-complexity count; the
// matching code is outside this excerpt.

// C-like languages without the ternary operator entry.
const BRANCH_C_FAMILY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&",
];
// Same as `BRANCH_C_FAMILY` plus `?`.
const BRANCH_C_TERNARY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&", "?",
];
const BRANCH_GO: &[&str] = &["if", "else", "for", "switch", "case", "select", "||", "&&"];
const BRANCH_RUST: &[&str] = &["if", "else", "for", "while", "match", "||", "&&"];
const BRANCH_ZIG: &[&str] = &["if", "else", "for", "while", "switch", "catch", "||", "&&"];
const BRANCH_FSHARP: &[&str] = &["if", "then", "else", "elif", "match", "when", "||", "&&"];
const BRANCH_LUA: &[&str] = &[
    "if", "elseif", "else", "for", "while", "repeat", "and", "or",
];
const BRANCH_HASKELL: &[&str] = &["if", "then", "else", "case", "otherwise"];
// Lists both upper- and lowercase spellings of each SQL keyword.
const BRANCH_SQL: &[&str] = &["CASE", "WHEN", "IF", "ELSE", "case", "when", "if", "else"];
const BRANCH_OCAML: &[&str] = &["if", "then", "else", "match", "when", "||", "&&"];
const BRANCH_CLOJURE: &[&str] = &["if", "when", "cond", "case", "and", "or"];
const BRANCH_PHP: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "case", "catch", "match", "||", "&&", "?",
];
const BRANCH_JULIA: &[&str] = &["if", "elseif", "else", "for", "while", "catch", "||", "&&"];
const BRANCH_PYTHON: &[&str] = &["if", "elif", "else", "for", "while", "except", "or", "and"];
// Also used for Crystal by `language_complexity_config`.
const BRANCH_RUBY: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "while", "case", "when", "rescue", "||", "&&",
];
const BRANCH_SHELL: &[&str] = &["if", "elif", "else", "while", "until", "case", "||", "&&"];
const BRANCH_ELIXIR: &[&str] = &[
    "if", "else", "cond", "case", "when", "rescue", "||", "&&", "and", "or",
];
const BRANCH_POWERSHELL: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "foreach", "||", "&&",
];
const BRANCH_NIM: &[&str] = &[
    "if", "elif", "else", "for", "while", "case", "of", "except", "and", "or",
];
const BRANCH_PERL: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "for", "while", "foreach", "||", "&&",
];
const BRANCH_R: &[&str] = &["if", "else", "for", "while", "repeat", "||", "&&"];
const BRANCH_ADA: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or",
];
const BRANCH_VHDL: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or", "nand", "nor",
    "xor",
];
const BRANCH_VERILOG: &[&str] = &[
    "if", "else", "case", "casex", "casez", "for", "while", "&&", "||",
];
const BRANCH_TCL: &[&str] = &["if", "elseif", "else", "switch", "while", "for", "foreach"];
const BRANCH_PASCAL: &[&str] = &[
    "if", "then", "else", "case", "while", "for", "repeat", "until", "and", "or",
];
// Capitalised spellings only.
const BRANCH_VB: &[&str] = &[
    "If", "Then", "ElseIf", "Else", "Select", "Case", "While", "For", "Do", "And", "Or",
];
const BRANCH_LISP: &[&str] = &["if", "when", "unless", "cond", "case", "and", "or"];
// Lowercase spellings only.
const BRANCH_FORTRAN: &[&str] = &[
    "if", "then", "else", "elseif", "case", "do", "while", "where",
];
const BRANCH_NIX: &[&str] = &["if", "then", "else"];
// Each entry includes the opening parenthesis of the CMake command call.
const BRANCH_CMAKE: &[&str] = &["if(", "elseif(", "else(", "while(", "foreach("];
const BRANCH_ELM: &[&str] = &["if", "then", "else", "case", "of"];
const BRANCH_AWK: &[&str] = &["if", "else", "while", "for", "do"];
886
887const fn language_complexity_config(
890 language: Language,
891) -> (&'static [&'static str], LslocStrategy) {
892 match language {
893 Language::C
895 | Language::Cpp
896 | Language::ObjectiveC
897 | Language::CSharp
898 | Language::JavaScript
899 | Language::TypeScript
900 | Language::Svelte
901 | Language::Vue
902 | Language::Dart
903 | Language::Groovy
904 | Language::Swift
905 | Language::Solidity => (BRANCH_C_TERNARY, LslocStrategy::Semicolons),
906 Language::Java | Language::Kotlin | Language::Scala | Language::D | Language::Glsl => {
908 (BRANCH_C_FAMILY, LslocStrategy::Semicolons)
909 }
910 Language::Go => (BRANCH_GO, LslocStrategy::Semicolons),
911 Language::Rust => (BRANCH_RUST, LslocStrategy::Semicolons),
912 Language::Zig => (BRANCH_ZIG, LslocStrategy::Semicolons),
913 Language::FSharp => (BRANCH_FSHARP, LslocStrategy::Unsupported),
914 Language::Shell => (BRANCH_SHELL, LslocStrategy::NonContinuationNewlines),
916 Language::Elixir => (BRANCH_ELIXIR, LslocStrategy::NonContinuationNewlines),
917 Language::Perl => (BRANCH_PERL, LslocStrategy::Semicolons),
918 Language::R => (BRANCH_R, LslocStrategy::NonContinuationNewlines),
919 Language::Ruby | Language::Crystal => (BRANCH_RUBY, LslocStrategy::NonContinuationNewlines),
920 Language::Python => (BRANCH_PYTHON, LslocStrategy::NonContinuationNewlines),
921 Language::PowerShell => (BRANCH_POWERSHELL, LslocStrategy::Unsupported),
922 Language::Nim => (BRANCH_NIM, LslocStrategy::NonContinuationNewlines),
923 Language::Lua => (BRANCH_LUA, LslocStrategy::Unsupported),
925 Language::Haskell => (BRANCH_HASKELL, LslocStrategy::Unsupported),
926 Language::Sql => (BRANCH_SQL, LslocStrategy::Semicolons),
927 Language::Ocaml => (BRANCH_OCAML, LslocStrategy::Semicolons),
928 Language::Clojure => (BRANCH_CLOJURE, LslocStrategy::Unsupported),
929 Language::Php => (BRANCH_PHP, LslocStrategy::Semicolons),
930 Language::Julia => (BRANCH_JULIA, LslocStrategy::NonContinuationNewlines),
931 Language::Protobuf => (&[], LslocStrategy::Semicolons),
932 Language::Hcl => (&[], LslocStrategy::NonContinuationNewlines),
933 Language::Ada => (BRANCH_ADA, LslocStrategy::Semicolons),
935 Language::Vhdl => (BRANCH_VHDL, LslocStrategy::Semicolons),
936 Language::Verilog => (BRANCH_VERILOG, LslocStrategy::Semicolons),
937 Language::Tcl => (BRANCH_TCL, LslocStrategy::NonContinuationNewlines),
938 Language::Pascal => (BRANCH_PASCAL, LslocStrategy::Semicolons),
939 Language::VisualBasic => (BRANCH_VB, LslocStrategy::NonContinuationNewlines),
940 Language::Lisp => (BRANCH_LISP, LslocStrategy::Unsupported),
941 Language::Fortran => (BRANCH_FORTRAN, LslocStrategy::NonContinuationNewlines),
943 Language::Nix => (BRANCH_NIX, LslocStrategy::Unsupported),
944 Language::Cmake => (BRANCH_CMAKE, LslocStrategy::Unsupported),
945 Language::Elm => (BRANCH_ELM, LslocStrategy::Unsupported),
946 Language::Awk => (BRANCH_AWK, LslocStrategy::NonContinuationNewlines),
947 Language::Makefile
949 | Language::Dockerfile
950 | Language::Css
951 | Language::Html
952 | Language::Xml
953 | Language::Assembly
954 | Language::Erlang
955 | Language::GraphQl
956 | Language::Scss => (&[], LslocStrategy::Unsupported),
957 }
958}
959
/// Literal text fragments, grouped by the kind of symbol they identify.
///
/// NOTE(review): how the scanner applies each list (prefix vs. substring,
/// case sensitivity) is outside this excerpt — confirm there.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    // Fragments that introduce a function-like definition (e.g. "function ", "proc ").
    functions: &'static [&'static str],
    // NOTE(review): presumably prefixes that count only when a parenthesis follows.
    functions_prefix_paren: &'static [&'static str],
    // Fragments that introduce a type-like definition (e.g. "class ", "struct ").
    classes: &'static [&'static str],
    // Fragments that introduce a variable declaration.
    variables: &'static [&'static str],
    // Fragments that introduce an import or include.
    imports: &'static [&'static str],
    // Fragments that mark a test case.
    tests: &'static [&'static str],
    // Fragments that mark an assertion inside a test.
    assertions: &'static [&'static str],
    // Fragments that mark a test suite or fixture.
    test_suites: &'static [&'static str],
    // NOTE(review): presumably prefixes that count only when no parenthesis follows.
    variables_prefix_no_paren: &'static [&'static str],
}

impl SymbolPatterns {
    /// Returns a pattern set in which every list is empty.
    const fn none() -> Self {
        const EMPTY: &[&str] = &[];
        Self {
            functions: EMPTY,
            functions_prefix_paren: EMPTY,
            classes: EMPTY,
            variables: EMPTY,
            imports: EMPTY,
            tests: EMPTY,
            assertions: EMPTY,
            test_suites: EMPTY,
            variables_prefix_no_paren: EMPTY,
        }
    }
}
1007
// Pattern set with every list empty.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();

// Solidity pattern set: definitions, contract-level types, imports, and
// test/assertion fragments (`assert*` helpers and `vm.expect*` calls).
const SP_SOLIDITY: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "modifier ",
        "constructor",
        "receive ",
        "fallback ",
    ],
    functions_prefix_paren: &[],
    classes: &["contract ", "interface ", "library ", "struct ", "enum "],
    variables: &[],
    imports: &["import "],
    // "function testFuzz" also starts with "function test".
    tests: &["function test", "function testFuzz", "function invariant"],
    assertions: &[
        "assertEq(",
        "assertEq0(",
        "assertTrue(",
        "assertFalse(",
        "assertGt(",
        "assertLt(",
        "assertGe(",
        "assertLe(",
        "assertApproxEq",
        "vm.expectRevert(",
        "vm.expectEmit(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1043
// Protocol Buffers pattern set: `rpc` entries count as functions; messages,
// services and enums count as classes. No test patterns.
const SP_PROTOBUF: SymbolPatterns = SymbolPatterns {
    functions: &["rpc "],
    functions_prefix_paren: &[],
    classes: &["message ", "service ", "enum "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1057
// Ada pattern set: subprograms, package/type/task/protected units, and
// `with`/`use` clauses as imports. No test patterns.
const SP_ADA: SymbolPatterns = SymbolPatterns {
    functions: &["procedure ", "function "],
    functions_prefix_paren: &[],
    classes: &["package ", "type ", "task ", "protected "],
    variables: &[],
    imports: &["with ", "use "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1070
// VHDL pattern set: functions, procedures and processes count as functions;
// design units count as classes. No test patterns.
const SP_VHDL: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "procedure ", "process "],
    functions_prefix_paren: &[],
    classes: &["entity ", "architecture ", "package ", "component "],
    variables: &[],
    imports: &["library ", "use "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1082
// Verilog/SystemVerilog pattern set. The "`include" import entry has no
// trailing space, unlike "import ". No test patterns.
const SP_VERILOG: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "task "],
    functions_prefix_paren: &[],
    classes: &["module ", "interface ", "class ", "package "],
    variables: &[],
    imports: &["import ", "`include"],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1094
// Tcl pattern set: `proc` definitions, `source`/`package require` imports,
// and `test ` as the only test marker. No class or assertion patterns.
const SP_TCL: SymbolPatterns = SymbolPatterns {
    functions: &["proc "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["source ", "package require "],
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1107
// Pascal/Delphi pattern set: procedures whose name begins with `Test` count
// as tests, and `Check*(` calls count as assertions.
const SP_PASCAL: SymbolPatterns = SymbolPatterns {
    functions: &["procedure ", "function "],
    functions_prefix_paren: &[],
    classes: &["type ", "class ", "record "],
    variables: &[],
    imports: &["uses "],
    tests: &["procedure Test"],
    assertions: &[
        "Check(",
        "CheckEquals(",
        "CheckTrue(",
        "CheckFalse(",
        "CheckNotNull(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1126
// Visual Basic pattern set: `Sub`/`Function` in bare, `Private` and `Public`
// forms; `<TestMethod...>` / `<TestClass...>` attributes mark tests and suites.
const SP_VB: SymbolPatterns = SymbolPatterns {
    functions: &[
        "Sub ",
        "Function ",
        "Private Sub ",
        "Public Sub ",
        "Private Function ",
        "Public Function ",
    ],
    functions_prefix_paren: &[],
    classes: &["Class ", "Module ", "Structure "],
    variables: &[],
    imports: &["Imports "],
    tests: &["<TestMethod>", "<TestMethod("],
    assertions: &["Assert.", "CollectionAssert.", "StringAssert."],
    test_suites: &["<TestClass>", "<TestClass("],
    variables_prefix_no_paren: &[],
};
1146
// Lisp-family pattern set. Every entry includes the opening parenthesis of
// the defining form.
const SP_LISP: SymbolPatterns = SymbolPatterns {
    functions: &["(defun ", "(defmacro ", "(define ", "(defmethod ", "(defn "],
    functions_prefix_paren: &[],
    classes: &["(defclass ", "(defstruct "],
    variables: &[],
    imports: &["(require ", "(import ", "(use-package "],
    tests: &["(test ", "(deftest "],
    assertions: &["(is ", "(is-true ", "(is-false ", "(signals "],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1159
// Fortran pattern set (lowercase spellings only): subroutines and functions;
// modules, programs and derived types count as classes. No test patterns.
const SP_FORTRAN: SymbolPatterns = SymbolPatterns {
    functions: &["subroutine ", "function "],
    functions_prefix_paren: &[],
    classes: &["module ", "program ", "type "],
    variables: &[],
    imports: &["use ", "include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1172
/// Symbol-detection patterns for Crystal.
///
/// Entries are literal text fragments interpreted by the consumer of
/// `SymbolPatterns`, which is outside this block.
const SP_CRYSTAL: SymbolPatterns = SymbolPatterns {
    functions: &["def "],
    functions_prefix_paren: &[],
    classes: &["class ", "module ", "struct ", "enum "],
    variables: &[],
    imports: &["require "],
    // Spec-style DSL keywords; `it` is listed both with a space and with `(`.
    tests: &["it ", "it(", "describe ", "context ", "pending "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1185
/// Symbol-detection patterns for D.
///
/// No function patterns are listed here; both `functions` and
/// `functions_prefix_paren` are empty.
const SP_D: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "struct ", "interface ", "enum ", "template "],
    variables: &[],
    imports: &["import "],
    // D's built-in `unittest` block keyword (no trailing space in the pattern).
    tests: &["unittest"],
    // `assert(` plus two names without a trailing paren — presumably the
    // `std.exception` helpers, left open so template arguments still match.
    assertions: &["assert(", "assertThrown", "assertNotThrown"],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1198
/// Symbol-detection patterns for CMake scripts.
///
/// Only command definitions and inclusion commands are listed; there are no
/// class, variable or test patterns.
// NOTE(review): CMake command names are case-insensitive; only lower-case
// spellings appear here — confirm whether the matcher normalises case.
const SP_CMAKE: SymbolPatterns = SymbolPatterns {
    // Patterns end in `(` because CMake commands are written as calls.
    functions: &["function(", "macro("],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["include(", "add_subdirectory("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1210
/// Symbol-detection patterns for Elm.
///
/// No function patterns are listed here; both `functions` and
/// `functions_prefix_paren` are empty.
const SP_ELM: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // Covers `type` and, since it shares the prefix, `type alias`.
    classes: &["type "],
    variables: &[],
    imports: &["import "],
    // Test-builder function names — presumably from the elm-test package.
    tests: &["test ", "describe ", "fuzz "],
    // Any function qualified with the `Expect` module.
    assertions: &["Expect."],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1223
/// Symbol-detection patterns for AWK.
///
/// Only user-defined functions are recognised; every other category is empty.
const SP_AWK: SymbolPatterns = SymbolPatterns {
    functions: &["function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1235
/// Symbol-detection patterns for Rust, wired to `Language::Rust` in
/// `LANG_SCAN_TABLE`.
///
/// Entries are literal text fragments. Visibility and qualifier combinations
/// are enumerated explicitly, so a combination that is not spelled out below
/// (for example `pub(super) async fn `) has no pattern of its own.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    // `fn` with the listed visibility / `async` / `unsafe` / `const` /
    // `extern` prefixes.
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    functions_prefix_paren: &[],
    // Type-level items: structs, enums, traits, impl blocks and type aliases.
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        // Generic impl blocks have no space after `impl`.
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
    // Test attributes: the built-in `#[test]` plus several crate-provided ones.
    tests: &[
        "#[test]",
        "#[tokio::test]",
        "#[actix_web::test]",
        "#[rstest]",
        // No closing bracket, so attribute arguments may follow.
        "#[test_case",
    ],
    // Assertion macro invocations, including the opening parenthesis.
    assertions: &[
        "assert_eq!(",
        "assert_ne!(",
        "assert!(",
        "assert_matches!(",
        "assert_err!(",
        "assert_ok!(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1292
/// Symbol-detection patterns for Python, wired to `Language::Python` in
/// `LANG_SCAN_TABLE`.
///
/// Entries are literal text fragments interpreted by the consumer of
/// `SymbolPatterns`, which is outside this block.
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    // Both `import x` and `from x import y` forms.
    imports: &["import ", "from "],
    // `test_`-prefixed functions and `Test`-prefixed classes.
    tests: &["def test_", "async def test_", "class Test"],
    // Only `self.assert*` method calls are listed; bare `assert` statements
    // have no pattern here.
    assertions: &[
        "self.assertEqual(",
        "self.assertNotEqual(",
        "self.assertTrue(",
        "self.assertFalse(",
        "self.assertIsNone(",
        "self.assertIsNotNone(",
        "self.assertIn(",
        "self.assertNotIn(",
        "self.assertRaises(",
        "self.assertAlmostEqual(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1316
/// Symbol-detection patterns for JavaScript.
///
/// `LANG_SCAN_TABLE` uses this constant for `Language::JavaScript` and also
/// for `Language::Svelte` and `Language::Vue`.
const SP_JS: SymbolPatterns = SymbolPatterns {
    // `function` declarations, optionally `async` and/or exported. Arrow
    // functions bound to variables have no pattern here.
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // Test-runner globals (Jest/Mocha style), including the `.each` variants.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1349
/// Symbol-detection patterns for TypeScript, wired to `Language::TypeScript`
/// in `LANG_SCAN_TABLE`.
///
/// The `functions`, `variables`, `imports`, `tests` and `assertions` lists
/// hold the same entries as `SP_JS`; only `classes` is extended.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    // JavaScript class forms plus TypeScript's `abstract`, `interface` and
    // ambient `declare` forms.
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // Test-runner globals (Jest/Mocha style), including the `.each` variants.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1392
/// Symbol-detection patterns for Go, wired to `Language::Go` in
/// `LANG_SCAN_TABLE`.
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    functions_prefix_paren: &[],
    // All named type declarations (struct, interface, alias, ...) share `type `.
    classes: &["type "],
    variables: &["var "],
    imports: &["import "],
    // Name prefixes the `go test` tool recognises for tests, benchmarks and
    // fuzz targets.
    tests: &["func Test", "func Benchmark", "func Fuzz"],
    // No assertion patterns are defined for Go.
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1405
/// Symbol-detection patterns for Java, wired to `Language::Java` in
/// `LANG_SCAN_TABLE`.
///
/// No function or variable patterns are listed in this constant.
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // Type declarations with the modifier combinations spelled out
    // explicitly; `@interface ` is the annotation-type declaration.
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
    // Test-method annotations (JUnit style).
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "@TestFactory",
        "@TestTemplate",
    ],
    // Assertion method calls, unqualified (as with static imports).
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "assertAll(",
        "assertArrayEquals(",
        "assertIterableEquals(",
        "assertLinesMatch(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1453
/// Symbol-detection patterns for C#, wired to `Language::CSharp` in
/// `LANG_SCAN_TABLE`.
///
/// No function patterns are listed in this constant.
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // Type declarations with the modifier combinations spelled out explicitly.
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    // Only implicitly typed locals are recognised.
    variables: &["var "],
    imports: &["using "],
    // Test attributes; the spellings look like MSTest, NUnit and xUnit.
    // Attributes that take arguments are left open after `(`.
    tests: &[
        "[TestMethod]",
        "[Test]",
        "[Fact]",
        "[Theory]",
        "[TestCase(",
        "[DataRow(",
        "[InlineData(",
        "[MemberData(",
    ],
    // Static `Assert.*` calls in the naming styles of several frameworks.
    assertions: &[
        "Assert.AreEqual(",
        "Assert.AreNotEqual(",
        "Assert.IsTrue(",
        "Assert.IsFalse(",
        "Assert.IsNull(",
        "Assert.IsNotNull(",
        "Assert.Equal(",
        "Assert.NotEqual(",
        "Assert.True(",
        "Assert.False(",
        "Assert.That(",
        "Assert.Contains(",
        "Assert.Throws(",
        "Assert.ThrowsAsync(",
        "Assert.IsInstanceOfType(",
    ],
    // Class-level fixture attributes.
    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
    variables_prefix_no_paren: &[],
};
1513
/// Test-definition patterns shared by `SP_C` and `SP_CPP` (both use this
/// list for their `tests` field).
///
/// Entries are macro or function names followed by `(`. The framework names
/// in the comments below are a reviewer's reading of the spellings, not
/// something the code states. Some entries also appear in
/// `SUITE_PATTERNS_C_CPP`.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // Google Test style.
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2 / doctest style.
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test style.
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit style.
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity style.
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check style.
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka style.
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest style.
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
1554
/// Assertion patterns shared by `SP_C` and `SP_CPP` (both use this list for
/// their `assertions` field).
///
/// Entries are macro or function names followed by `(`. The framework names
/// in the comments below are a reviewer's reading of the spellings, not
/// something the code states.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // Google Test style, fatal (`ASSERT_*`) variants.
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // Google Test style, non-fatal (`EXPECT_*`) variants.
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2 / doctest style.
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity style.
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka style (lower-case function-like names).
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1628
/// Test-suite patterns shared by `SP_C` and `SP_CPP` (both use this list for
/// their `test_suites` field).
///
/// `TEST_GROUP(`, `TEST_GROUP_BASE(`, `BOOST_AUTO_TEST_SUITE(` and
/// `CPPUNIT_TEST_SUITE(` are also present in `TEST_PATTERNS_C_CPP`.
// NOTE(review): confirm the consumer does not count those shared entries
// twice (once as a test, once as a suite) unless that is intended.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1637
1638const SP_C: SymbolPatterns = SymbolPatterns {
1639 functions: &[],
1641 functions_prefix_paren: &[
1642 "void ",
1643 "int ",
1644 "char ",
1645 "float ",
1646 "double ",
1647 "long ",
1648 "unsigned ",
1649 "size_t ",
1650 "static ",
1651 "inline ",
1652 "const ",
1653 "extern ",
1654 ],
1655 classes: &[
1656 "struct ",
1657 "typedef struct ",
1658 "union ",
1659 "typedef union ",
1660 "typedef enum ",
1661 ],
1662 variables: &[],
1663 imports: &["#include "],
1664 tests: TEST_PATTERNS_C_CPP,
1665 assertions: ASSERT_PATTERNS_C_CPP,
1666 test_suites: SUITE_PATTERNS_C_CPP,
1667 variables_prefix_no_paren: &[
1670 "void ",
1671 "int ",
1672 "char ",
1673 "float ",
1674 "double ",
1675 "long ",
1676 "unsigned ",
1677 "size_t ",
1678 "static ",
1679 "inline ",
1680 "const ",
1681 "extern ",
1682 ],
1683};
1684
1685const SP_CPP: SymbolPatterns = SymbolPatterns {
1686 functions: &[
1688 "virtual ", "explicit ", "~", "operator", ],
1693 functions_prefix_paren: &[
1694 "void ",
1695 "bool ",
1696 "int ",
1697 "char ",
1698 "float ",
1699 "double ",
1700 "long ",
1701 "unsigned ",
1702 "size_t ",
1703 "auto ",
1704 "static ",
1705 "inline ",
1706 "constexpr ",
1707 "const ",
1708 "extern ",
1709 ],
1710 classes: &["class ", "struct ", "namespace ", "template ", "template<"],
1712 variables: &[],
1713 imports: &["#include "],
1714 tests: TEST_PATTERNS_C_CPP,
1715 assertions: ASSERT_PATTERNS_C_CPP,
1716 test_suites: SUITE_PATTERNS_C_CPP,
1717 variables_prefix_no_paren: &[
1719 "void ",
1720 "bool ",
1721 "int ",
1722 "char ",
1723 "float ",
1724 "double ",
1725 "long ",
1726 "unsigned ",
1727 "size_t ",
1728 "auto ",
1729 "static ",
1730 "inline ",
1731 "constexpr ",
1732 "const ",
1733 "extern ",
1734 ],
1735};
1736
/// Symbol-detection patterns for shell scripts, wired to `Language::Shell`
/// in `LANG_SCAN_TABLE`.
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    // Only the `function name` form; `name() { ... }` has no pattern here.
    functions: &["function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    // `source file` and its POSIX spelling `. file`.
    imports: &["source ", ". "],
    // `@test` blocks — presumably the Bats test framework.
    tests: &["@test "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1749
/// Symbol-detection patterns for PowerShell, wired to
/// `Language::PowerShell` in `LANG_SCAN_TABLE`.
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    // Two capitalisations of the keyword are listed explicitly.
    functions: &["function ", "Function "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
    // Block keywords of a BDD-style test DSL — presumably Pester.
    tests: &["Describe ", "It ", "Context "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1762
/// Symbol-detection patterns for Kotlin, wired to `Language::Kotlin` in
/// `LANG_SCAN_TABLE`.
///
/// Modifier combinations are enumerated explicitly; a combination that is
/// not spelled out below has no pattern of its own.
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        // No trailing space: a companion object may be anonymous.
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
    // Test annotations plus string-named tests that open with a double quote
    // (`"should ...`, `"it ...`) — presumably Kotest-style specs.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "\"should ",
        "\"it ",
    ],
    // JUnit-style `assert*` calls followed by `should*` matcher calls.
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "shouldBe(",
        "shouldNotBe(",
        "shouldThrow(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1816
/// Symbol-detection patterns for Swift, wired to `Language::Swift` in
/// `LANG_SCAN_TABLE`.
///
/// Modifier combinations are enumerated explicitly; a combination that is
/// not spelled out below has no pattern of its own.
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    functions_prefix_paren: &[],
    // Nominal types, protocols, extensions and actors.
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
    // `test`-prefixed methods in either capitalisation, plus the `@Test`
    // attribute.
    tests: &["func test", "func Test", "@Test"],
    // `XCTAssert*` calls plus the `#expect(` macro.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
        "#expect(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1874
/// Symbol-detection patterns for Ruby, wired to `Language::Ruby` in
/// `LANG_SCAN_TABLE`.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    // Plain `def` plus the inline `private def` / `protected def` forms.
    functions: &["def ", "private def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    // Spec-style DSL keywords; `it` is listed both with a space and with `(`.
    tests: &["it ", "it(", "describe ", "context ", "test "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1887
/// Symbol-detection patterns for Scala, wired to `Language::Scala` in
/// `LANG_SCAN_TABLE`.
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    functions_prefix_paren: &[],
    // Classes, singleton objects and traits.
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    // Call-style test definitions (note the `(`, unlike the Ruby patterns).
    tests: &["test(", "it(", "describe("],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1907
/// Symbol-detection patterns for PHP, wired to `Language::Php` in
/// `LANG_SCAN_TABLE`.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    // `function` with the visibility / `static` / `abstract` / `final`
    // combinations spelled out explicitly.
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    // Namespace imports and file inclusion statements.
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    // `test`-prefixed methods plus test attributes (PHPUnit style).
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1949
/// Symbol-detection patterns for Elixir, wired to `Language::Elixir` in
/// `LANG_SCAN_TABLE`.
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    // Public and private (`p`-suffixed) function, macro and guard definitions.
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    functions_prefix_paren: &[],
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    // Test macros — presumably ExUnit.
    tests: &["test ", "describe "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1969
/// Symbol-detection patterns for Erlang, wired to `Language::Erlang` in
/// `LANG_SCAN_TABLE`.
///
/// No function or test-definition patterns are listed; only module
/// attributes and assertion macros are recognised.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // The `-module(...)` attribute is the only "class-like" marker.
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    // `?assert*` macro invocations — presumably EUnit.
    assertions: &[
        "?assert(",
        "?assertEqual(",
        "?assertNotEqual(",
        "?assertMatch(",
        "?assertError(",
        "?assertThrow(",
        "?assertException(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
1991
/// Symbol-detection patterns for F#, wired to `Language::FSharp` in
/// `LANG_SCAN_TABLE`.
// NOTE(review): `"let "` (functions) is a textual prefix of `"let mutable "`
// (variables); how the scanner resolves such overlaps is not visible here.
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    // `let` bindings and member definitions are all listed as functions.
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    // Test attributes in F# `[<...>]` syntax; `TestCase` is left open for
    // its arguments.
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2010
/// Symbol-detection patterns for Groovy, wired to `Language::Groovy` in
/// `LANG_SCAN_TABLE`.
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    // String-named methods (`def "..."`), the `@Test` annotation, and block
    // labels (`given:`, `when:`, ...) — the labels look like Spock.
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2023
/// Symbol-detection patterns for Haskell, wired to `Language::Haskell` in
/// `LANG_SCAN_TABLE`.
///
/// No function patterns are listed; both `functions` and
/// `functions_prefix_paren` are empty.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // Type classes and the three kinds of type declaration.
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    // Spec-style `describe`/`it` plus the `prop_` name prefix — presumably
    // Hspec and QuickCheck conventions.
    tests: &["describe ", "it ", "prop_"],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2037
/// Symbol-detection patterns for Lua, wired to `Language::Lua` in
/// `LANG_SCAN_TABLE`.
// NOTE(review): `"local "` (variables) is a textual prefix of
// `"local function "` (functions); how the scanner resolves such overlaps is
// not visible here.
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["local "],
    // No import patterns are defined for Lua.
    imports: &[],
    // Call-style spec functions — presumably the busted framework.
    tests: &["it(", "describe(", "pending("],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2050
/// Symbol-detection patterns for Nim, wired to `Language::Nim` in
/// `LANG_SCAN_TABLE`.
const SP_NIM: SymbolPatterns = SymbolPatterns {
    // All of Nim's routine-introducing keywords.
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    // Both `import x` and `from x import y` forms.
    imports: &["import ", "from "],
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2071
/// Symbol-detection patterns for Objective-C, wired to
/// `Language::ObjectiveC` in `LANG_SCAN_TABLE`.
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    // Instance (`-`) and class (`+`) method declarations, written with a
    // space before the return-type parenthesis.
    functions: &["- (", "+ ("],
    functions_prefix_paren: &[],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // Void instance methods whose name starts with `test`.
    tests: &["- (void)test"],
    // `XCTAssert*` calls; the same ten spellings open the `SP_SWIFT` list.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2095
/// Symbol-detection patterns for OCaml, wired to `Language::Ocaml` in
/// `LANG_SCAN_TABLE`.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    // Every `let` binding is listed as a function; there is no separate
    // variable pattern.
    functions: &["let ", "let rec "],
    functions_prefix_paren: &[],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    // `test_`-prefixed bindings and `test_case` applications.
    tests: &["let test_", "test_case "],
    // Assertion functions applied with a space (no parenthesis), plus any
    // name starting with `OUnit.assert`.
    assertions: &[
        "assert_equal ",
        "assert_bool ",
        "assert_raises ",
        "assert_failure ",
        "OUnit.assert",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2114
/// Symbol-detection patterns for Perl, wired to `Language::Perl` in
/// `LANG_SCAN_TABLE`.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    functions_prefix_paren: &[],
    // Packages are the only "class-like" marker.
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &["subtest "],
    // Short assertion function names followed by `(` — presumably
    // Test::More.
    // NOTE(review): names this short (`ok(`, `is(`) can also end longer
    // identifiers; confirm the matcher anchors them appropriately.
    assertions: &[
        "ok(",
        "is(",
        "isnt(",
        "like(",
        "unlike(",
        "cmp_ok(",
        "is_deeply(",
        "isa_ok(",
        "can_ok(",
    ],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2137
/// Symbol-detection patterns for Clojure, wired to `Language::Clojure` in
/// `LANG_SCAN_TABLE`.
///
/// Each pattern includes the opening parenthesis of the form.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    // Public and private (`defn-`) functions, macros and multimethods.
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    functions_prefix_paren: &[],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    // Namespace declarations and explicit `require` forms.
    imports: &["(ns ", "(require "],
    // Test definitions and `testing` blocks — presumably clojure.test.
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2155
/// Symbol-detection patterns for Julia, wired to `Language::Julia` in
/// `LANG_SCAN_TABLE`.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    // Long-form `function` and `macro` definitions; the short form
    // `f(x) = ...` has no pattern here.
    functions: &["function ", "macro "],
    functions_prefix_paren: &[],
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    // `@test` / `@testset` macros; both are listed under `tests` and the
    // `assertions` list is empty.
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2173
/// Symbol-detection patterns for Dart, wired to `Language::Dart` in
/// `LANG_SCAN_TABLE`.
///
/// No function patterns are listed; both `functions` and
/// `functions_prefix_paren` are empty.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    // Call-style test definitions; `testWidgets(` looks like Flutter's
    // widget-test helper.
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2186
/// Symbol-detection patterns for R, wired to `Language::R` in
/// `LANG_SCAN_TABLE`.
///
/// Only imports and test markers are recognised; function, class and
/// variable lists are empty.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // Test-block calls plus the `expect_` name prefix — presumably testthat.
    // NOTE(review): `expect_` reads like an assertion prefix but is listed
    // under `tests` while `assertions` is empty — confirm this is intended.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2199
/// Symbol-detection patterns for SQL, wired to `Language::Sql` in
/// `LANG_SCAN_TABLE`.
///
/// Every pattern is spelled twice, once in lower case and once in upper
/// case; mixed-case spellings such as `Create Table` are not listed.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    // Stored functions and procedures, with and without `OR REPLACE`.
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    functions_prefix_paren: &[],
    // Schema objects are grouped under `classes`.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2227
/// Symbol-detection patterns for assembly sources, wired to
/// `Language::Assembly` in `LANG_SCAN_TABLE`.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    // The procedure directive in lower and upper case.
    // NOTE(review): in MASM-style syntax the name precedes the directive
    // (`name PROC`); confirm how the matcher applies these patterns.
    functions: &["proc ", "PROC "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    // Include directives; the `%`-prefixed form looks like NASM syntax.
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2239
/// Symbol-detection patterns for Zig, wired to `Language::Zig` in
/// `LANG_SCAN_TABLE`.
///
/// No class or import patterns are listed in this constant.
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    functions_prefix_paren: &[],
    classes: &[],
    // Only `var` declarations; `const` declarations have no pattern here.
    variables: &["var ", "pub var "],
    imports: &[],
    // Named test blocks (`test "..."`) and anonymous ones written `test{`.
    tests: &["test \"", "test{"],
    assertions: &[],
    test_suites: &[],
    variables_prefix_no_paren: &[],
};
2258
/// Compile-time lexical configuration for one language.
///
/// Instances are `const` values: the shared bases `C_SLASH_BASE` and
/// `HASH_BASE`, and the per-language entries of `LANG_SCAN_TABLE`, which
/// override individual fields with struct-update syntax.
// The bools are independent per-language switches, so the lint asking for an
// enum/bitflags replacement is silenced here.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Tokens that start a line comment (e.g. `//`, `#`); may be empty.
    line_comments: &'static [&'static str],
    /// Opening and closing delimiter of block comments, if the language has
    /// them.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'...'` is treated as a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"..."` is treated as a string literal.
    allow_double_quote_strings: bool,
    /// Whether triple-quoted strings are recognised (enabled for Python and
    /// Julia in the table).
    allow_triple_quote_strings: bool,
    /// Whether C#-style verbatim strings are recognised (enabled only for
    /// C# in the table).
    allow_csharp_verbatim_strings: bool,
    /// Literal patterns used to detect functions, classes, tests, etc.
    symbol_patterns: SymbolPatterns,
    /// Set for C, C++ and Objective-C in the table; how the scanner uses the
    /// flag is not visible in this block.
    has_preprocessor: bool,
}
2275
/// Per-scan configuration.
///
/// The first six fields and `symbol_patterns` mirror `StaticLangConfig`;
/// the remaining fields carry data that the static table does not hold.
/// Presumably built from a `StaticLangConfig` for each scanned file —
/// the constructor is outside this block.
// The bools are independent per-language switches, so the lint asking for an
// enum/bitflags replacement is silenced here.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Tokens that start a line comment; may be empty.
    line_comments: &'static [&'static str],
    /// Opening and closing delimiter of block comments, if any.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'...'` is treated as a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"..."` is treated as a string literal.
    allow_double_quote_strings: bool,
    /// Whether triple-quoted strings are recognised.
    allow_triple_quote_strings: bool,
    /// Whether C#-style verbatim strings are recognised.
    allow_csharp_verbatim_strings: bool,
    /// Line indices to skip during the scan.
    // NOTE(review): whether these are 0- or 1-based is not visible here.
    skip_lines: HashSet<usize>,
    /// Literal patterns used to detect functions, classes, tests, etc.
    symbol_patterns: SymbolPatterns,
    /// Keywords treated as branch points — presumably for a complexity
    /// metric; confirm against the consumer.
    branch_keywords: &'static [&'static str],
    /// Strategy used for the logical-SLOC count (see `LslocStrategy`).
    lsloc_strategy: LslocStrategy,
}
2292
/// Base configuration for languages with C-style comments.
///
/// `LANG_SCAN_TABLE` entries start from this value via `..C_SLASH_BASE` and
/// override only the fields that differ. Some entries override the comment
/// delimiters as well, so the base is also used for languages whose comments
/// are not C-style.
const C_SLASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["//"],
    block_comment: Some(("/*", "*/")),
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    // Placeholder; table entries supply their own symbol patterns.
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
2312
/// Base configuration for languages whose line comments start with `#` and
/// that have no block comments.
///
/// `LANG_SCAN_TABLE` entries start from this value via `..HASH_BASE` and
/// override only the fields that differ (for example PowerShell and Nim add
/// a block-comment delimiter pair).
const HASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["#"],
    block_comment: None,
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    // Placeholder; table entries supply their own symbol patterns.
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
2326
2327static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
2331 (
2333 Language::C,
2334 StaticLangConfig {
2335 symbol_patterns: SP_C,
2336 has_preprocessor: true,
2337 ..C_SLASH_BASE
2338 },
2339 ),
2340 (
2341 Language::Cpp,
2342 StaticLangConfig {
2343 symbol_patterns: SP_CPP,
2344 has_preprocessor: true,
2345 ..C_SLASH_BASE
2346 },
2347 ),
2348 (
2349 Language::ObjectiveC,
2350 StaticLangConfig {
2351 symbol_patterns: SP_OBJECTIVEC,
2352 has_preprocessor: true,
2353 ..C_SLASH_BASE
2354 },
2355 ),
2356 (
2358 Language::CSharp,
2359 StaticLangConfig {
2360 symbol_patterns: SP_CSHARP,
2361 allow_csharp_verbatim_strings: true,
2362 ..C_SLASH_BASE
2363 },
2364 ),
2365 (
2366 Language::Go,
2367 StaticLangConfig {
2368 symbol_patterns: SP_GO,
2369 ..C_SLASH_BASE
2370 },
2371 ),
2372 (
2373 Language::Java,
2374 StaticLangConfig {
2375 symbol_patterns: SP_JAVA,
2376 ..C_SLASH_BASE
2377 },
2378 ),
2379 (
2380 Language::JavaScript,
2381 StaticLangConfig {
2382 symbol_patterns: SP_JS,
2383 ..C_SLASH_BASE
2384 },
2385 ),
2386 (
2387 Language::TypeScript,
2388 StaticLangConfig {
2389 symbol_patterns: SP_TS,
2390 ..C_SLASH_BASE
2391 },
2392 ),
2393 (
2394 Language::Svelte,
2395 StaticLangConfig {
2396 symbol_patterns: SP_JS,
2397 ..C_SLASH_BASE
2398 },
2399 ),
2400 (
2401 Language::Vue,
2402 StaticLangConfig {
2403 symbol_patterns: SP_JS,
2404 ..C_SLASH_BASE
2405 },
2406 ),
2407 (
2408 Language::Dart,
2409 StaticLangConfig {
2410 symbol_patterns: SP_DART,
2411 ..C_SLASH_BASE
2412 },
2413 ),
2414 (
2415 Language::Groovy,
2416 StaticLangConfig {
2417 symbol_patterns: SP_GROOVY,
2418 ..C_SLASH_BASE
2419 },
2420 ),
2421 (
2422 Language::Kotlin,
2423 StaticLangConfig {
2424 symbol_patterns: SP_KOTLIN,
2425 ..C_SLASH_BASE
2426 },
2427 ),
2428 (
2429 Language::Scala,
2430 StaticLangConfig {
2431 symbol_patterns: SP_SCALA,
2432 ..C_SLASH_BASE
2433 },
2434 ),
2435 (
2436 Language::Scss,
2437 StaticLangConfig {
2438 symbol_patterns: SP_NONE,
2439 ..C_SLASH_BASE
2440 },
2441 ),
2442 (
2444 Language::Rust,
2445 StaticLangConfig {
2446 symbol_patterns: SP_RUST,
2447 allow_single_quote_strings: false,
2448 ..C_SLASH_BASE
2449 },
2450 ),
2451 (
2453 Language::Swift,
2454 StaticLangConfig {
2455 symbol_patterns: SP_SWIFT,
2456 allow_single_quote_strings: false,
2457 ..C_SLASH_BASE
2458 },
2459 ),
2460 (
2462 Language::Zig,
2463 StaticLangConfig {
2464 symbol_patterns: SP_ZIG,
2465 block_comment: None,
2466 ..C_SLASH_BASE
2467 },
2468 ),
2469 (
2471 Language::FSharp,
2472 StaticLangConfig {
2473 line_comments: &["//"],
2474 block_comment: Some(("(*", "*)")),
2475 allow_single_quote_strings: false,
2476 allow_double_quote_strings: true,
2477 symbol_patterns: SP_FSHARP,
2478 ..C_SLASH_BASE
2479 },
2480 ),
2481 (
2483 Language::Shell,
2484 StaticLangConfig {
2485 symbol_patterns: SP_SHELL,
2486 ..HASH_BASE
2487 },
2488 ),
2489 (
2490 Language::Elixir,
2491 StaticLangConfig {
2492 symbol_patterns: SP_ELIXIR,
2493 ..HASH_BASE
2494 },
2495 ),
2496 (
2497 Language::Perl,
2498 StaticLangConfig {
2499 symbol_patterns: SP_PERL,
2500 ..HASH_BASE
2501 },
2502 ),
2503 (
2504 Language::R,
2505 StaticLangConfig {
2506 symbol_patterns: SP_R,
2507 ..HASH_BASE
2508 },
2509 ),
2510 (
2511 Language::Ruby,
2512 StaticLangConfig {
2513 symbol_patterns: SP_RUBY,
2514 ..HASH_BASE
2515 },
2516 ),
2517 (
2519 Language::Python,
2520 StaticLangConfig {
2521 symbol_patterns: SP_PYTHON,
2522 allow_triple_quote_strings: true,
2523 ..HASH_BASE
2524 },
2525 ),
2526 (
2528 Language::PowerShell,
2529 StaticLangConfig {
2530 symbol_patterns: SP_POWERSHELL,
2531 block_comment: Some(("<#", "#>")),
2532 ..HASH_BASE
2533 },
2534 ),
2535 (
2537 Language::Nim,
2538 StaticLangConfig {
2539 symbol_patterns: SP_NIM,
2540 block_comment: Some(("#[", "]#")),
2541 ..HASH_BASE
2542 },
2543 ),
2544 (
2546 Language::Makefile,
2547 StaticLangConfig {
2548 symbol_patterns: SP_NONE,
2549 allow_single_quote_strings: false,
2550 allow_double_quote_strings: false,
2551 ..HASH_BASE
2552 },
2553 ),
2554 (
2555 Language::Dockerfile,
2556 StaticLangConfig {
2557 symbol_patterns: SP_NONE,
2558 allow_single_quote_strings: false,
2559 allow_double_quote_strings: false,
2560 ..HASH_BASE
2561 },
2562 ),
2563 (
2566 Language::Css,
2567 StaticLangConfig {
2568 line_comments: &[],
2569 block_comment: Some(("/*", "*/")),
2570 symbol_patterns: SP_NONE,
2571 ..C_SLASH_BASE
2572 },
2573 ),
2574 (
2576 Language::Html,
2577 StaticLangConfig {
2578 line_comments: &[],
2579 block_comment: Some(("<!--", "-->")),
2580 allow_single_quote_strings: false,
2581 allow_double_quote_strings: false,
2582 symbol_patterns: SP_NONE,
2583 ..C_SLASH_BASE
2584 },
2585 ),
2586 (
2587 Language::Xml,
2588 StaticLangConfig {
2589 line_comments: &[],
2590 block_comment: Some(("<!--", "-->")),
2591 allow_single_quote_strings: false,
2592 allow_double_quote_strings: false,
2593 symbol_patterns: SP_NONE,
2594 ..C_SLASH_BASE
2595 },
2596 ),
2597 (
2599 Language::Lua,
2600 StaticLangConfig {
2601 line_comments: &["--"],
2602 block_comment: Some(("--[[", "]]")),
2603 symbol_patterns: SP_LUA,
2604 ..C_SLASH_BASE
2605 },
2606 ),
2607 (
2609 Language::Haskell,
2610 StaticLangConfig {
2611 line_comments: &["--"],
2612 block_comment: Some(("{-", "-}")),
2613 symbol_patterns: SP_HASKELL,
2614 ..C_SLASH_BASE
2615 },
2616 ),
2617 (
2619 Language::Sql,
2620 StaticLangConfig {
2621 line_comments: &["--"],
2622 block_comment: Some(("/*", "*/")),
2623 allow_single_quote_strings: true,
2624 allow_double_quote_strings: false,
2625 symbol_patterns: SP_SQL,
2626 ..C_SLASH_BASE
2627 },
2628 ),
2629 (
2631 Language::Ocaml,
2632 StaticLangConfig {
2633 line_comments: &[],
2634 block_comment: Some(("(*", "*)")),
2635 allow_single_quote_strings: false,
2636 symbol_patterns: SP_OCAML,
2637 ..C_SLASH_BASE
2638 },
2639 ),
2640 (
2646 Language::Assembly,
2647 StaticLangConfig {
2648 line_comments: &[";"],
2649 block_comment: Some(("/*", "*/")),
2650 allow_single_quote_strings: false,
2651 allow_double_quote_strings: true,
2652 symbol_patterns: SP_ASSEMBLY,
2653 ..C_SLASH_BASE
2654 },
2655 ),
2656 (
2657 Language::Clojure,
2658 StaticLangConfig {
2659 line_comments: &[";"],
2660 block_comment: None,
2661 allow_single_quote_strings: false,
2662 symbol_patterns: SP_CLOJURE,
2663 ..C_SLASH_BASE
2664 },
2665 ),
2666 (
2668 Language::Erlang,
2669 StaticLangConfig {
2670 line_comments: &["%"],
2671 block_comment: None,
2672 allow_single_quote_strings: false,
2673 symbol_patterns: SP_ERLANG,
2674 ..C_SLASH_BASE
2675 },
2676 ),
2677 (
2679 Language::Php,
2680 StaticLangConfig {
2681 line_comments: &["//", "#"],
2682 block_comment: Some(("/*", "*/")),
2683 symbol_patterns: SP_PHP,
2684 ..C_SLASH_BASE
2685 },
2686 ),
2687 (
2689 Language::Julia,
2690 StaticLangConfig {
2691 line_comments: &["#"],
2692 block_comment: Some(("#=", "=#")),
2693 allow_single_quote_strings: false,
2694 allow_triple_quote_strings: true,
2695 symbol_patterns: SP_JULIA,
2696 ..C_SLASH_BASE
2697 },
2698 ),
2699 (
2702 Language::Solidity,
2703 StaticLangConfig {
2704 symbol_patterns: SP_SOLIDITY,
2705 ..C_SLASH_BASE
2706 },
2707 ),
2708 (
2710 Language::Protobuf,
2711 StaticLangConfig {
2712 symbol_patterns: SP_PROTOBUF,
2713 ..C_SLASH_BASE
2714 },
2715 ),
2716 (
2718 Language::Hcl,
2719 StaticLangConfig {
2720 line_comments: &["#", "//"],
2721 allow_single_quote_strings: false,
2722 symbol_patterns: SP_NONE,
2723 ..C_SLASH_BASE
2724 },
2725 ),
2726 (
2728 Language::GraphQl,
2729 StaticLangConfig {
2730 allow_single_quote_strings: false,
2731 allow_triple_quote_strings: true,
2732 symbol_patterns: SP_NONE,
2733 ..HASH_BASE
2734 },
2735 ),
2736 (
2739 Language::Ada,
2740 StaticLangConfig {
2741 line_comments: &["--"],
2742 block_comment: None,
2743 allow_single_quote_strings: false,
2744 symbol_patterns: SP_ADA,
2745 ..C_SLASH_BASE
2746 },
2747 ),
2748 (
2750 Language::Vhdl,
2751 StaticLangConfig {
2752 line_comments: &["--"],
2753 block_comment: None,
2754 allow_single_quote_strings: false,
2755 symbol_patterns: SP_VHDL,
2756 ..C_SLASH_BASE
2757 },
2758 ),
2759 (
2761 Language::Verilog,
2762 StaticLangConfig {
2763 allow_single_quote_strings: false,
2764 symbol_patterns: SP_VERILOG,
2765 ..C_SLASH_BASE
2766 },
2767 ),
2768 (
2770 Language::Tcl,
2771 StaticLangConfig {
2772 allow_single_quote_strings: false,
2773 symbol_patterns: SP_TCL,
2774 ..HASH_BASE
2775 },
2776 ),
2777 (
2779 Language::Pascal,
2780 StaticLangConfig {
2781 line_comments: &["//"],
2782 block_comment: Some(("{", "}")),
2783 allow_single_quote_strings: true,
2784 allow_double_quote_strings: false,
2785 symbol_patterns: SP_PASCAL,
2786 ..C_SLASH_BASE
2787 },
2788 ),
2789 (
2791 Language::VisualBasic,
2792 StaticLangConfig {
2793 line_comments: &["'"],
2794 block_comment: None,
2795 allow_single_quote_strings: false,
2796 allow_double_quote_strings: true,
2797 symbol_patterns: SP_VB,
2798 ..C_SLASH_BASE
2799 },
2800 ),
2801 (
2803 Language::Lisp,
2804 StaticLangConfig {
2805 line_comments: &[";"],
2806 block_comment: Some(("#|", "|#")),
2807 allow_single_quote_strings: false,
2808 symbol_patterns: SP_LISP,
2809 ..C_SLASH_BASE
2810 },
2811 ),
2812 (
2815 Language::Fortran,
2816 StaticLangConfig {
2817 line_comments: &["!"],
2818 block_comment: None,
2819 symbol_patterns: SP_FORTRAN,
2820 ..C_SLASH_BASE
2821 },
2822 ),
2823 (
2825 Language::Nix,
2826 StaticLangConfig {
2827 block_comment: Some(("/*", "*/")),
2828 allow_single_quote_strings: false,
2829 symbol_patterns: SP_NONE,
2830 ..HASH_BASE
2831 },
2832 ),
2833 (
2835 Language::Crystal,
2836 StaticLangConfig {
2837 symbol_patterns: SP_CRYSTAL,
2838 ..HASH_BASE
2839 },
2840 ),
2841 (
2843 Language::D,
2844 StaticLangConfig {
2845 symbol_patterns: SP_D,
2846 ..C_SLASH_BASE
2847 },
2848 ),
2849 (
2851 Language::Glsl,
2852 StaticLangConfig {
2853 allow_single_quote_strings: false,
2854 symbol_patterns: SP_NONE,
2855 ..C_SLASH_BASE
2856 },
2857 ),
2858 (
2860 Language::Cmake,
2861 StaticLangConfig {
2862 block_comment: Some(("#[[", "]]")),
2863 allow_single_quote_strings: false,
2864 symbol_patterns: SP_CMAKE,
2865 ..HASH_BASE
2866 },
2867 ),
2868 (
2870 Language::Elm,
2871 StaticLangConfig {
2872 line_comments: &["--"],
2873 block_comment: Some(("{-", "-}")),
2874 allow_single_quote_strings: false,
2875 symbol_patterns: SP_ELM,
2876 ..C_SLASH_BASE
2877 },
2878 ),
2879 (
2881 Language::Awk,
2882 StaticLangConfig {
2883 allow_single_quote_strings: false,
2884 symbol_patterns: SP_AWK,
2885 ..HASH_BASE
2886 },
2887 ),
2888];
2889
/// Per-language switches that adjust how physical lines are folded into counts.
///
/// The type is `Copy`, so it is handed to every per-line call by value.
#[derive(Clone, Copy, Debug)]
struct IeeeFlags {
    /// When set, a code-only line whose trimmed text starts with `#` is also
    /// tallied as a compiler directive.
    has_preprocessor_directives: bool,
    /// When set, a blank line met while a block comment is still open is
    /// flagged as a multi-line comment line.
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside strings and block comments) is
    /// merged into the following line before it is classified.
    collapse_continuation_lines: bool,
}
2901
/// The kind of string literal the line scanner is currently inside.
#[derive(Clone, Copy, Debug)]
enum StringState {
    /// A string closed by the stored quote character; a backslash makes the
    /// scanner skip the character after it.
    Single(char),
    /// A string closed only by the stored multi-character delimiter.
    Triple(&'static str),
    /// A string opened with `@"`: a doubled `""` is skipped as a pair and a
    /// lone `"` closes it.
    VerbatimDouble,
}
2908
/// What the scanner observed on one logical line; every flag starts `false`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Default, Debug)]
struct LineFacts {
    /// At least one code character (including string contents) was seen.
    has_code: bool,
    /// A line-comment prefix was seen outside strings and block comments.
    has_single_comment: bool,
    /// Part of the line lies inside a block comment.
    has_multi_comment: bool,
    /// The line counts as docstring text; classification checks this flag
    /// before any other.
    has_docstring: bool,
}
2917
2918fn process_string_char(
2922 state: StringState,
2923 chars: &[char],
2924 i: usize,
2925) -> (Option<StringState>, usize) {
2926 match state {
2927 StringState::Single(delim) => {
2928 if chars[i] == '\\' {
2929 return (Some(state), 2); }
2931 if chars[i] == delim {
2932 (None, 1)
2933 } else {
2934 (Some(state), 1)
2935 }
2936 }
2937 StringState::Triple(delim) => {
2938 if starts_with(chars, i, delim) {
2939 (None, delim.len())
2940 } else {
2941 (Some(state), 1)
2942 }
2943 }
2944 StringState::VerbatimDouble => {
2945 if starts_with(chars, i, "\"\"") {
2946 return (Some(state), 2); }
2948 if chars[i] == '"' {
2949 (None, 1)
2950 } else {
2951 (Some(state), 1)
2952 }
2953 }
2954 }
2955}
2956
2957fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2961 if starts_with(chars, i, close) {
2962 (false, close.len())
2963 } else {
2964 (true, 1)
2965 }
2966}
2967
2968fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2972 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2973 return Some((StringState::VerbatimDouble, 2));
2974 }
2975 if config.allow_triple_quote_strings {
2976 if starts_with(chars, i, "\"\"\"") {
2977 return Some((StringState::Triple("\"\"\""), 3));
2978 }
2979 if starts_with(chars, i, "'''") {
2980 return Some((StringState::Triple("'''"), 3));
2981 }
2982 }
2983 if config.allow_single_quote_strings && chars[i] == '\'' {
2984 return Some((StringState::Single('\''), 1));
2985 }
2986 if config.allow_double_quote_strings && chars[i] == '"' {
2987 return Some((StringState::Single('"'), 1));
2988 }
2989 None
2990}
2991
2992fn step_through_block_comment(
2998 chars: &[char],
2999 i: usize,
3000 block_comment: Option<(&'static str, &'static str)>,
3001 in_block_comment: &mut bool,
3002) -> usize {
3003 if let Some((_, close)) = block_comment {
3004 let (still_in, advance) = process_block_comment_char(chars, i, close);
3005 *in_block_comment = still_in;
3006 return advance;
3007 }
3008 0
3009}
3010
3011fn try_open_block_comment(
3014 chars: &[char],
3015 i: usize,
3016 block_comment: Option<(&'static str, &'static str)>,
3017) -> Option<usize> {
3018 let (open, _) = block_comment?;
3019 starts_with(chars, i, open).then_some(open.len())
3020}
3021
3022fn scan_line(
3026 chars: &[char],
3027 config: &ScanConfig,
3028 facts: &mut LineFacts,
3029 in_block_comment: &mut bool,
3030 string_state: &mut Option<StringState>,
3031) {
3032 let mut i = 0usize;
3033 while i < chars.len() {
3034 if let Some(state) = *string_state {
3036 facts.has_code = true;
3037 let (new_state, advance) = process_string_char(state, chars, i);
3038 *string_state = new_state;
3039 i += advance;
3040 continue;
3041 }
3042
3043 if *in_block_comment {
3045 facts.has_multi_comment = true;
3046 i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
3047 continue;
3048 }
3049
3050 if chars[i].is_whitespace() {
3052 i += 1;
3053 continue;
3054 }
3055
3056 if let Some((new_state, advance)) = try_open_string(chars, i, config) {
3058 facts.has_code = true;
3059 *string_state = Some(new_state);
3060 i += advance;
3061 continue;
3062 }
3063
3064 if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
3066 facts.has_multi_comment = true;
3067 *in_block_comment = true;
3068 i += advance;
3069 continue;
3070 }
3071
3072 if config
3074 .line_comments
3075 .iter()
3076 .any(|prefix| starts_with(chars, i, prefix))
3077 {
3078 facts.has_single_comment = true;
3079 break;
3080 }
3081
3082 facts.has_code = true;
3084 i += 1;
3085 }
3086}
3087
3088fn finalize_line_facts(
3093 facts: LineFacts,
3094 trimmed: &str,
3095 raw: &mut RawLineCounts,
3096 ieee: IeeeFlags,
3097 in_block_comment: bool,
3098 string_state: Option<StringState>,
3099 pending_continuation: &mut Option<LineFacts>,
3100) -> Option<LineFacts> {
3101 if ieee.has_preprocessor_directives
3105 && facts.has_code
3106 && !facts.has_single_comment
3107 && !facts.has_multi_comment
3108 && trimmed.starts_with('#')
3109 {
3110 raw.compiler_directive_lines += 1;
3111 }
3112
3113 let is_continuation = ieee.collapse_continuation_lines
3116 && !in_block_comment
3117 && string_state.is_none()
3118 && trimmed.ends_with('\\');
3119
3120 if is_continuation {
3121 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
3122 pending.has_code |= facts.has_code;
3123 pending.has_single_comment |= facts.has_single_comment;
3124 pending.has_multi_comment |= facts.has_multi_comment;
3125 pending.has_docstring |= facts.has_docstring;
3126 return None; }
3128
3129 let emit = if let Some(pending) = pending_continuation.take() {
3131 LineFacts {
3132 has_code: pending.has_code | facts.has_code,
3133 has_single_comment: pending.has_single_comment | facts.has_single_comment,
3134 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
3135 has_docstring: pending.has_docstring | facts.has_docstring,
3136 }
3137 } else {
3138 facts
3139 };
3140 Some(emit)
3141}
3142
3143#[allow(clippy::needless_pass_by_value)]
3148#[allow(clippy::too_many_arguments)]
3149#[allow(clippy::many_single_char_names)] fn process_physical_line(
3151 line: &str,
3152 line_idx: usize,
3153 config: &ScanConfig,
3154 raw: &mut RawLineCounts,
3155 in_block_comment: &mut bool,
3156 string_state: &mut Option<StringState>,
3157 pending_continuation: &mut Option<LineFacts>,
3158 ieee: IeeeFlags,
3159) {
3160 raw.total_physical_lines += 1;
3161
3162 if config.skip_lines.contains(&line_idx) {
3163 raw.docstring_comment_lines += 1;
3164 return;
3165 }
3166
3167 let trimmed = line.trim();
3168 let mut facts = LineFacts::default();
3169
3170 if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
3174 facts.has_multi_comment = true;
3175 }
3176
3177 let chars: Vec<char> = line.chars().collect();
3178 scan_line(&chars, config, &mut facts, in_block_comment, string_state);
3179
3180 let Some(emit) = finalize_line_facts(
3181 facts,
3182 trimmed,
3183 raw,
3184 ieee,
3185 *in_block_comment,
3186 *string_state,
3187 pending_continuation,
3188 ) else {
3189 return;
3190 };
3191
3192 classify_line(raw, &emit, trimmed);
3193
3194 if emit.has_code {
3195 use std::hash::{DefaultHasher, Hash, Hasher};
3196 let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
3197 raw.functions += f;
3198 raw.classes += c;
3199 raw.variables += v;
3200 raw.imports += i;
3201 raw.test_count += t;
3202 raw.test_assertion_count += a;
3203 raw.test_suite_count += s;
3204
3205 raw.cyclomatic_complexity +=
3207 count_branch_in_line(trimmed.as_bytes(), config.branch_keywords);
3208
3209 match config.lsloc_strategy {
3211 LslocStrategy::Semicolons => {
3212 let semi = u32::try_from(trimmed.bytes().filter(|&b| b == b';').count())
3213 .unwrap_or(u32::MAX);
3214 *raw.lsloc.get_or_insert(0) += semi;
3215 }
3216 LslocStrategy::NonContinuationNewlines => {
3217 let cont = trimmed.ends_with('\\')
3218 || trimmed.ends_with(',')
3219 || trimmed.ends_with('(')
3220 || trimmed.ends_with('[')
3221 || trimmed.ends_with('{');
3222 if !cont {
3223 *raw.lsloc.get_or_insert(0) += 1;
3224 }
3225 }
3226 LslocStrategy::Unsupported => {}
3227 }
3228
3229 let mut h = DefaultHasher::new();
3231 trimmed.hash(&mut h);
3232 raw.code_line_hashes.push(h.finish());
3233 }
3234}
3235
3236#[allow(clippy::needless_pass_by_value)]
3237fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
3238 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3239 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3240
3241 let mut raw = RawLineCounts::default();
3242 let mut warnings = Vec::new();
3243
3244 let mut in_block_comment = false;
3245 let mut string_state: Option<StringState> = None;
3246 let mut pending_continuation: Option<LineFacts> = None;
3248
3249 for (line_idx, line) in lines.iter().enumerate() {
3250 process_physical_line(
3251 line,
3252 line_idx,
3253 &config,
3254 &mut raw,
3255 &mut in_block_comment,
3256 &mut string_state,
3257 &mut pending_continuation,
3258 ieee,
3259 );
3260 }
3261
3262 if let Some(pending) = pending_continuation.take() {
3264 classify_line(&mut raw, &pending, "");
3265 }
3266
3267 if in_block_comment {
3268 warnings.push("unclosed block comment detected; result is best effort".into());
3269 }
3270 if string_state.is_some() {
3271 warnings.push("unclosed string literal detected; result is best effort".into());
3272 }
3273
3274 RawFileAnalysis {
3275 raw,
3276 parse_mode: if warnings.is_empty() {
3277 ParseMode::Lexical
3278 } else {
3279 ParseMode::LexicalBestEffort
3280 },
3281 warnings,
3282 style_analysis: None,
3283 }
3284}
3285
3286const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
3287 if facts.has_docstring {
3288 raw.docstring_comment_lines += 1;
3289 } else if !facts.has_code
3290 && !facts.has_single_comment
3291 && !facts.has_multi_comment
3292 && trimmed.is_empty()
3293 {
3294 raw.blank_only_lines += 1;
3295 } else if facts.has_code && facts.has_single_comment {
3296 raw.mixed_code_single_comment_lines += 1;
3297 } else if facts.has_code && facts.has_multi_comment {
3298 raw.mixed_code_multi_comment_lines += 1;
3299 } else if facts.has_code {
3300 raw.code_only_lines += 1;
3301 } else if facts.has_single_comment {
3302 raw.single_comment_only_lines += 1;
3303 } else if facts.has_multi_comment {
3304 raw.multi_comment_only_lines += 1;
3305 } else if trimmed.is_empty() {
3306 raw.blank_only_lines += 1;
3307 } else {
3308 raw.skipped_unknown_lines += 1;
3309 }
3310}
3311
/// Returns `1` when `trimmed` starts with any of the prefixes in `pats`, else `0`.
fn prefix_hit(pats: &[&str], trimmed: &str) -> u64 {
    let matched = pats
        .iter()
        .copied()
        .any(|prefix| trimmed.starts_with(prefix));
    u64::from(matched)
}
3316
3317fn fn_prefix_paren_hit(patterns: &SymbolPatterns, trimmed: &str) -> u64 {
3320 if patterns.functions_prefix_paren.is_empty() {
3321 return 0;
3322 }
3323 let Some(paren_pos) = trimmed.find('(') else {
3324 return 0;
3325 };
3326 if trimmed[..paren_pos].contains('=') {
3327 0
3328 } else {
3329 prefix_hit(patterns.functions_prefix_paren, trimmed)
3330 }
3331}
3332
3333fn var_prefix_no_paren_hit(patterns: &SymbolPatterns, trimmed: &str) -> u64 {
3336 if patterns.variables_prefix_no_paren.is_empty()
3337 || prefix_hit(patterns.variables_prefix_no_paren, trimmed) == 0
3338 {
3339 return 0;
3340 }
3341 trimmed
3342 .find('(')
3343 .map_or(1, |pp| u64::from(trimmed[..pp].contains('=')))
3344}
3345
3346fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
3347 let hit = |pats: &[&str]| prefix_hit(pats, trimmed);
3348 let fn_pp = fn_prefix_paren_hit(patterns, trimmed);
3349 let test_hit = hit(patterns.tests);
3350 let fn_hit = if test_hit == 0 {
3357 hit(patterns.functions) | fn_pp
3358 } else {
3359 0
3360 };
3361 let class_hit = if test_hit == 0 {
3362 hit(patterns.classes)
3363 } else {
3364 0
3365 };
3366 let var_pnp = var_prefix_no_paren_hit(patterns, trimmed);
3367 (
3368 fn_hit,
3369 class_hit,
3370 hit(patterns.variables) | var_pnp,
3371 hit(patterns.imports),
3372 test_hit,
3373 hit(patterns.assertions),
3374 hit(patterns.test_suites),
3375 )
3376}
3377
/// Reports whether the byte range `start..end` of `line` stands alone as a word.
///
/// The byte just before `start` and the byte at `end`, where they exist, must
/// each be neither an ASCII letter or digit nor `_`.
fn is_word_boundary(line: &[u8], start: usize, end: usize) -> bool {
    let is_word_byte = |byte: u8| byte.is_ascii_alphanumeric() || byte == b'_';
    let preceded_by_word = start != 0 && is_word_byte(line[start - 1]);
    let followed_by_word = end < line.len() && is_word_byte(line[end]);
    !(preceded_by_word || followed_by_word)
}
3385
3386fn keyword_matches_at(line: &[u8], i: usize, kw_bytes: &[u8], word_kw: bool) -> bool {
3388 if &line[i..i + kw_bytes.len()] != kw_bytes {
3389 return false;
3390 }
3391 !word_kw || is_word_boundary(line, i, i + kw_bytes.len())
3392}
3393
3394fn count_branch_in_line(line: &[u8], keywords: &[&str]) -> u32 {
3399 if keywords.is_empty() || line.is_empty() {
3400 return 0;
3401 }
3402 let mut total = 0u32;
3403 for &kw in keywords {
3404 let kw_bytes = kw.as_bytes();
3405 let word_kw = kw.bytes().all(|b| b.is_ascii_alphabetic() || b == b'_');
3406 let mut i = 0usize;
3407 while i + kw_bytes.len() <= line.len() {
3408 if keyword_matches_at(line, i, kw_bytes, word_kw) {
3409 total += 1;
3410 i += kw_bytes.len();
3411 } else {
3412 i += 1;
3413 }
3414 }
3415 }
3416 total
3417}
3418
/// Reports whether the characters of `needle` appear in `chars` starting at `index`.
///
/// Returns `false` when `index` lies beyond the end of `chars` or when fewer
/// characters remain than `needle` needs. An empty `needle` matches at every
/// position up to and including `chars.len()`.
///
/// The comparison walks both sequences in step and allocates nothing.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
3423
/// One indentation scope tracked while searching Python source for docstrings.
#[derive(Clone, Debug)]
struct PyContext {
    /// Indentation of the scope, counted in leading whitespace characters.
    indent: usize,
    /// `true` until the first statement of the scope has been examined.
    expect_docstring: bool,
}
3429
3430fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
3432 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
3433 contexts.pop();
3434 }
3435}
3436
3437fn py_handle_pending_indent(
3440 pending_block_indent: &mut Option<usize>,
3441 contexts: &mut Vec<PyContext>,
3442 indent: usize,
3443 trimmed: &str,
3444) {
3445 let Some(base_indent) = *pending_block_indent else {
3446 return;
3447 };
3448 if indent > base_indent {
3449 contexts.push(PyContext {
3450 indent,
3451 expect_docstring: true,
3452 });
3453 *pending_block_indent = None;
3454 } else if !trimmed.starts_with('@') {
3455 *pending_block_indent = None;
3456 }
3457}
3458
3459fn py_try_record_docstring(
3465 ctx: &mut PyContext,
3466 trimmed: &str,
3467 idx: usize,
3468 docstring_lines: &mut HashSet<usize>,
3469 active_docstring: &mut Option<(&'static str, usize)>,
3470) -> bool {
3471 if !ctx.expect_docstring {
3472 return false;
3473 }
3474 if let Some(delim) = docstring_delimiter(trimmed) {
3475 docstring_lines.insert(idx);
3476 ctx.expect_docstring = false;
3477 if !closes_triple_docstring(trimmed, delim, true) {
3478 *active_docstring = Some((delim, idx));
3479 }
3480 return true;
3481 }
3482 ctx.expect_docstring = false;
3483 false
3484}
3485
3486fn track_active_docstring(
3490 active_docstring: &mut Option<(&'static str, usize)>,
3491 docstring_lines: &mut HashSet<usize>,
3492 idx: usize,
3493 trimmed: &str,
3494) -> bool {
3495 let Some((delim, start_line)) = *active_docstring else {
3496 return false;
3497 };
3498 docstring_lines.insert(idx);
3499 if closes_triple_docstring(trimmed, delim, idx == start_line) {
3500 *active_docstring = None;
3501 }
3502 true
3503}
3504
3505fn try_record_docstring_if_context(
3508 contexts: &mut [PyContext],
3509 trimmed: &str,
3510 idx: usize,
3511 docstring_lines: &mut HashSet<usize>,
3512 active_docstring: &mut Option<(&'static str, usize)>,
3513) -> bool {
3514 let Some(ctx) = contexts.last_mut() else {
3515 return false;
3516 };
3517 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
3518}
3519
/// Marks every line from the start of a still-open docstring to the end of the
/// input as docstring lines.
///
/// Does nothing when `active_docstring` is `None`.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, first_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(first_line..num_lines);
}
3532
3533fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
3534 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3535 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3536
3537 let mut docstring_lines = HashSet::new();
3538 let mut contexts = vec![PyContext {
3539 indent: 0,
3540 expect_docstring: true,
3541 }];
3542 let mut pending_block_indent: Option<usize> = None;
3543 let mut active_docstring: Option<(&'static str, usize)> = None;
3544
3545 for (idx, line) in lines.iter().enumerate() {
3546 let trimmed = line.trim();
3547 let indent = leading_indent(line);
3548
3549 if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
3550 continue;
3551 }
3552
3553 if trimmed.is_empty() || trimmed.starts_with('#') {
3555 continue;
3556 }
3557
3558 py_pop_outdented_contexts(&mut contexts, indent);
3559 py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
3560
3561 if try_record_docstring_if_context(
3562 &mut contexts,
3563 trimmed,
3564 idx,
3565 &mut docstring_lines,
3566 &mut active_docstring,
3567 ) {
3568 continue;
3569 }
3570
3571 if is_python_block_header(trimmed) {
3572 pending_block_indent = Some(indent);
3573 }
3574 }
3575
3576 mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
3577
3578 docstring_lines
3579}
3580
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, tabs included.
fn leading_indent(line: &str) -> usize {
    let mut width = 0usize;
    for ch in line.chars() {
        if !ch.is_whitespace() {
            break;
        }
        width += 1;
    }
    width
}
3584
/// Reports whether a trimmed line is a `def`, `async def`, or `class` header
/// that ends with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_PREFIXES: [&str; 3] = ["def ", "async def ", "class "];
    trimmed.ends_with(':')
        && HEADER_PREFIXES
            .iter()
            .any(|prefix| trimmed.starts_with(prefix))
}
3591
/// Returns the triple-quote delimiter that opens `trimmed`, if any.
///
/// Any run of the string-prefix letters `r`, `u`, `b`, `f` (either case) at the
/// start of the line is skipped before the check.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    let after_prefix = trimmed.trim_start_matches(|c: char| {
        matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F')
    });
    ["\"\"\"", "'''"]
        .into_iter()
        .find(|delim| after_prefix.starts_with(delim))
}
3613
/// Reports whether a docstring delimited by `delim` ends on this line.
///
/// On the line that opened the docstring two non-overlapping occurrences of
/// `delim` are needed (the opener and the closer); on any later line one is
/// enough. An empty `delim` never closes anything.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    if delim.is_empty() {
        // An empty pattern matches at every position, which would make every
        // line look closed.
        return false;
    }
    let needed = if same_line_as_start { 2 } else { 1 };
    // `str::matches` yields non-overlapping occurrences; stop counting as soon
    // as enough have been seen.
    trimmed.matches(delim).take(needed).count() == needed
}
3628
/// Tree-sitter based line classification, compiled only with the
/// `tree-sitter` feature.
///
/// A parsed syntax tree decides which rows hold code, comments, or docstrings,
/// and (for languages with configured node kinds) which nodes count as
/// functions, classes, tests, and assertions.
#[cfg(feature = "tree-sitter")]
pub mod ts {
    use tree_sitter::Node;

    use super::{ParseMode, RawFileAnalysis, RawLineCounts};

    /// Node kinds and name prefixes used to count symbols in a syntax tree.
    ///
    /// An empty string disables the corresponding check.
    struct SymbolKinds {
        /// Node kind that marks a function definition.
        function_def: &'static str,
        /// Node kind that marks a class definition.
        class_def: &'static str,
        /// Functions whose name starts with this prefix count as tests.
        test_fn_prefix: &'static str,
        /// Classes whose name starts with this prefix count as tests.
        test_class_prefix: &'static str,
        /// Attribute calls whose attribute name starts with this prefix count
        /// as test assertions.
        assertion_attr_prefix: &'static str,
    }

    impl SymbolKinds {
        /// Returns a configuration with every check disabled.
        const fn none() -> Self {
            Self {
                function_def: "",
                class_def: "",
                test_fn_prefix: "",
                test_class_prefix: "",
                assertion_attr_prefix: "",
            }
        }
    }

    /// Parses `text` with `ts_language` and classifies every line.
    ///
    /// * `comment_node_kinds` - node kinds treated as comments.
    /// * `docstring_stmt_kind` - statement kind that is a docstring when its
    ///   only named child is a `string` node; `None` disables docstrings.
    /// * `symbols` - symbol-counting configuration; the tree is walked for
    ///   symbols only when a function or class node kind is set.
    ///
    /// Returns `None` when the language cannot be set on the parser or the
    /// parse yields no tree. Lines are split on `\n` only.
    fn analyze_lines(
        text: &str,
        ts_language: &tree_sitter::Language,
        comment_node_kinds: &[&str],
        docstring_stmt_kind: Option<&str>,
        symbols: &SymbolKinds,
    ) -> Option<RawFileAnalysis> {
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(ts_language).ok()?;
        let tree = parser.parse(text, None)?;

        let lines: Vec<&str> = text.split_terminator('\n').collect();
        let n = lines.len();

        // One flag per line, indexed by row.
        let mut has_code = vec![false; n];
        let mut has_comment = vec![false; n];
        let mut comment_is_block = vec![false; n];
        let mut has_docstring = vec![false; n];

        let mut ctx = VisitCtx {
            source: text.as_bytes(),
            comment_kinds: comment_node_kinds,
            docstring_stmt_kind,
            has_code: &mut has_code,
            has_comment: &mut has_comment,
            comment_is_block: &mut comment_is_block,
            has_docstring: &mut has_docstring,
        };
        visit(tree.root_node(), &mut ctx);

        let mut raw = RawLineCounts::default();
        classify_ts_lines(
            &lines,
            &has_code,
            &has_comment,
            &comment_is_block,
            &has_docstring,
            &mut raw,
        );

        if !symbols.function_def.is_empty() || !symbols.class_def.is_empty() {
            count_symbols(tree.root_node(), text.as_bytes(), symbols, &mut raw);
        }

        Some(RawFileAnalysis {
            raw,
            parse_mode: ParseMode::TreeSitter,
            warnings: Vec::new(),
            style_analysis: None,
        })
    }

    /// Runs `count_symbols` on every child of `node`.
    fn recurse_children(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
        for i in 0..node.child_count() {
            #[allow(clippy::cast_possible_truncation)]
            if let Some(child) = node.child(i as u32) {
                count_symbols(child, source, kinds, raw);
            }
        }
    }

    /// Counts `node` when it is a function definition, then descends into it.
    ///
    /// A function whose name starts with the configured test prefix is counted
    /// as a test instead. Returns `true` when the node was handled here.
    fn try_count_function(
        node: Node,
        source: &[u8],
        kinds: &SymbolKinds,
        raw: &mut RawLineCounts,
    ) -> bool {
        if kinds.function_def.is_empty() || node.kind() != kinds.function_def {
            return false;
        }
        // A missing or unreadable name is treated as the empty string.
        let name = node
            .child_by_field_name("name")
            .and_then(|n| n.utf8_text(source).ok())
            .unwrap_or("");
        if !kinds.test_fn_prefix.is_empty() && name.starts_with(kinds.test_fn_prefix) {
            raw.test_count += 1;
        } else {
            raw.functions += 1;
        }
        recurse_children(node, source, kinds, raw);
        true
    }

    /// Counts `node` when it is a class definition, then descends into it.
    ///
    /// A class whose name starts with the configured test prefix is counted as
    /// a test instead. Returns `true` when the node was handled here.
    fn try_count_class(
        node: Node,
        source: &[u8],
        kinds: &SymbolKinds,
        raw: &mut RawLineCounts,
    ) -> bool {
        if kinds.class_def.is_empty() || node.kind() != kinds.class_def {
            return false;
        }
        let name = node
            .child_by_field_name("name")
            .and_then(|n| n.utf8_text(source).ok())
            .unwrap_or("");
        if !kinds.test_class_prefix.is_empty() && name.starts_with(kinds.test_class_prefix) {
            raw.test_count += 1;
        } else {
            raw.classes += 1;
        }
        recurse_children(node, source, kinds, raw);
        true
    }

    /// Counts `node` as a test assertion when it is a `call` whose `function`
    /// field is an `attribute` node with an attribute name starting with the
    /// configured prefix.
    ///
    /// Returns `true` when the assertion was counted; the children of a
    /// counted call are not visited.
    fn try_count_assertion(
        node: Node,
        source: &[u8],
        kinds: &SymbolKinds,
        raw: &mut RawLineCounts,
    ) -> bool {
        if kinds.assertion_attr_prefix.is_empty() || node.kind() != "call" {
            return false;
        }
        let Some(func) = node.child_by_field_name("function") else {
            return false;
        };
        if func.kind() != "attribute" {
            return false;
        }
        let attr_text = func
            .child_by_field_name("attribute")
            .and_then(|n| n.utf8_text(source).ok())
            .unwrap_or("");
        if !attr_text.starts_with(kinds.assertion_attr_prefix) {
            return false;
        }
        raw.test_assertion_count += 1;
        true
    }

    /// Walks the subtree at `node` and counts functions, classes, tests, and
    /// assertions.
    ///
    /// The checks are tried in that order; a node that none of them handles is
    /// only descended into.
    fn count_symbols(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
        if try_count_function(node, source, kinds, raw) {
            return;
        }
        if try_count_class(node, source, kinds, raw) {
            return;
        }
        if try_count_assertion(node, source, kinds, raw) {
            return;
        }
        recurse_children(node, source, kinds, raw);
    }

    /// The flags collected for one line while walking the tree.
    #[allow(clippy::struct_excessive_bools)]
    #[derive(Clone, Copy)]
    struct TsLineFlags {
        /// A leaf code node touches the line.
        has_code: bool,
        /// A comment node touches the line.
        has_comment: bool,
        /// A comment on the line was recognised as a block comment.
        comment_is_block: bool,
        /// A docstring string node touches the line.
        has_docstring: bool,
    }

    /// Adds one line to exactly one bucket of `raw`.
    ///
    /// Precedence: blank, docstring without code, code with comment, comment
    /// only, and finally code only, which also takes every non-blank line that
    /// carries no flag.
    const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
        if trimmed.is_empty() {
            raw.blank_only_lines += 1;
        } else if flags.has_docstring && !flags.has_code {
            raw.docstring_comment_lines += 1;
        } else if flags.has_code && flags.has_comment {
            if flags.comment_is_block {
                raw.mixed_code_multi_comment_lines += 1;
            } else {
                raw.mixed_code_single_comment_lines += 1;
            }
        } else if flags.has_comment {
            if flags.comment_is_block {
                raw.multi_comment_only_lines += 1;
            } else {
                raw.single_comment_only_lines += 1;
            }
        } else {
            raw.code_only_lines += 1;
        }
    }

    /// Classifies every line from the per-line flag slices.
    ///
    /// Each flag slice is indexed with the same index as `lines`, so all of
    /// them must be at least as long as `lines`.
    fn classify_ts_lines(
        lines: &[&str],
        has_code: &[bool],
        has_comment: &[bool],
        comment_is_block: &[bool],
        has_docstring: &[bool],
        raw: &mut RawLineCounts,
    ) {
        for i in 0..lines.len() {
            raw.total_physical_lines += 1;
            classify_ts_line(
                lines[i].trim(),
                TsLineFlags {
                    has_code: has_code[i],
                    has_comment: has_comment[i],
                    comment_is_block: comment_is_block[i],
                    has_docstring: has_docstring[i],
                },
                raw,
            );
        }
    }

    /// Shared state for the tree walk that fills the per-line flags.
    struct VisitCtx<'a> {
        /// Source text as bytes, used to read node text.
        source: &'a [u8],
        /// Node kinds treated as comments.
        comment_kinds: &'a [&'a str],
        /// Statement kind that may wrap a docstring, if any.
        docstring_stmt_kind: Option<&'a str>,
        /// Per-line flag: the line holds code.
        has_code: &'a mut Vec<bool>,
        /// Per-line flag: the line holds a comment.
        has_comment: &'a mut Vec<bool>,
        /// Per-line flag: a comment on the line is a block comment.
        comment_is_block: &'a mut Vec<bool>,
        /// Per-line flag: the line holds docstring text.
        has_docstring: &'a mut Vec<bool>,
    }

    /// Marks every row covered by a comment node.
    ///
    /// The comment is treated as a block comment when its text starts with
    /// `/*` or `<#`. Rows beyond the flag vectors are ignored.
    fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
        let start_row = node.start_position().row;
        let end_row = node.end_position().row;
        let first_two = node
            .utf8_text(ctx.source)
            .unwrap_or("")
            .get(..2)
            .unwrap_or("");
        let is_block = first_two == "/*" || first_two == "<#";
        for row in start_row..=end_row {
            if row < ctx.has_comment.len() {
                ctx.has_comment[row] = true;
                if is_block {
                    ctx.comment_is_block[row] = true;
                }
            }
        }
    }

    /// Marks the rows of a docstring when `node` is a statement of the
    /// configured kind whose only named child is a `string` node.
    ///
    /// Returns `true` when the node was recognised as a docstring.
    fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
        let Some(stmt_kind) = ctx.docstring_stmt_kind else {
            return false;
        };
        if kind != stmt_kind || node.named_child_count() != 1 {
            return false;
        }
        let Some(child) = node.named_child(0) else {
            return false;
        };
        if child.kind() != "string" {
            return false;
        }
        let child_start = child.start_position().row;
        let child_end = child.end_position().row;
        for row in child_start..=child_end {
            if row < ctx.has_docstring.len() {
                ctx.has_docstring[row] = true;
            }
        }
        true
    }

    /// Marks every row covered by a leaf node as code.
    fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
        let start_row = node.start_position().row;
        let end_row = node.end_position().row;
        for row in start_row..=end_row {
            if row < ctx.has_code.len() {
                ctx.has_code[row] = true;
            }
        }
    }

    /// Walks the tree depth-first and fills the per-line flags.
    ///
    /// Comment nodes and docstring statements are handled without descending
    /// into them. A childless node that is not an "extra" node is code. Every
    /// other node is only descended into.
    #[allow(clippy::too_many_lines)]
    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
        let kind = node.kind();

        if ctx.comment_kinds.contains(&kind) {
            visit_comment_node(node, ctx);
            return;
        }

        if visit_maybe_docstring(node, kind, ctx) {
            return;
        }

        if node.child_count() == 0 && !node.is_extra() {
            visit_leaf_code(node, ctx);
            return;
        }

        for i in 0..node.child_count() {
            #[allow(clippy::cast_possible_truncation)]
            if let Some(child) = node.child(i as u32) {
                visit(child, ctx);
            }
        }
    }

    /// C is analyzed for line classification only; no symbols are counted.
    const C_SYMBOLS: SymbolKinds = SymbolKinds::none();

    /// Node kinds and name prefixes used to count symbols in Python sources.
    const PYTHON_SYMBOLS: SymbolKinds = SymbolKinds {
        function_def: "function_definition",
        class_def: "class_definition",
        test_fn_prefix: "test_",
        test_class_prefix: "Test",
        assertion_attr_prefix: "assert",
    };

    /// Classifies the lines of C source text using the tree-sitter C grammar.
    ///
    /// Returns `None` when the parser cannot be set up or yields no tree.
    #[must_use]
    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
        analyze_lines(text, &lang, &["comment"], None, &C_SYMBOLS)
    }

    /// Classifies the lines of Python source text using the tree-sitter Python
    /// grammar, with docstrings detected and symbols counted.
    ///
    /// Returns `None` when the parser cannot be set up or yields no tree.
    #[must_use]
    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        analyze_lines(
            text,
            &lang,
            &["comment"],
            Some("expression_statement"),
            &PYTHON_SYMBOLS,
        )
    }
}
4012
#[cfg(test)]
mod tests {
    use super::*;

    use std::collections::BTreeMap;
    use std::path::Path;

    // ---------------------------------------------------------------------
    // Shared helpers
    // ---------------------------------------------------------------------

    /// Runs `detect_language` on a bare file name: no first line, no
    /// extension overrides, and the final flag set to `false`.
    fn detect_from_path(file_name: &str) -> Option<Language> {
        detect_language(Path::new(file_name), None, &BTreeMap::new(), false)
    }

    /// Runs `detect_language` on the extension-less path `script` with the
    /// given first line, no overrides, and `flag` as the final argument.
    fn detect_from_first_line(first_line: &str, flag: bool) -> Option<Language> {
        detect_language(
            Path::new("script"),
            Some(first_line),
            &BTreeMap::new(),
            flag,
        )
    }

    /// Analyzes `line` followed by a newline as `lang` with default options
    /// and returns `(functions, classes, test_count)` from the raw counters.
    fn symbol_counts(lang: Language, line: &str) -> (u64, u64, u64) {
        let source = format!("{line}\n");
        let analysis = analyze_text(lang, &source, AnalysisOptions::default());
        let raw = &analysis.raw;
        (raw.functions, raw.classes, raw.test_count)
    }

    // ---------------------------------------------------------------------
    // Line classification
    // ---------------------------------------------------------------------

    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
 """function docs"""
 value = 1 # trailing comment
 return value
"#;

        let analysis = analyze_text(Language::Python, input, AnalysisOptions::default());
        let raw = &analysis.raw;
        assert_eq!(raw.docstring_comment_lines, 2);
        assert_eq!(raw.mixed_code_single_comment_lines, 1);
        assert_eq!(raw.code_only_lines, 2);
    }

    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let analysis = analyze_text(Language::C, input, AnalysisOptions::default());
        let raw = &analysis.raw;
        assert_eq!(raw.mixed_code_single_comment_lines, 1);
        assert_eq!(raw.multi_comment_only_lines, 1);
    }

    #[test]
    fn detect_language_by_shebang() {
        assert_eq!(
            detect_from_first_line("#!/usr/bin/env bash", true),
            Some(Language::Shell)
        );
    }

    // ---------------------------------------------------------------------
    // Symbol and test counting
    // ---------------------------------------------------------------------

    #[test]
    fn python_test_fn_not_double_counted() {
        let (functions, classes, tests) = symbol_counts(Language::Python, "def test_foo():");
        assert_eq!(functions, 0, "test fn must not also increment functions");
        assert_eq!(tests, 1, "must be counted as a test");
        assert_eq!(classes, 0);
    }

    #[test]
    fn python_test_class_not_double_counted() {
        let (functions, classes, tests) = symbol_counts(Language::Python, "class TestFoo:");
        assert_eq!(classes, 0, "test class must not also increment classes");
        assert_eq!(tests, 1, "must be counted as a test");
        assert_eq!(functions, 0);
    }

    #[test]
    fn python_regular_fn_counts_as_function() {
        let (functions, classes, tests) = symbol_counts(Language::Python, "def regular():");
        assert_eq!(functions, 1, "regular function must be counted");
        assert_eq!(tests, 0);
        assert_eq!(classes, 0);
    }

    #[test]
    fn python_regular_class_counts_as_class() {
        let (functions, classes, tests) = symbol_counts(Language::Python, "class Regular:");
        assert_eq!(classes, 1, "regular class must be counted");
        assert_eq!(tests, 0);
        assert_eq!(functions, 0);
    }

    #[test]
    fn go_test_fn_not_double_counted() {
        let (functions, _, tests) = symbol_counts(Language::Go, "func TestFoo(t *testing.T) {");
        assert_eq!(functions, 0, "Go test func must not also increment functions");
        assert_eq!(tests, 1, "must be counted as a test");
    }

    #[test]
    fn go_benchmark_fn_not_double_counted() {
        let (functions, _, tests) =
            symbol_counts(Language::Go, "func BenchmarkBar(b *testing.B) {");
        assert_eq!(functions, 0, "Go benchmark func must not also increment functions");
        assert_eq!(tests, 1, "must be counted as a test");
    }

    #[test]
    fn go_regular_fn_counts_as_function() {
        let (functions, _, tests) = symbol_counts(Language::Go, "func doSomething() {");
        assert_eq!(functions, 1, "regular Go func must be counted");
        assert_eq!(tests, 0);
    }

    #[test]
    fn rust_test_attr_counts_as_test_not_function() {
        let (functions, _, tests) = symbol_counts(Language::Rust, "#[test]");
        assert_eq!(tests, 1, "#[test] must be counted as a test");
        assert_eq!(functions, 0, "#[test] attribute must not be counted as a function");
    }

    #[test]
    fn rust_fn_line_counts_as_function_not_test() {
        let (functions, _, tests) = symbol_counts(Language::Rust, "fn test_something() {");
        assert_eq!(functions, 1, "fn declaration must count as a function");
        assert_eq!(
            tests, 0,
            "fn declaration line must not be double-counted as a test"
        );
    }

    #[test]
    fn js_describe_counts_as_test_not_function() {
        let (functions, _, tests) =
            symbol_counts(Language::JavaScript, "describe('suite', () => {");
        assert_eq!(tests, 1, "describe must be counted as a test");
        assert_eq!(functions, 0, "describe must not be counted as a function");
    }

    #[test]
    fn js_regular_fn_counts_as_function() {
        let (functions, _, tests) = symbol_counts(Language::JavaScript, "function doWork() {");
        assert_eq!(functions, 1, "JS function declaration must be counted");
        assert_eq!(tests, 0);
    }

    // ---------------------------------------------------------------------
    // Language detection: extensions and well-known file names
    // ---------------------------------------------------------------------

    #[test]
    fn detect_language_rs_extension() {
        assert_eq!(detect_from_path("foo.rs"), Some(Language::Rust));
    }

    #[test]
    fn detect_language_py_extension() {
        assert_eq!(detect_from_path("foo.py"), Some(Language::Python));
    }

    #[test]
    fn detect_language_ts_extension() {
        assert_eq!(detect_from_path("app.ts"), Some(Language::TypeScript));
    }

    #[test]
    fn detect_language_js_extension() {
        assert_eq!(detect_from_path("app.js"), Some(Language::JavaScript));
    }

    #[test]
    fn detect_language_go_extension() {
        assert_eq!(detect_from_path("main.go"), Some(Language::Go));
    }

    #[test]
    fn detect_language_c_extension() {
        assert_eq!(detect_from_path("main.c"), Some(Language::C));
    }

    #[test]
    fn detect_language_cpp_extension() {
        assert_eq!(detect_from_path("main.cpp"), Some(Language::Cpp));
    }

    #[test]
    fn detect_language_java_extension() {
        assert_eq!(detect_from_path("Main.java"), Some(Language::Java));
    }

    #[test]
    fn detect_language_makefile_exact_name() {
        assert_eq!(detect_from_path("Makefile"), Some(Language::Makefile));
    }

    #[test]
    fn detect_language_dockerfile_exact_name() {
        assert_eq!(detect_from_path("Dockerfile"), Some(Language::Dockerfile));
    }

    #[test]
    fn detect_language_rakefile() {
        assert_eq!(detect_from_path("Rakefile"), Some(Language::Ruby));
    }

    #[test]
    fn detect_language_gemfile() {
        assert_eq!(detect_from_path("Gemfile"), Some(Language::Ruby));
    }

    #[test]
    fn detect_language_unknown_extension_returns_none() {
        assert_eq!(detect_from_path("foo.xyz123"), None);
    }

    #[test]
    fn detect_language_extension_override() {
        let mut overrides = BTreeMap::new();
        overrides.insert("h".into(), "cpp".into());
        let detected = detect_language(Path::new("header.h"), None, &overrides, false);
        assert_eq!(detected, Some(Language::Cpp));
    }

    // ---------------------------------------------------------------------
    // Language detection: shebang lines
    // ---------------------------------------------------------------------

    #[test]
    fn detect_language_shebang_python() {
        assert_eq!(
            detect_from_first_line("#!/usr/bin/env python3", true),
            Some(Language::Python)
        );
    }

    #[test]
    fn detect_language_shebang_bash() {
        assert_eq!(
            detect_from_first_line("#!/bin/bash", true),
            Some(Language::Shell)
        );
    }

    #[test]
    fn detect_language_shebang_ruby() {
        assert_eq!(
            detect_from_first_line("#!/usr/bin/env ruby", true),
            Some(Language::Ruby)
        );
    }

    #[test]
    fn detect_language_shebang_disabled() {
        // Same shebang as the Python case, but with the final flag off.
        assert_eq!(detect_from_first_line("#!/usr/bin/env python3", false), None);
    }

    // ---------------------------------------------------------------------
    // Language::from_name
    // ---------------------------------------------------------------------

    #[test]
    fn from_name_rust() {
        let parsed = Language::from_name("rust");
        assert_eq!(parsed, Some(Language::Rust));
    }

    #[test]
    fn from_name_python() {
        let parsed = Language::from_name("python");
        assert_eq!(parsed, Some(Language::Python));
    }

    #[test]
    fn from_name_unknown() {
        let parsed = Language::from_name("brainfuck");
        assert_eq!(parsed, None);
    }

    #[test]
    fn from_name_roundtrip_all() {
        // Every language listed here must survive slug -> name lookup.
        let languages = [
            Language::C,
            Language::Cpp,
            Language::CSharp,
            Language::Go,
            Language::Java,
            Language::JavaScript,
            Language::Python,
            Language::Rust,
            Language::Shell,
            Language::PowerShell,
            Language::TypeScript,
            Language::Assembly,
            Language::Clojure,
            Language::Css,
            Language::Dart,
            Language::Dockerfile,
            Language::Elixir,
            Language::Erlang,
            Language::FSharp,
            Language::Groovy,
            Language::Haskell,
            Language::Html,
            Language::Julia,
            Language::Kotlin,
            Language::Lua,
            Language::Makefile,
            Language::Nim,
            Language::ObjectiveC,
            Language::Ocaml,
            Language::Perl,
            Language::Php,
            Language::R,
            Language::Ruby,
            Language::Scala,
            Language::Scss,
            Language::Sql,
            Language::Svelte,
            Language::Swift,
            Language::Vue,
            Language::Xml,
            Language::Zig,
        ];

        for lang in languages {
            let slug = lang.as_slug();
            assert_eq!(
                Language::from_name(slug),
                Some(lang),
                "from_name({slug:?}) should return {lang:?}"
            );
        }
    }

    // ---------------------------------------------------------------------
    // Blank lines inside block comments
    // ---------------------------------------------------------------------

    #[test]
    fn blank_in_block_comment_defaults_to_comment() {
        let input = "/*\n\n*/";
        let options = AnalysisOptions {
            blank_in_block_comment_as_comment: true,
            ..AnalysisOptions::default()
        };

        let analysis = analyze_text(Language::C, input, options);
        let raw = &analysis.raw;
        assert_eq!(
            raw.multi_comment_only_lines, 3,
            "all 3 block-comment lines must count as multi-comment with CountAsComment policy"
        );
        assert_eq!(
            raw.blank_only_lines, 0,
            "no blank lines expected with CountAsComment policy"
        );
    }

    #[test]
    fn blank_in_block_comment_counted_as_blank_when_policy_false() {
        let input = "/*\n\n*/";
        let options = AnalysisOptions {
            blank_in_block_comment_as_comment: false,
            ..AnalysisOptions::default()
        };

        let analysis = analyze_text(Language::C, input, options);
        let raw = &analysis.raw;
        assert_eq!(
            raw.multi_comment_only_lines, 2,
            "opener and closer must count as multi-comment with CountAsBlank policy"
        );
        assert_eq!(
            raw.blank_only_lines, 1,
            "the blank line inside the block comment must count as blank with CountAsBlank policy"
        );
    }

    // ---------------------------------------------------------------------
    // Backslash continuation lines
    // ---------------------------------------------------------------------

    #[test]
    fn continuation_lines_each_physical_default() {
        let input = "#define FOO \\\n 1 \\\n + 2\n";
        let options = AnalysisOptions {
            collapse_continuation_lines: false,
            ..AnalysisOptions::default()
        };

        let analysis = analyze_text(Language::C, input, options);
        let raw = &analysis.raw;
        assert_eq!(raw.total_physical_lines, 3, "3 physical lines expected");
        assert_eq!(
            raw.code_only_lines, 3,
            "each physical line must count as code with EachPhysicalLine policy"
        );
    }

    #[test]
    fn continuation_lines_collapse_to_logical() {
        let input = "#define FOO \\\n 1 \\\n + 2\n";
        let options = AnalysisOptions {
            collapse_continuation_lines: true,
            ..AnalysisOptions::default()
        };

        let analysis = analyze_text(Language::C, input, options);
        let raw = &analysis.raw;
        assert_eq!(
            raw.total_physical_lines, 3,
            "physical line count is always 3 regardless of policy"
        );
        assert_eq!(
            raw.code_only_lines, 1,
            "3 continuation lines must collapse to 1 logical code line"
        );
    }
}