1pub mod style;
5pub use style::{IndentStyle, StyleAnalysis, StyleGuideScore, StyleSignal};
6
7use std::collections::{BTreeMap, BTreeSet, HashSet};
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
/// Languages and file formats that the detection and analysis routines in
/// this module can name.
///
/// Variants serialize through serde under their `snake_case` names. The
/// derived ordering follows declaration order, which is what lets the type be
/// stored in ordered collections such as `BTreeSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    /// Shell scripts; extension detection maps `sh`, `bash`, `zsh` and `ksh` here.
    Shell,
    PowerShell,
    TypeScript,
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
    Solidity,
    /// Displayed as "Protocol Buffers".
    Protobuf,
    /// Displayed as "HCL/Terraform".
    Hcl,
    GraphQl,
    Ada,
    Vhdl,
    /// Displayed as "Verilog/SystemVerilog".
    Verilog,
    Tcl,
    /// Displayed as "Pascal/Delphi".
    Pascal,
    VisualBasic,
    /// Displayed as "Lisp/Scheme".
    Lisp,
    Fortran,
    Nix,
    Crystal,
    D,
    /// Displayed as "GLSL/HLSL".
    Glsl,
    Cmake,
    Elm,
    Awk,
}
80
81impl Language {
82 #[must_use]
83 pub const fn display_name(&self) -> &'static str {
84 match self {
85 Self::C => "C",
86 Self::Cpp => "C++",
87 Self::CSharp => "C#",
88 Self::Go => "Go",
89 Self::Java => "Java",
90 Self::JavaScript => "JavaScript",
91 Self::Python => "Python",
92 Self::Rust => "Rust",
93 Self::Shell => "Shell",
94 Self::PowerShell => "PowerShell",
95 Self::TypeScript => "TypeScript",
96 Self::Assembly => "Assembly",
97 Self::Clojure => "Clojure",
98 Self::Css => "CSS",
99 Self::Dart => "Dart",
100 Self::Dockerfile => "Dockerfile",
101 Self::Elixir => "Elixir",
102 Self::Erlang => "Erlang",
103 Self::FSharp => "F#",
104 Self::Groovy => "Groovy",
105 Self::Haskell => "Haskell",
106 Self::Html => "HTML",
107 Self::Julia => "Julia",
108 Self::Kotlin => "Kotlin",
109 Self::Lua => "Lua",
110 Self::Makefile => "Makefile",
111 Self::Nim => "Nim",
112 Self::ObjectiveC => "Objective-C",
113 Self::Ocaml => "OCaml",
114 Self::Perl => "Perl",
115 Self::Php => "PHP",
116 Self::R => "R",
117 Self::Ruby => "Ruby",
118 Self::Scala => "Scala",
119 Self::Scss => "SCSS",
120 Self::Sql => "SQL",
121 Self::Svelte => "Svelte",
122 Self::Swift => "Swift",
123 Self::Vue => "Vue",
124 Self::Xml => "XML",
125 Self::Zig => "Zig",
126 Self::Solidity => "Solidity",
127 Self::Protobuf => "Protocol Buffers",
128 Self::Hcl => "HCL/Terraform",
129 Self::GraphQl => "GraphQL",
130 Self::Ada => "Ada",
131 Self::Vhdl => "VHDL",
132 Self::Verilog => "Verilog/SystemVerilog",
133 Self::Tcl => "Tcl",
134 Self::Pascal => "Pascal/Delphi",
135 Self::VisualBasic => "Visual Basic",
136 Self::Lisp => "Lisp/Scheme",
137 Self::Fortran => "Fortran",
138 Self::Nix => "Nix",
139 Self::Crystal => "Crystal",
140 Self::D => "D",
141 Self::Glsl => "GLSL/HLSL",
142 Self::Cmake => "CMake",
143 Self::Elm => "Elm",
144 Self::Awk => "Awk",
145 }
146 }
147
148 #[must_use]
149 pub const fn as_slug(&self) -> &'static str {
150 match self {
151 Self::C => "c",
152 Self::Cpp => "cpp",
153 Self::CSharp => "csharp",
154 Self::Go => "go",
155 Self::Java => "java",
156 Self::JavaScript => "javascript",
157 Self::Python => "python",
158 Self::Rust => "rust",
159 Self::Shell => "shell",
160 Self::PowerShell => "powershell",
161 Self::TypeScript => "typescript",
162 Self::Assembly => "assembly",
163 Self::Clojure => "clojure",
164 Self::Css => "css",
165 Self::Dart => "dart",
166 Self::Dockerfile => "dockerfile",
167 Self::Elixir => "elixir",
168 Self::Erlang => "erlang",
169 Self::FSharp => "fsharp",
170 Self::Groovy => "groovy",
171 Self::Haskell => "haskell",
172 Self::Html => "html",
173 Self::Julia => "julia",
174 Self::Kotlin => "kotlin",
175 Self::Lua => "lua",
176 Self::Makefile => "makefile",
177 Self::Nim => "nim",
178 Self::ObjectiveC => "objectivec",
179 Self::Ocaml => "ocaml",
180 Self::Perl => "perl",
181 Self::Php => "php",
182 Self::R => "r",
183 Self::Ruby => "ruby",
184 Self::Scala => "scala",
185 Self::Scss => "scss",
186 Self::Sql => "sql",
187 Self::Svelte => "svelte",
188 Self::Swift => "swift",
189 Self::Vue => "vue",
190 Self::Xml => "xml",
191 Self::Zig => "zig",
192 Self::Solidity => "solidity",
193 Self::Protobuf => "protobuf",
194 Self::Hcl => "hcl",
195 Self::GraphQl => "graphql",
196 Self::Ada => "ada",
197 Self::Vhdl => "vhdl",
198 Self::Verilog => "verilog",
199 Self::Tcl => "tcl",
200 Self::Pascal => "pascal",
201 Self::VisualBasic => "visualbasic",
202 Self::Lisp => "lisp",
203 Self::Fortran => "fortran",
204 Self::Nix => "nix",
205 Self::Crystal => "crystal",
206 Self::D => "d",
207 Self::Glsl => "glsl",
208 Self::Cmake => "cmake",
209 Self::Elm => "elm",
210 Self::Awk => "awk",
211 }
212 }
213
214 #[must_use]
215 pub fn from_name(name: &str) -> Option<Self> {
216 match name.trim().to_ascii_lowercase().as_str() {
217 "c" => Some(Self::C),
218 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
219 "csharp" | "c#" | "cs" => Some(Self::CSharp),
220 "go" | "golang" => Some(Self::Go),
221 "java" => Some(Self::Java),
222 "javascript" | "js" => Some(Self::JavaScript),
223 "python" | "py" => Some(Self::Python),
224 "rust" | "rs" => Some(Self::Rust),
225 "shell" | "sh" | "bash" => Some(Self::Shell),
226 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
227 "typescript" | "ts" => Some(Self::TypeScript),
228 "assembly" | "asm" => Some(Self::Assembly),
229 "clojure" | "clj" => Some(Self::Clojure),
230 "css" => Some(Self::Css),
231 "dart" => Some(Self::Dart),
232 "dockerfile" | "docker" => Some(Self::Dockerfile),
233 "elixir" | "ex" => Some(Self::Elixir),
234 "erlang" | "erl" => Some(Self::Erlang),
235 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
236 "groovy" => Some(Self::Groovy),
237 "haskell" | "hs" => Some(Self::Haskell),
238 "html" | "htm" => Some(Self::Html),
239 "julia" | "jl" => Some(Self::Julia),
240 "kotlin" | "kt" => Some(Self::Kotlin),
241 "lua" => Some(Self::Lua),
242 "makefile" | "make" | "mk" => Some(Self::Makefile),
243 "nim" => Some(Self::Nim),
244 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
245 "ocaml" | "ml" => Some(Self::Ocaml),
246 "perl" | "pl" => Some(Self::Perl),
247 "php" => Some(Self::Php),
248 "r" => Some(Self::R),
249 "ruby" | "rb" => Some(Self::Ruby),
250 "scala" => Some(Self::Scala),
251 "scss" | "sass" => Some(Self::Scss),
252 "sql" => Some(Self::Sql),
253 "svelte" => Some(Self::Svelte),
254 "swift" => Some(Self::Swift),
255 "vue" => Some(Self::Vue),
256 "xml" => Some(Self::Xml),
257 "zig" => Some(Self::Zig),
258 "solidity" | "sol" => Some(Self::Solidity),
259 "protobuf" | "proto" | "protocolbuffers" => Some(Self::Protobuf),
260 "hcl" | "terraform" | "tf" => Some(Self::Hcl),
261 "graphql" | "gql" => Some(Self::GraphQl),
262 "ada" => Some(Self::Ada),
263 "vhdl" => Some(Self::Vhdl),
264 "verilog" | "systemverilog" | "sv" => Some(Self::Verilog),
265 "tcl" => Some(Self::Tcl),
266 "pascal" | "delphi" | "pas" => Some(Self::Pascal),
267 "visualbasic" | "vb" | "vbnet" | "vb.net" => Some(Self::VisualBasic),
268 "lisp" | "scheme" | "racket" | "clisp" | "elisp" => Some(Self::Lisp),
269 "fortran" | "f90" | "f95" => Some(Self::Fortran),
270 "nix" => Some(Self::Nix),
271 "crystal" | "cr" => Some(Self::Crystal),
272 "d" | "dlang" => Some(Self::D),
273 "glsl" | "hlsl" | "shader" | "wgsl" => Some(Self::Glsl),
274 "cmake" => Some(Self::Cmake),
275 "elm" => Some(Self::Elm),
276 "awk" => Some(Self::Awk),
277 _ => None,
278 }
279 }
280}
281
/// Raw per-file counters, carried in [`RawFileAnalysis::raw`].
///
/// `Default` yields all-zero counters, `lsloc == None` and an empty hash
/// list. Fields tagged `#[serde(default)]` may be missing from serialized
/// input and then take their default value.
///
/// NOTE(review): the per-field descriptions below are inferred from the field
/// names; the scanner that fills them in is not part of this excerpt —
/// confirm against it.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    /// Presumably every physical line in the file, whatever its category.
    pub total_physical_lines: u64,
    /// Presumably lines containing nothing but whitespace.
    pub blank_only_lines: u64,
    /// Presumably lines containing code and no comment.
    pub code_only_lines: u64,
    /// Presumably lines consisting solely of a single-line comment.
    pub single_comment_only_lines: u64,
    /// Presumably lines lying entirely inside a multi-line (block) comment.
    pub multi_comment_only_lines: u64,
    /// Presumably lines with code followed by a single-line comment.
    pub mixed_code_single_comment_lines: u64,
    /// Presumably lines that mix code with part of a block comment.
    pub mixed_code_multi_comment_lines: u64,
    /// Presumably lines classified as docstrings.
    pub docstring_comment_lines: u64,
    /// Presumably lines the scanner could not classify.
    pub skipped_unknown_lines: u64,
    /// Presumably the number of function-like definitions found.
    #[serde(default)]
    pub functions: u64,
    /// Presumably the number of class/type-like definitions found.
    #[serde(default)]
    pub classes: u64,
    /// Presumably the number of variable declarations found.
    #[serde(default)]
    pub variables: u64,
    /// Presumably the number of import/include statements found.
    #[serde(default)]
    pub imports: u64,
    /// Presumably lines holding compiler/preprocessor directives.
    #[serde(default)]
    pub compiler_directive_lines: u64,
    /// Presumably the number of test cases found.
    #[serde(default)]
    pub test_count: u64,
    /// Presumably the number of assertion calls found.
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Presumably the number of test suites/groups found.
    #[serde(default)]
    pub test_suite_count: u64,
    /// Presumably a cyclomatic-complexity estimate for the file.
    #[serde(default)]
    pub cyclomatic_complexity: u32,
    /// Logical-SLOC figure when one is available; left out of serialized
    /// output entirely when `None`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub lsloc: Option<u32>,
    /// Hash values associated with code lines. Never serialized or
    /// deserialized (`#[serde(skip)]`), so it is empty after deserialization.
    #[serde(skip)]
    pub code_line_hashes: Vec<u64>,
}
335
/// Identifies which analysis strategy produced a [`RawFileAnalysis`].
///
/// Serialized through serde under `snake_case` variant names.
///
/// NOTE(review): the variant descriptions are inferred from the names; the
/// code that selects a mode is outside this excerpt — confirm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    /// Presumably the regular lexical (token-level) scan.
    Lexical,
    /// Presumably a lexical scan whose result is only approximate.
    LexicalBestEffort,
    /// Presumably a result obtained from a tree-sitter parse.
    TreeSitter,
}
343
/// Result of analysing one file's text; this is what [`analyze_text`] returns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// Line and symbol counters for the file.
    pub raw: RawLineCounts,
    /// Strategy that produced this result.
    pub parse_mode: ParseMode,
    /// Free-form warning messages attached to the result.
    pub warnings: Vec<String>,
    /// Style-analysis output, when it was computed. Omitted from serialized
    /// output when `None` and optional on input.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
353
/// Switches that tune how [`analyze_text`] processes a file.
///
/// The type is `Copy`, so it is passed by value.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// Forwarded unchanged to the generic scanner's flags. Presumably decides
    /// whether a blank line inside a block comment is tallied as a comment
    /// line — confirm against the scanner.
    pub blank_in_block_comment_as_comment: bool,
    /// Forwarded unchanged to the generic scanner's flags. Presumably makes
    /// continuation lines count as part of one logical line — confirm against
    /// the scanner.
    pub collapse_continuation_lines: bool,
    /// Master switch for style analysis; when `false`, `analyze_text` never
    /// runs it.
    pub enable_style: bool,
    /// Limits which languages receive style analysis when `enable_style` is
    /// set.
    pub style_lang_scope: StyleLangScope,
}
373
/// Selects the set of languages for which style analysis is performed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StyleLangScope {
    /// Style analysis applies to every language.
    All,
    /// Style analysis applies only to C, C++ and Objective-C.
    CFamilyOnly,
}
380
/// How logical source lines (LSLOC) are counted for a language; chosen per
/// language by `language_complexity_config`.
///
/// NOTE(review): the counting rules themselves live in the scanner, which is
/// outside this excerpt; the variant descriptions are inferred from the names.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LslocStrategy {
    /// Presumably counts statement-terminating semicolons.
    Semicolons,
    /// Presumably counts newlines that do not continue the previous line.
    NonContinuationNewlines,
    /// No LSLOC figure is produced for the language (presumably leaving
    /// `RawLineCounts::lsloc` as `None`).
    Unsupported,
}
393
394impl Default for AnalysisOptions {
395 fn default() -> Self {
396 Self {
397 blank_in_block_comment_as_comment: true,
398 collapse_continuation_lines: false,
399 enable_style: true,
400 style_lang_scope: StyleLangScope::All,
401 }
402 }
403}
404
405#[must_use]
406pub fn supported_languages() -> BTreeSet<Language> {
407 [
408 Language::Assembly,
409 Language::C,
410 Language::Clojure,
411 Language::Cpp,
412 Language::CSharp,
413 Language::Css,
414 Language::Dart,
415 Language::Dockerfile,
416 Language::Elixir,
417 Language::Erlang,
418 Language::FSharp,
419 Language::Go,
420 Language::Groovy,
421 Language::Haskell,
422 Language::Html,
423 Language::Java,
424 Language::JavaScript,
425 Language::Julia,
426 Language::Kotlin,
427 Language::Lua,
428 Language::Makefile,
429 Language::Nim,
430 Language::ObjectiveC,
431 Language::Ocaml,
432 Language::Perl,
433 Language::Php,
434 Language::PowerShell,
435 Language::Python,
436 Language::R,
437 Language::Ruby,
438 Language::Rust,
439 Language::Scala,
440 Language::Scss,
441 Language::Shell,
442 Language::Sql,
443 Language::Svelte,
444 Language::Swift,
445 Language::TypeScript,
446 Language::Vue,
447 Language::Xml,
448 Language::Zig,
449 Language::Solidity,
450 Language::Protobuf,
451 Language::Hcl,
452 Language::GraphQl,
453 Language::Ada,
454 Language::Vhdl,
455 Language::Verilog,
456 Language::Tcl,
457 Language::Pascal,
458 Language::VisualBasic,
459 Language::Lisp,
460 Language::Fortran,
461 Language::Nix,
462 Language::Crystal,
463 Language::D,
464 Language::Glsl,
465 Language::Cmake,
466 Language::Elm,
467 Language::Awk,
468 ]
469 .into_iter()
470 .collect()
471}
472
473fn detect_by_shebang(line: &str) -> Option<Language> {
475 let lower = line.to_ascii_lowercase();
476 if !lower.starts_with("#!") {
477 return None;
478 }
479 if lower.contains("python") {
480 return Some(Language::Python);
481 }
482 if lower.contains("pwsh") || lower.contains("powershell") {
483 return Some(Language::PowerShell);
484 }
485 if lower.contains("bash")
486 || lower.contains("/sh")
487 || lower.contains("zsh")
488 || lower.contains("ksh")
489 {
490 return Some(Language::Shell);
491 }
492 if lower.contains("ruby") {
493 return Some(Language::Ruby);
494 }
495 if lower.contains("perl") {
496 return Some(Language::Perl);
497 }
498 if lower.contains("php") {
499 return Some(Language::Php);
500 }
501 if lower.contains("node") || lower.contains("nodejs") {
502 return Some(Language::JavaScript);
503 }
504 None
505}
506
507#[allow(clippy::too_many_lines)]
509fn detect_by_extension(ext: &str) -> Option<Language> {
510 static EXT_MAP: &[(&str, Language)] = &[
512 ("c", Language::C),
513 ("h", Language::C),
514 ("cc", Language::Cpp),
515 ("cp", Language::Cpp),
516 ("cpp", Language::Cpp),
517 ("cxx", Language::Cpp),
518 ("hh", Language::Cpp),
519 ("hpp", Language::Cpp),
520 ("hxx", Language::Cpp),
521 ("cs", Language::CSharp),
522 ("go", Language::Go),
523 ("java", Language::Java),
524 ("js", Language::JavaScript),
525 ("mjs", Language::JavaScript),
526 ("cjs", Language::JavaScript),
527 ("py", Language::Python),
528 ("rs", Language::Rust),
529 ("sh", Language::Shell),
530 ("bash", Language::Shell),
531 ("zsh", Language::Shell),
532 ("ksh", Language::Shell),
533 ("ps1", Language::PowerShell),
534 ("psm1", Language::PowerShell),
535 ("psd1", Language::PowerShell),
536 ("ts", Language::TypeScript),
537 ("mts", Language::TypeScript),
538 ("cts", Language::TypeScript),
539 ("tsx", Language::TypeScript),
540 ("jsx", Language::JavaScript),
541 ("asm", Language::Assembly),
542 ("s", Language::Assembly),
543 ("clj", Language::Clojure),
544 ("cljs", Language::Clojure),
545 ("cljc", Language::Clojure),
546 ("edn", Language::Clojure),
547 ("css", Language::Css),
548 ("dart", Language::Dart),
549 ("ex", Language::Elixir),
550 ("exs", Language::Elixir),
551 ("erl", Language::Erlang),
552 ("hrl", Language::Erlang),
553 ("fs", Language::FSharp),
554 ("fsi", Language::FSharp),
555 ("fsx", Language::FSharp),
556 ("groovy", Language::Groovy),
557 ("gradle", Language::Groovy),
558 ("hs", Language::Haskell),
559 ("lhs", Language::Haskell),
560 ("html", Language::Html),
561 ("htm", Language::Html),
562 ("xhtml", Language::Html),
563 ("jl", Language::Julia),
564 ("kt", Language::Kotlin),
565 ("kts", Language::Kotlin),
566 ("lua", Language::Lua),
567 ("mk", Language::Makefile),
568 ("nim", Language::Nim),
569 ("nims", Language::Nim),
570 ("m", Language::ObjectiveC),
571 ("mm", Language::ObjectiveC),
572 ("ml", Language::Ocaml),
573 ("mli", Language::Ocaml),
574 ("pl", Language::Perl),
575 ("pm", Language::Perl),
576 ("t", Language::Perl),
577 ("php", Language::Php),
578 ("php3", Language::Php),
579 ("php4", Language::Php),
580 ("php5", Language::Php),
581 ("php7", Language::Php),
582 ("phtml", Language::Php),
583 ("r", Language::R),
584 ("rb", Language::Ruby),
585 ("rake", Language::Ruby),
586 ("scala", Language::Scala),
587 ("sc", Language::Scala),
588 ("scss", Language::Scss),
589 ("sass", Language::Scss),
590 ("sql", Language::Sql),
591 ("svelte", Language::Svelte),
592 ("swift", Language::Swift),
593 ("vue", Language::Vue),
594 ("xml", Language::Xml),
595 ("xsd", Language::Xml),
596 ("xsl", Language::Xml),
597 ("xslt", Language::Xml),
598 ("svg", Language::Xml),
599 ("zig", Language::Zig),
600 ("sol", Language::Solidity),
601 ("proto", Language::Protobuf),
602 ("tf", Language::Hcl),
603 ("tfvars", Language::Hcl),
604 ("hcl", Language::Hcl),
605 ("graphql", Language::GraphQl),
606 ("gql", Language::GraphQl),
607 ("adb", Language::Ada),
608 ("ads", Language::Ada),
609 ("ada", Language::Ada),
610 ("vhd", Language::Vhdl),
611 ("vhdl", Language::Vhdl),
612 ("v", Language::Verilog),
613 ("sv", Language::Verilog),
614 ("svh", Language::Verilog),
615 ("vh", Language::Verilog),
616 ("tcl", Language::Tcl),
617 ("pas", Language::Pascal),
618 ("dpr", Language::Pascal),
619 ("vb", Language::VisualBasic),
620 ("bas", Language::VisualBasic),
621 ("lisp", Language::Lisp),
622 ("lsp", Language::Lisp),
623 ("el", Language::Lisp),
624 ("scm", Language::Lisp),
625 ("ss", Language::Lisp),
626 ("rkt", Language::Lisp),
627 ("f90", Language::Fortran),
628 ("f95", Language::Fortran),
629 ("f03", Language::Fortran),
630 ("f08", Language::Fortran),
631 ("f", Language::Fortran),
632 ("for", Language::Fortran),
633 ("nix", Language::Nix),
634 ("cr", Language::Crystal),
635 ("d", Language::D),
636 ("glsl", Language::Glsl),
637 ("vert", Language::Glsl),
638 ("frag", Language::Glsl),
639 ("comp", Language::Glsl),
640 ("geom", Language::Glsl),
641 ("tesc", Language::Glsl),
642 ("tese", Language::Glsl),
643 ("hlsl", Language::Glsl),
644 ("wgsl", Language::Glsl),
645 ("cmake", Language::Cmake),
646 ("elm", Language::Elm),
647 ("awk", Language::Awk),
648 ];
649 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
650}
651
652fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
654 if filename == "Dockerfile"
656 || filename.starts_with("Dockerfile.")
657 || filename_lower == "dockerfile"
658 {
659 return Some(Language::Dockerfile);
660 }
661 if matches!(
663 filename,
664 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
665 ) {
666 return Some(Language::Makefile);
667 }
668 if matches!(
670 filename,
671 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
672 ) {
673 return Some(Language::Ruby);
674 }
675 if filename == "CMakeLists.txt" || filename_lower == "cmakelists.txt" {
678 return Some(Language::Cmake);
679 }
680 None
681}
682
683#[must_use]
684#[allow(clippy::too_many_lines)]
685pub fn detect_language(
686 path: &Path,
687 first_line: Option<&str>,
688 extension_overrides: &BTreeMap<String, String>,
689 shebang_detection: bool,
690) -> Option<Language> {
691 let extension = path
692 .extension()
693 .and_then(|ext| ext.to_str())
694 .map(str::to_ascii_lowercase);
695
696 if let Some(ext) = extension.as_ref() {
698 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
699 if let Some(lang) = Language::from_name(override_name) {
700 return Some(lang);
701 }
702 }
703 }
704
705 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
707 let filename_lower = filename.to_ascii_lowercase();
708
709 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
710 return Some(lang);
711 }
712
713 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
715 return Some(lang);
716 }
717
718 if shebang_detection {
720 if let Some(line) = first_line {
721 if let Some(lang) = detect_by_shebang(line) {
722 return Some(lang);
723 }
724 }
725 }
726
727 None
728}
729
730#[must_use]
731pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
732 #[cfg(feature = "tree-sitter")]
734 {
735 match language {
736 Language::C | Language::Cpp => {
737 if let Some(mut result) = ts::analyze_c(text) {
738 if options.enable_style
739 && should_style_analyse(language, options.style_lang_scope)
740 {
741 result.style_analysis = style::analyze_style(language, text);
742 }
743 return result;
744 }
745 }
746 Language::Python => {
747 if let Some(result) = ts::analyze_python(text) {
748 return result;
749 }
750 }
751 _ => {}
752 }
753 }
754
755 let (mut config, has_preprocessor) = language_scan_config(language);
756
757 if language == Language::Python {
759 config.skip_lines = detect_python_docstring_lines(text);
760 }
761
762 let flags = IeeeFlags {
765 has_preprocessor_directives: has_preprocessor,
766 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
767 collapse_continuation_lines: options.collapse_continuation_lines,
768 };
769 let mut result = analyze_generic(text, config, flags);
770 if options.enable_style && should_style_analyse(language, options.style_lang_scope) {
771 result.style_analysis = style::analyze_style(language, text);
772 }
773 result
774}
775
776const fn should_style_analyse(language: Language, scope: StyleLangScope) -> bool {
778 match scope {
779 StyleLangScope::CFamilyOnly => {
780 matches!(language, Language::C | Language::Cpp | Language::ObjectiveC)
781 }
782 StyleLangScope::All => true,
783 }
784}
785
786fn language_scan_config(language: Language) -> (ScanConfig, bool) {
794 let cfg = LANG_SCAN_TABLE
795 .iter()
796 .find_map(|&(l, c)| (l == language).then_some(c))
797 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
798 let (branch_keywords, lsloc_strategy) = language_complexity_config(language);
799 (
800 ScanConfig {
801 line_comments: cfg.line_comments,
802 block_comment: cfg.block_comment,
803 allow_single_quote_strings: cfg.allow_single_quote_strings,
804 allow_double_quote_strings: cfg.allow_double_quote_strings,
805 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
806 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
807 skip_lines: HashSet::new(),
808 symbol_patterns: cfg.symbol_patterns,
809 branch_keywords,
810 lsloc_strategy,
811 },
812 cfg.has_preprocessor,
813 )
814}
815
// Branch-keyword tables, one per language family. `language_complexity_config`
// selects the table for a language and `language_scan_config` stores it in
// `ScanConfig::branch_keywords`.
//
// NOTE(review): how the scanner matches these tokens (word boundaries, case
// sensitivity, whether element order matters) is not visible in this part of
// the file — confirm before reordering or editing entries.

// C-style keywords and short-circuit operators, without the ternary `?`.
const BRANCH_C_FAMILY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&",
];
// Same entries as `BRANCH_C_FAMILY` plus the ternary `?`.
const BRANCH_C_TERNARY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&", "?",
];
const BRANCH_GO: &[&str] = &["if", "else", "for", "switch", "case", "select", "||", "&&"];
const BRANCH_RUST: &[&str] = &["if", "else", "for", "while", "match", "||", "&&"];
const BRANCH_ZIG: &[&str] = &["if", "else", "for", "while", "switch", "catch", "||", "&&"];
const BRANCH_FSHARP: &[&str] = &["if", "then", "else", "elif", "match", "when", "||", "&&"];
const BRANCH_LUA: &[&str] = &[
    "if", "elseif", "else", "for", "while", "repeat", "and", "or",
];
const BRANCH_HASKELL: &[&str] = &["if", "then", "else", "case", "otherwise"];
// Lists each keyword in both upper and lower case.
const BRANCH_SQL: &[&str] = &["CASE", "WHEN", "IF", "ELSE", "case", "when", "if", "else"];
const BRANCH_OCAML: &[&str] = &["if", "then", "else", "match", "when", "||", "&&"];
const BRANCH_CLOJURE: &[&str] = &["if", "when", "cond", "case", "and", "or"];
const BRANCH_PHP: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "case", "catch", "match", "||", "&&", "?",
];
const BRANCH_JULIA: &[&str] = &["if", "elseif", "else", "for", "while", "catch", "||", "&&"];
const BRANCH_PYTHON: &[&str] = &["if", "elif", "else", "for", "while", "except", "or", "and"];
// Also used for Crystal.
const BRANCH_RUBY: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "while", "case", "when", "rescue", "||", "&&",
];
const BRANCH_SHELL: &[&str] = &["if", "elif", "else", "while", "until", "case", "||", "&&"];
const BRANCH_ELIXIR: &[&str] = &[
    "if", "else", "cond", "case", "when", "rescue", "||", "&&", "and", "or",
];
const BRANCH_POWERSHELL: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "foreach", "||", "&&",
];
const BRANCH_NIM: &[&str] = &[
    "if", "elif", "else", "for", "while", "case", "of", "except", "and", "or",
];
const BRANCH_PERL: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "for", "while", "foreach", "||", "&&",
];
const BRANCH_R: &[&str] = &["if", "else", "for", "while", "repeat", "||", "&&"];
const BRANCH_ADA: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or",
];
const BRANCH_VHDL: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or", "nand", "nor",
    "xor",
];
const BRANCH_VERILOG: &[&str] = &[
    "if", "else", "case", "casex", "casez", "for", "while", "&&", "||",
];
const BRANCH_TCL: &[&str] = &["if", "elseif", "else", "switch", "while", "for", "foreach"];
const BRANCH_PASCAL: &[&str] = &[
    "if", "then", "else", "case", "while", "for", "repeat", "until", "and", "or",
];
// Entries are capitalised, unlike the other tables.
const BRANCH_VB: &[&str] = &[
    "If", "Then", "ElseIf", "Else", "Select", "Case", "While", "For", "Do", "And", "Or",
];
const BRANCH_LISP: &[&str] = &["if", "when", "unless", "cond", "case", "and", "or"];
const BRANCH_FORTRAN: &[&str] = &[
    "if", "then", "else", "elseif", "case", "do", "while", "where",
];
const BRANCH_NIX: &[&str] = &["if", "then", "else"];
// Each entry includes the opening parenthesis of the command call.
const BRANCH_CMAKE: &[&str] = &["if(", "elseif(", "else(", "while(", "foreach("];
const BRANCH_ELM: &[&str] = &["if", "then", "else", "case", "of"];
const BRANCH_AWK: &[&str] = &["if", "else", "while", "for", "do"];
886
887const fn language_complexity_config(
890 language: Language,
891) -> (&'static [&'static str], LslocStrategy) {
892 match language {
893 Language::C
895 | Language::Cpp
896 | Language::ObjectiveC
897 | Language::CSharp
898 | Language::JavaScript
899 | Language::TypeScript
900 | Language::Svelte
901 | Language::Vue
902 | Language::Dart
903 | Language::Groovy
904 | Language::Swift
905 | Language::Solidity => (BRANCH_C_TERNARY, LslocStrategy::Semicolons),
906 Language::Java | Language::Kotlin | Language::Scala | Language::D | Language::Glsl => {
908 (BRANCH_C_FAMILY, LslocStrategy::Semicolons)
909 }
910 Language::Go => (BRANCH_GO, LslocStrategy::Semicolons),
911 Language::Rust => (BRANCH_RUST, LslocStrategy::Semicolons),
912 Language::Zig => (BRANCH_ZIG, LslocStrategy::Semicolons),
913 Language::FSharp => (BRANCH_FSHARP, LslocStrategy::Unsupported),
914 Language::Shell => (BRANCH_SHELL, LslocStrategy::NonContinuationNewlines),
916 Language::Elixir => (BRANCH_ELIXIR, LslocStrategy::NonContinuationNewlines),
917 Language::Perl => (BRANCH_PERL, LslocStrategy::Semicolons),
918 Language::R => (BRANCH_R, LslocStrategy::NonContinuationNewlines),
919 Language::Ruby | Language::Crystal => (BRANCH_RUBY, LslocStrategy::NonContinuationNewlines),
920 Language::Python => (BRANCH_PYTHON, LslocStrategy::NonContinuationNewlines),
921 Language::PowerShell => (BRANCH_POWERSHELL, LslocStrategy::Unsupported),
922 Language::Nim => (BRANCH_NIM, LslocStrategy::NonContinuationNewlines),
923 Language::Lua => (BRANCH_LUA, LslocStrategy::Unsupported),
925 Language::Haskell => (BRANCH_HASKELL, LslocStrategy::Unsupported),
926 Language::Sql => (BRANCH_SQL, LslocStrategy::Semicolons),
927 Language::Ocaml => (BRANCH_OCAML, LslocStrategy::Semicolons),
928 Language::Clojure => (BRANCH_CLOJURE, LslocStrategy::Unsupported),
929 Language::Php => (BRANCH_PHP, LslocStrategy::Semicolons),
930 Language::Julia => (BRANCH_JULIA, LslocStrategy::NonContinuationNewlines),
931 Language::Protobuf => (&[], LslocStrategy::Semicolons),
932 Language::Hcl => (&[], LslocStrategy::NonContinuationNewlines),
933 Language::Ada => (BRANCH_ADA, LslocStrategy::Semicolons),
935 Language::Vhdl => (BRANCH_VHDL, LslocStrategy::Semicolons),
936 Language::Verilog => (BRANCH_VERILOG, LslocStrategy::Semicolons),
937 Language::Tcl => (BRANCH_TCL, LslocStrategy::NonContinuationNewlines),
938 Language::Pascal => (BRANCH_PASCAL, LslocStrategy::Semicolons),
939 Language::VisualBasic => (BRANCH_VB, LslocStrategy::NonContinuationNewlines),
940 Language::Lisp => (BRANCH_LISP, LslocStrategy::Unsupported),
941 Language::Fortran => (BRANCH_FORTRAN, LslocStrategy::NonContinuationNewlines),
943 Language::Nix => (BRANCH_NIX, LslocStrategy::Unsupported),
944 Language::Cmake => (BRANCH_CMAKE, LslocStrategy::Unsupported),
945 Language::Elm => (BRANCH_ELM, LslocStrategy::Unsupported),
946 Language::Awk => (BRANCH_AWK, LslocStrategy::NonContinuationNewlines),
947 Language::Makefile
949 | Language::Dockerfile
950 | Language::Css
951 | Language::Html
952 | Language::Xml
953 | Language::Assembly
954 | Language::Erlang
955 | Language::GraphQl
956 | Language::Scss => (&[], LslocStrategy::Unsupported),
957 }
958}
959
/// Per-language lists of literal text fragments, grouped by the kind of
/// construct they are meant to signal.
///
/// Every list is a `'static` slice, so values can be built in `const`
/// context and copied freely.
///
/// NOTE(review): the matching rules (line start vs. anywhere, what the
/// `_prefix_paren` / `_prefix_no_paren` variants require) are implemented by
/// the scanner, which is outside this excerpt; the field descriptions are
/// inferred from the field names — confirm against the scanner.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Fragments that presumably mark a function definition.
    functions: &'static [&'static str],
    /// Function fragments that presumably also require a following `(`.
    functions_prefix_paren: &'static [&'static str],
    /// Fragments that presumably mark a class/type-like definition.
    classes: &'static [&'static str],
    /// Fragments that presumably mark a variable declaration.
    variables: &'static [&'static str],
    /// Fragments that presumably mark an import/include statement.
    imports: &'static [&'static str],
    /// Fragments that presumably mark a test case.
    tests: &'static [&'static str],
    /// Fragments that presumably mark an assertion.
    assertions: &'static [&'static str],
    /// Fragments that presumably mark a test suite or group.
    test_suites: &'static [&'static str],
    /// Variable fragments that presumably must not be followed by `(`.
    variables_prefix_no_paren: &'static [&'static str],
}
991
992impl SymbolPatterns {
993 const fn none() -> Self {
994 Self {
995 functions: &[],
996 functions_prefix_paren: &[],
997 classes: &[],
998 variables: &[],
999 imports: &[],
1000 tests: &[],
1001 assertions: &[],
1002 test_suites: &[],
1003 variables_prefix_no_paren: &[],
1004 }
1005 }
1006}
1007
1008const SP_NONE: SymbolPatterns = SymbolPatterns::none(); const SP_SOLIDITY: SymbolPatterns = SymbolPatterns {
1013 functions: &[
1014 "function ",
1015 "modifier ",
1016 "constructor",
1017 "receive ",
1018 "fallback ",
1019 ],
1020 functions_prefix_paren: &[],
1021 classes: &["contract ", "interface ", "library ", "struct ", "enum "],
1022 variables: &[],
1023 imports: &["import "],
1024 tests: &[],
1025 assertions: &[],
1026 test_suites: &[],
1027 variables_prefix_no_paren: &[],
1028};
1029
1030const SP_PROTOBUF: SymbolPatterns = SymbolPatterns {
1033 functions: &["rpc "],
1034 functions_prefix_paren: &[],
1035 classes: &["message ", "service ", "enum "],
1036 variables: &[],
1037 imports: &["import "],
1038 tests: &[],
1039 assertions: &[],
1040 test_suites: &[],
1041 variables_prefix_no_paren: &[],
1042};
1043
1044const SP_ADA: SymbolPatterns = SymbolPatterns {
1046 functions: &["procedure ", "function "],
1047 functions_prefix_paren: &[],
1048 classes: &["package ", "type ", "task ", "protected "],
1049 variables: &[],
1050 imports: &["with ", "use "],
1051 tests: &[],
1052 assertions: &[],
1053 test_suites: &[],
1054 variables_prefix_no_paren: &[],
1055};
1056
1057const SP_VHDL: SymbolPatterns = SymbolPatterns {
1058 functions: &["function ", "procedure ", "process "],
1059 functions_prefix_paren: &[],
1060 classes: &["entity ", "architecture ", "package ", "component "],
1061 variables: &[],
1062 imports: &["library ", "use "],
1063 tests: &[],
1064 assertions: &[],
1065 test_suites: &[],
1066 variables_prefix_no_paren: &[],
1067};
1068
1069const SP_VERILOG: SymbolPatterns = SymbolPatterns {
1070 functions: &["function ", "task "],
1071 functions_prefix_paren: &[],
1072 classes: &["module ", "interface ", "class ", "package "],
1073 variables: &[],
1074 imports: &["import ", "`include"],
1075 tests: &[],
1076 assertions: &[],
1077 test_suites: &[],
1078 variables_prefix_no_paren: &[],
1079};
1080
1081const SP_TCL: SymbolPatterns = SymbolPatterns {
1082 functions: &["proc "],
1083 functions_prefix_paren: &[],
1084 classes: &[],
1085 variables: &[],
1086 imports: &["source ", "package require "],
1087 tests: &[],
1088 assertions: &[],
1089 test_suites: &[],
1090 variables_prefix_no_paren: &[],
1091};
1092
1093const SP_PASCAL: SymbolPatterns = SymbolPatterns {
1094 functions: &["procedure ", "function "],
1095 functions_prefix_paren: &[],
1096 classes: &["type ", "class ", "record "],
1097 variables: &[],
1098 imports: &["uses "],
1099 tests: &[],
1100 assertions: &[],
1101 test_suites: &[],
1102 variables_prefix_no_paren: &[],
1103};
1104
1105const SP_VB: SymbolPatterns = SymbolPatterns {
1106 functions: &[
1107 "Sub ",
1108 "Function ",
1109 "Private Sub ",
1110 "Public Sub ",
1111 "Private Function ",
1112 "Public Function ",
1113 ],
1114 functions_prefix_paren: &[],
1115 classes: &["Class ", "Module ", "Structure "],
1116 variables: &[],
1117 imports: &["Imports "],
1118 tests: &[],
1119 assertions: &[],
1120 test_suites: &[],
1121 variables_prefix_no_paren: &[],
1122};
1123
1124const SP_LISP: SymbolPatterns = SymbolPatterns {
1125 functions: &["(defun ", "(defmacro ", "(define ", "(defmethod ", "(defn "],
1126 functions_prefix_paren: &[],
1127 classes: &["(defclass ", "(defstruct "],
1128 variables: &[],
1129 imports: &["(require ", "(import ", "(use-package "],
1130 tests: &[],
1131 assertions: &[],
1132 test_suites: &[],
1133 variables_prefix_no_paren: &[],
1134};
1135
1136const SP_FORTRAN: SymbolPatterns = SymbolPatterns {
1138 functions: &["subroutine ", "function "],
1139 functions_prefix_paren: &[],
1140 classes: &["module ", "program ", "type "],
1141 variables: &[],
1142 imports: &["use ", "include "],
1143 tests: &[],
1144 assertions: &[],
1145 test_suites: &[],
1146 variables_prefix_no_paren: &[],
1147};
1148
1149const SP_CRYSTAL: SymbolPatterns = SymbolPatterns {
1150 functions: &["def "],
1151 functions_prefix_paren: &[],
1152 classes: &["class ", "module ", "struct ", "enum "],
1153 variables: &[],
1154 imports: &["require "],
1155 tests: &[],
1156 assertions: &[],
1157 test_suites: &[],
1158 variables_prefix_no_paren: &[],
1159};
1160
1161const SP_D: SymbolPatterns = SymbolPatterns {
1162 functions: &[],
1163 functions_prefix_paren: &[],
1164 classes: &["class ", "struct ", "interface ", "enum ", "template "],
1165 variables: &[],
1166 imports: &["import "],
1167 tests: &[],
1168 assertions: &[],
1169 test_suites: &[],
1170 variables_prefix_no_paren: &[],
1171};
1172
1173const SP_CMAKE: SymbolPatterns = SymbolPatterns {
1174 functions: &["function(", "macro("],
1175 functions_prefix_paren: &[],
1176 classes: &[],
1177 variables: &[],
1178 imports: &["include(", "add_subdirectory("],
1179 tests: &[],
1180 assertions: &[],
1181 test_suites: &[],
1182 variables_prefix_no_paren: &[],
1183};
1184
1185const SP_ELM: SymbolPatterns = SymbolPatterns {
1186 functions: &[],
1187 functions_prefix_paren: &[],
1188 classes: &["type "],
1189 variables: &[],
1190 imports: &["import "],
1191 tests: &[],
1192 assertions: &[],
1193 test_suites: &[],
1194 variables_prefix_no_paren: &[],
1195};
1196
1197const SP_AWK: SymbolPatterns = SymbolPatterns {
1198 functions: &["function "],
1199 functions_prefix_paren: &[],
1200 classes: &[],
1201 variables: &[],
1202 imports: &[],
1203 tests: &[],
1204 assertions: &[],
1205 test_suites: &[],
1206 variables_prefix_no_paren: &[],
1207};
1208
1209const SP_RUST: SymbolPatterns = SymbolPatterns {
1210 functions: &[
1211 "fn ",
1212 "pub fn ",
1213 "pub(crate) fn ",
1214 "pub(super) fn ",
1215 "async fn ",
1216 "pub async fn ",
1217 "pub(crate) async fn ",
1218 "unsafe fn ",
1219 "pub unsafe fn ",
1220 "pub(crate) unsafe fn ",
1221 "const fn ",
1222 "pub const fn ",
1223 "pub(crate) const fn ",
1224 "extern fn ",
1225 "pub extern fn ",
1226 ],
1227 functions_prefix_paren: &[],
1228 classes: &[
1229 "struct ",
1230 "pub struct ",
1231 "pub(crate) struct ",
1232 "enum ",
1233 "pub enum ",
1234 "pub(crate) enum ",
1235 "trait ",
1236 "pub trait ",
1237 "pub(crate) trait ",
1238 "impl ",
1239 "impl<",
1240 "type ",
1241 "pub type ",
1242 "pub(crate) type ",
1243 ],
1244 variables: &["let ", "let mut "],
1245 imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
1246 tests: &[
1248 "#[test]",
1249 "#[tokio::test]",
1250 "#[actix_web::test]",
1251 "#[rstest]",
1252 "#[test_case",
1253 ],
1254 assertions: &[
1255 "assert_eq!(",
1256 "assert_ne!(",
1257 "assert!(",
1258 "assert_matches!(",
1259 "assert_err!(",
1260 "assert_ok!(",
1261 ],
1262 test_suites: &[],
1263 variables_prefix_no_paren: &[],
1264};
1265
1266const SP_PYTHON: SymbolPatterns = SymbolPatterns {
1267 functions: &["def ", "async def "],
1268 functions_prefix_paren: &[],
1269 classes: &["class "],
1270 variables: &[],
1271 imports: &["import ", "from "],
1272 tests: &["def test_", "async def test_", "class Test"],
1274 assertions: &[
1275 "self.assertEqual(",
1276 "self.assertNotEqual(",
1277 "self.assertTrue(",
1278 "self.assertFalse(",
1279 "self.assertIsNone(",
1280 "self.assertIsNotNone(",
1281 "self.assertIn(",
1282 "self.assertNotIn(",
1283 "self.assertRaises(",
1284 "self.assertAlmostEqual(",
1285 ],
1286 test_suites: &[],
1287 variables_prefix_no_paren: &[],
1288};
1289
1290const SP_JS: SymbolPatterns = SymbolPatterns {
1291 functions: &[
1292 "function ",
1293 "async function ",
1294 "export function ",
1295 "export async function ",
1296 "export default function ",
1297 ],
1298 functions_prefix_paren: &[],
1299 classes: &["class ", "export class ", "export default class "],
1300 variables: &[
1301 "var ",
1302 "let ",
1303 "const ",
1304 "export var ",
1305 "export let ",
1306 "export const ",
1307 ],
1308 imports: &["import "],
1309 tests: &[
1311 "describe(",
1312 "it(",
1313 "test(",
1314 "it.each(",
1315 "test.each(",
1316 "describe.each(",
1317 ],
1318 assertions: &["expect("],
1319 test_suites: &[],
1320 variables_prefix_no_paren: &[],
1321};
1322
1323const SP_TS: SymbolPatterns = SymbolPatterns {
1324 functions: &[
1325 "function ",
1326 "async function ",
1327 "export function ",
1328 "export async function ",
1329 "export default function ",
1330 ],
1331 functions_prefix_paren: &[],
1332 classes: &[
1333 "class ",
1334 "export class ",
1335 "export default class ",
1336 "abstract class ",
1337 "export abstract class ",
1338 "interface ",
1339 "export interface ",
1340 "declare class ",
1341 "declare interface ",
1342 ],
1343 variables: &[
1344 "var ",
1345 "let ",
1346 "const ",
1347 "export var ",
1348 "export let ",
1349 "export const ",
1350 ],
1351 imports: &["import "],
1352 tests: &[
1354 "describe(",
1355 "it(",
1356 "test(",
1357 "it.each(",
1358 "test.each(",
1359 "describe.each(",
1360 ],
1361 assertions: &["expect("],
1362 test_suites: &[],
1363 variables_prefix_no_paren: &[],
1364};
1365
1366const SP_GO: SymbolPatterns = SymbolPatterns {
1367 functions: &["func "],
1368 functions_prefix_paren: &[],
1369 classes: &["type "],
1370 variables: &["var "],
1371 imports: &["import "],
1372 tests: &["func Test", "func Benchmark", "func Fuzz"],
1374 assertions: &[],
1375 test_suites: &[],
1376 variables_prefix_no_paren: &[],
1377};
1378
1379const SP_JAVA: SymbolPatterns = SymbolPatterns {
1380 functions: &[],
1381 functions_prefix_paren: &[],
1382 classes: &[
1383 "class ",
1384 "public class ",
1385 "private class ",
1386 "protected class ",
1387 "abstract class ",
1388 "final class ",
1389 "public abstract class ",
1390 "public final class ",
1391 "interface ",
1392 "public interface ",
1393 "enum ",
1394 "public enum ",
1395 "record ",
1396 "public record ",
1397 "@interface ",
1398 ],
1399 variables: &[],
1400 imports: &["import "],
1401 tests: &[
1403 "@Test",
1404 "@ParameterizedTest",
1405 "@RepeatedTest",
1406 "@TestFactory",
1407 "@TestTemplate",
1408 ],
1409 assertions: &[
1410 "assertEquals(",
1411 "assertNotEquals(",
1412 "assertTrue(",
1413 "assertFalse(",
1414 "assertNull(",
1415 "assertNotNull(",
1416 "assertThat(",
1417 "assertThrows(",
1418 "assertAll(",
1419 "assertArrayEquals(",
1420 "assertIterableEquals(",
1421 "assertLinesMatch(",
1422 ],
1423 test_suites: &[],
1424 variables_prefix_no_paren: &[],
1425};
1426
1427const SP_CSHARP: SymbolPatterns = SymbolPatterns {
1428 functions: &[],
1429 functions_prefix_paren: &[],
1430 classes: &[
1431 "class ",
1432 "public class ",
1433 "private class ",
1434 "protected class ",
1435 "internal class ",
1436 "abstract class ",
1437 "sealed class ",
1438 "static class ",
1439 "partial class ",
1440 "public abstract class ",
1441 "public sealed class ",
1442 "public static class ",
1443 "interface ",
1444 "public interface ",
1445 "internal interface ",
1446 "enum ",
1447 "public enum ",
1448 "struct ",
1449 "public struct ",
1450 "record ",
1451 "public record ",
1452 ],
1453 variables: &["var "],
1454 imports: &["using "],
1455 tests: &[
1457 "[TestMethod]",
1458 "[Test]",
1459 "[Fact]",
1460 "[Theory]",
1461 "[TestCase(",
1462 "[DataRow(",
1463 "[InlineData(",
1464 "[MemberData(",
1465 ],
1466 assertions: &[
1467 "Assert.AreEqual(",
1468 "Assert.AreNotEqual(",
1469 "Assert.IsTrue(",
1470 "Assert.IsFalse(",
1471 "Assert.IsNull(",
1472 "Assert.IsNotNull(",
1473 "Assert.Equal(",
1474 "Assert.NotEqual(",
1475 "Assert.True(",
1476 "Assert.False(",
1477 "Assert.That(",
1478 "Assert.Contains(",
1479 "Assert.Throws(",
1480 "Assert.ThrowsAsync(",
1481 "Assert.IsInstanceOfType(",
1482 ],
1483 test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
1484 variables_prefix_no_paren: &[],
1485};
1486
/// Test-case macro and function-call patterns shared by `SP_C` and `SP_CPP`
/// (both use this list as their `tests` table).
///
/// Entries are grouped by the C/C++ test framework they come from. A few
/// suite-level macros (`TEST_GROUP(`, `BOOST_AUTO_TEST_SUITE(`,
/// `CPPUNIT_TEST_SUITE(`) also appear in `SUITE_PATTERNS_C_CPP`.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // Google Test
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
1527
/// Assertion macro and function-call patterns shared by `SP_C` and `SP_CPP`
/// (both use this list as their `assertions` table).
///
/// Entries are grouped by the C/C++ test framework they come from.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // Google Test: fatal `ASSERT_*` forms
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // Google Test: non-fatal `EXPECT_*` forms
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1601
/// Test-suite / test-group macro patterns shared by `SP_C` and `SP_CPP`
/// (both use this list as their `test_suites` table).
///
/// All entries except `CPPUNIT_TEST_SUITE_END(` are also present in
/// `TEST_PATTERNS_C_CPP`.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    // CppUTest
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    // Boost.Test
    "BOOST_AUTO_TEST_SUITE(",
    // CppUnit
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1610
1611const SP_C: SymbolPatterns = SymbolPatterns {
1612 functions: &[],
1614 functions_prefix_paren: &[
1615 "void ",
1616 "int ",
1617 "char ",
1618 "float ",
1619 "double ",
1620 "long ",
1621 "unsigned ",
1622 "size_t ",
1623 "static ",
1624 "inline ",
1625 "const ",
1626 "extern ",
1627 ],
1628 classes: &[
1629 "struct ",
1630 "typedef struct ",
1631 "union ",
1632 "typedef union ",
1633 "typedef enum ",
1634 ],
1635 variables: &[],
1636 imports: &["#include "],
1637 tests: TEST_PATTERNS_C_CPP,
1638 assertions: ASSERT_PATTERNS_C_CPP,
1639 test_suites: SUITE_PATTERNS_C_CPP,
1640 variables_prefix_no_paren: &[
1643 "void ",
1644 "int ",
1645 "char ",
1646 "float ",
1647 "double ",
1648 "long ",
1649 "unsigned ",
1650 "size_t ",
1651 "static ",
1652 "inline ",
1653 "const ",
1654 "extern ",
1655 ],
1656};
1657
1658const SP_CPP: SymbolPatterns = SymbolPatterns {
1659 functions: &[
1661 "virtual ", "explicit ", "~", "operator", ],
1666 functions_prefix_paren: &[
1667 "void ",
1668 "bool ",
1669 "int ",
1670 "char ",
1671 "float ",
1672 "double ",
1673 "long ",
1674 "unsigned ",
1675 "size_t ",
1676 "auto ",
1677 "static ",
1678 "inline ",
1679 "constexpr ",
1680 "const ",
1681 "extern ",
1682 ],
1683 classes: &["class ", "struct ", "namespace ", "template ", "template<"],
1685 variables: &[],
1686 imports: &["#include "],
1687 tests: TEST_PATTERNS_C_CPP,
1688 assertions: ASSERT_PATTERNS_C_CPP,
1689 test_suites: SUITE_PATTERNS_C_CPP,
1690 variables_prefix_no_paren: &[
1692 "void ",
1693 "bool ",
1694 "int ",
1695 "char ",
1696 "float ",
1697 "double ",
1698 "long ",
1699 "unsigned ",
1700 "size_t ",
1701 "auto ",
1702 "static ",
1703 "inline ",
1704 "constexpr ",
1705 "const ",
1706 "extern ",
1707 ],
1708};
1709
1710const SP_SHELL: SymbolPatterns = SymbolPatterns {
1711 functions: &["function "],
1712 functions_prefix_paren: &[],
1713 classes: &[],
1714 variables: &["declare ", "local ", "export "],
1715 imports: &["source ", ". "],
1716 tests: &[],
1717 assertions: &[],
1718 test_suites: &[],
1719 variables_prefix_no_paren: &[],
1720};
1721
1722const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
1723 functions: &["function ", "Function "],
1724 functions_prefix_paren: &[],
1725 classes: &["class "],
1726 variables: &[],
1727 imports: &["Import-Module ", "using "],
1728 tests: &["Describe ", "It ", "Context "],
1730 assertions: &[],
1731 test_suites: &[],
1732 variables_prefix_no_paren: &[],
1733};
1734
1735const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
1736 functions: &[
1737 "fun ",
1738 "private fun ",
1739 "public fun ",
1740 "protected fun ",
1741 "internal fun ",
1742 "override fun ",
1743 "suspend fun ",
1744 "abstract fun ",
1745 "open fun ",
1746 "private suspend fun ",
1747 "public suspend fun ",
1748 ],
1749 functions_prefix_paren: &[],
1750 classes: &[
1751 "class ",
1752 "data class ",
1753 "sealed class ",
1754 "abstract class ",
1755 "open class ",
1756 "object ",
1757 "companion object",
1758 "interface ",
1759 "enum class ",
1760 "annotation class ",
1761 ],
1762 variables: &["val ", "var ", "private val ", "private var ", "const val "],
1763 imports: &["import "],
1764 tests: &[
1766 "@Test",
1767 "@ParameterizedTest",
1768 "@RepeatedTest",
1769 "\"should ",
1770 "\"it ",
1771 ],
1772 assertions: &[
1773 "assertEquals(",
1774 "assertNotEquals(",
1775 "assertTrue(",
1776 "assertFalse(",
1777 "assertNull(",
1778 "assertNotNull(",
1779 "assertThat(",
1780 "assertThrows(",
1781 "shouldBe(",
1782 "shouldNotBe(",
1783 "shouldThrow(",
1784 ],
1785 test_suites: &[],
1786 variables_prefix_no_paren: &[],
1787};
1788
1789const SP_SWIFT: SymbolPatterns = SymbolPatterns {
1790 functions: &[
1791 "func ",
1792 "private func ",
1793 "public func ",
1794 "internal func ",
1795 "override func ",
1796 "open func ",
1797 "static func ",
1798 "class func ",
1799 "mutating func ",
1800 "private static func ",
1801 "public static func ",
1802 ],
1803 functions_prefix_paren: &[],
1804 classes: &[
1805 "class ",
1806 "struct ",
1807 "protocol ",
1808 "enum ",
1809 "extension ",
1810 "actor ",
1811 "public class ",
1812 "private class ",
1813 "open class ",
1814 "final class ",
1815 "public struct ",
1816 "private struct ",
1817 "public protocol ",
1818 ],
1819 variables: &[
1820 "var ",
1821 "let ",
1822 "private var ",
1823 "private let ",
1824 "static var ",
1825 "static let ",
1826 ],
1827 imports: &["import "],
1828 tests: &["func test", "func Test", "@Test"],
1830 assertions: &[
1831 "XCTAssertEqual(",
1832 "XCTAssertNotEqual(",
1833 "XCTAssertTrue(",
1834 "XCTAssertFalse(",
1835 "XCTAssertNil(",
1836 "XCTAssertNotNil(",
1837 "XCTAssertGreaterThan(",
1838 "XCTAssertLessThan(",
1839 "XCTAssertThrowsError(",
1840 "XCTAssertNoThrow(",
1841 "#expect(",
1842 ],
1843 test_suites: &[],
1844 variables_prefix_no_paren: &[],
1845};
1846
1847const SP_RUBY: SymbolPatterns = SymbolPatterns {
1848 functions: &["def ", "private def ", "protected def "],
1849 functions_prefix_paren: &[],
1850 classes: &["class ", "module "],
1851 variables: &[],
1852 imports: &["require ", "require_relative "],
1853 tests: &["it ", "it(", "describe ", "context ", "test "],
1855 assertions: &[],
1856 test_suites: &[],
1857 variables_prefix_no_paren: &[],
1858};
1859
1860const SP_SCALA: SymbolPatterns = SymbolPatterns {
1861 functions: &["def ", "private def ", "protected def ", "override def "],
1862 functions_prefix_paren: &[],
1863 classes: &[
1864 "class ",
1865 "case class ",
1866 "abstract class ",
1867 "sealed class ",
1868 "object ",
1869 "trait ",
1870 ],
1871 variables: &["val ", "var ", "lazy val "],
1872 imports: &["import "],
1873 tests: &["test(", "it(", "describe("],
1875 assertions: &[],
1876 test_suites: &[],
1877 variables_prefix_no_paren: &[],
1878};
1879
1880const SP_PHP: SymbolPatterns = SymbolPatterns {
1881 functions: &[
1882 "function ",
1883 "public function ",
1884 "private function ",
1885 "protected function ",
1886 "static function ",
1887 "abstract function ",
1888 "final function ",
1889 "public static function ",
1890 "private static function ",
1891 "protected static function ",
1892 ],
1893 functions_prefix_paren: &[],
1894 classes: &[
1895 "class ",
1896 "abstract class ",
1897 "final class ",
1898 "interface ",
1899 "trait ",
1900 "enum ",
1901 ],
1902 variables: &[],
1903 imports: &[
1904 "use ",
1905 "require ",
1906 "require_once ",
1907 "include ",
1908 "include_once ",
1909 ],
1910 tests: &[
1912 "public function test",
1913 "function test",
1914 "#[Test]",
1915 "#[DataProvider(",
1916 ],
1917 assertions: &[],
1918 test_suites: &[],
1919 variables_prefix_no_paren: &[],
1920};
1921
1922const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
1923 functions: &[
1924 "def ",
1925 "defp ",
1926 "defmacro ",
1927 "defmacrop ",
1928 "defguard ",
1929 "defguardp ",
1930 ],
1931 functions_prefix_paren: &[],
1932 classes: &["defmodule ", "defprotocol ", "defimpl "],
1933 variables: &[],
1934 imports: &["import ", "alias ", "use ", "require "],
1935 tests: &["test ", "describe "],
1937 assertions: &[],
1938 test_suites: &[],
1939 variables_prefix_no_paren: &[],
1940};
1941
1942const SP_ERLANG: SymbolPatterns = SymbolPatterns {
1943 functions: &[],
1944 functions_prefix_paren: &[],
1945 classes: &["-module("],
1946 variables: &[],
1947 imports: &["-import(", "-include(", "-include_lib("],
1948 tests: &[],
1949 assertions: &[],
1950 test_suites: &[],
1951 variables_prefix_no_paren: &[],
1952};
1953
1954const SP_FSHARP: SymbolPatterns = SymbolPatterns {
1955 functions: &[
1956 "let ",
1957 "let rec ",
1958 "member ",
1959 "override ",
1960 "abstract member ",
1961 ],
1962 functions_prefix_paren: &[],
1963 classes: &["type "],
1964 variables: &["let mutable "],
1965 imports: &["open "],
1966 tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
1968 assertions: &[],
1969 test_suites: &[],
1970 variables_prefix_no_paren: &[],
1971};
1972
1973const SP_GROOVY: SymbolPatterns = SymbolPatterns {
1974 functions: &["def ", "private def ", "public def ", "protected def "],
1975 functions_prefix_paren: &[],
1976 classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
1977 variables: &[],
1978 imports: &["import "],
1979 tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
1981 assertions: &[],
1982 test_suites: &[],
1983 variables_prefix_no_paren: &[],
1984};
1985
1986const SP_HASKELL: SymbolPatterns = SymbolPatterns {
1987 functions: &[],
1988 functions_prefix_paren: &[],
1989 classes: &["class ", "data ", "newtype ", "type "],
1990 variables: &[],
1991 imports: &["import "],
1992 tests: &[],
1993 assertions: &[],
1994 test_suites: &[],
1995 variables_prefix_no_paren: &[],
1996};
1997
1998const SP_LUA: SymbolPatterns = SymbolPatterns {
1999 functions: &["function ", "local function "],
2000 functions_prefix_paren: &[],
2001 classes: &[],
2002 variables: &["local "],
2003 imports: &[],
2004 tests: &["it(", "describe(", "pending("],
2006 assertions: &[],
2007 test_suites: &[],
2008 variables_prefix_no_paren: &[],
2009};
2010
2011const SP_NIM: SymbolPatterns = SymbolPatterns {
2012 functions: &[
2013 "proc ",
2014 "func ",
2015 "method ",
2016 "iterator ",
2017 "converter ",
2018 "template ",
2019 "macro ",
2020 ],
2021 functions_prefix_paren: &[],
2022 classes: &["type "],
2023 variables: &["var ", "let ", "const "],
2024 imports: &["import ", "from "],
2025 tests: &["test "],
2027 assertions: &[],
2028 test_suites: &[],
2029 variables_prefix_no_paren: &[],
2030};
2031
2032const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
2033 functions: &["- (", "+ ("],
2034 functions_prefix_paren: &[],
2035 classes: &["@interface ", "@implementation ", "@protocol "],
2036 variables: &[],
2037 imports: &["#import ", "#include "],
2038 tests: &["- (void)test"],
2040 assertions: &[
2041 "XCTAssertEqual(",
2042 "XCTAssertNotEqual(",
2043 "XCTAssertTrue(",
2044 "XCTAssertFalse(",
2045 "XCTAssertNil(",
2046 "XCTAssertNotNil(",
2047 "XCTAssertGreaterThan(",
2048 "XCTAssertLessThan(",
2049 "XCTAssertThrowsError(",
2050 "XCTAssertNoThrow(",
2051 ],
2052 test_suites: &[],
2053 variables_prefix_no_paren: &[],
2054};
2055
2056const SP_OCAML: SymbolPatterns = SymbolPatterns {
2057 functions: &["let ", "let rec "],
2058 functions_prefix_paren: &[],
2059 classes: &["type ", "module ", "class "],
2060 variables: &[],
2061 imports: &["open "],
2062 tests: &[],
2063 assertions: &[],
2064 test_suites: &[],
2065 variables_prefix_no_paren: &[],
2066};
2067
2068const SP_PERL: SymbolPatterns = SymbolPatterns {
2069 functions: &["sub "],
2070 functions_prefix_paren: &[],
2071 classes: &["package "],
2072 variables: &["my ", "our ", "local "],
2073 imports: &["use ", "require "],
2074 tests: &[],
2075 assertions: &[],
2076 test_suites: &[],
2077 variables_prefix_no_paren: &[],
2078};
2079
2080const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
2081 functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
2082 functions_prefix_paren: &[],
2083 classes: &[
2084 "(defrecord ",
2085 "(defprotocol ",
2086 "(deftype ",
2087 "(definterface ",
2088 ],
2089 variables: &["(def ", "(defonce "],
2090 imports: &["(ns ", "(require "],
2091 tests: &["(deftest ", "(testing "],
2093 assertions: &[],
2094 test_suites: &[],
2095 variables_prefix_no_paren: &[],
2096};
2097
2098const SP_JULIA: SymbolPatterns = SymbolPatterns {
2099 functions: &["function ", "macro "],
2100 functions_prefix_paren: &[],
2101 classes: &[
2102 "struct ",
2103 "mutable struct ",
2104 "abstract type ",
2105 "primitive type ",
2106 ],
2107 variables: &["const "],
2108 imports: &["import ", "using "],
2109 tests: &["@test ", "@testset "],
2111 assertions: &[],
2112 test_suites: &[],
2113 variables_prefix_no_paren: &[],
2114};
2115
2116const SP_DART: SymbolPatterns = SymbolPatterns {
2117 functions: &[],
2118 functions_prefix_paren: &[],
2119 classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
2120 variables: &["var ", "final ", "const ", "late "],
2121 imports: &["import "],
2122 tests: &["test(", "testWidgets(", "group("],
2124 assertions: &[],
2125 test_suites: &[],
2126 variables_prefix_no_paren: &[],
2127};
2128
2129const SP_R: SymbolPatterns = SymbolPatterns {
2130 functions: &[],
2131 functions_prefix_paren: &[],
2132 classes: &[],
2133 variables: &[],
2134 imports: &["library(", "source("],
2135 tests: &["test_that(", "it(", "describe(", "expect_"],
2137 assertions: &[],
2138 test_suites: &[],
2139 variables_prefix_no_paren: &[],
2140};
2141
2142const SP_SQL: SymbolPatterns = SymbolPatterns {
2143 functions: &[
2144 "create function ",
2145 "create or replace function ",
2146 "create procedure ",
2147 "create or replace procedure ",
2148 "CREATE FUNCTION ",
2149 "CREATE OR REPLACE FUNCTION ",
2150 "CREATE PROCEDURE ",
2151 "CREATE OR REPLACE PROCEDURE ",
2152 ],
2153 functions_prefix_paren: &[],
2154 classes: &[
2155 "create table ",
2156 "create view ",
2157 "create schema ",
2158 "CREATE TABLE ",
2159 "CREATE VIEW ",
2160 "CREATE SCHEMA ",
2161 ],
2162 variables: &["declare ", "DECLARE "],
2163 imports: &[],
2164 tests: &[],
2165 assertions: &[],
2166 test_suites: &[],
2167 variables_prefix_no_paren: &[],
2168};
2169
2170const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
2171 functions: &["proc ", "PROC "],
2172 functions_prefix_paren: &[],
2173 classes: &[],
2174 variables: &[],
2175 imports: &["include ", "INCLUDE ", "%include "],
2176 tests: &[],
2177 assertions: &[],
2178 test_suites: &[],
2179 variables_prefix_no_paren: &[],
2180};
2181
2182const SP_ZIG: SymbolPatterns = SymbolPatterns {
2183 functions: &[
2184 "fn ",
2185 "pub fn ",
2186 "export fn ",
2187 "inline fn ",
2188 "pub inline fn ",
2189 ],
2190 functions_prefix_paren: &[],
2191 classes: &[],
2192 variables: &["var ", "pub var "],
2193 imports: &[],
2194 tests: &["test \"", "test{"],
2196 assertions: &[],
2197 test_suites: &[],
2198 variables_prefix_no_paren: &[],
2199};
2200
// The boolean fields below exceed clippy's per-struct bool threshold.
#[allow(clippy::struct_excessive_bools)]
/// Compile-time, per-language scanning configuration.
///
/// Values of this type are the second element of each `LANG_SCAN_TABLE`
/// entry and are usually built from `C_SLASH_BASE` or `HASH_BASE` with
/// struct-update syntax.
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Markers that introduce a line comment (e.g. `//` or `#`); may be empty.
    line_comments: &'static [&'static str],
    /// Opening and closing delimiters of a block comment, if there is one.
    block_comment: Option<(&'static str, &'static str)>,
    /// String-literal toggle for single-quoted strings.
    allow_single_quote_strings: bool,
    /// String-literal toggle for double-quoted strings.
    allow_double_quote_strings: bool,
    /// String-literal toggle for triple-quoted strings (set for Python, for example).
    allow_triple_quote_strings: bool,
    /// String-literal toggle for C# verbatim strings (set for the C# entry).
    allow_csharp_verbatim_strings: bool,
    /// Symbol pattern table for the language (`SP_NONE` when there is none).
    symbol_patterns: SymbolPatterns,
    /// Set for the C, C++, and Objective-C entries.
    /// NOTE(review): presumably enables preprocessor-directive handling — confirm at the use site.
    has_preprocessor: bool,
}
2217
// The boolean fields below exceed clippy's per-struct bool threshold.
#[allow(clippy::struct_excessive_bools)]
/// Scanning configuration carrying the same comment/string/symbol settings as
/// `StaticLangConfig`, plus an owned `skip_lines` set, branch keywords, and an
/// LSLOC strategy.
/// NOTE(review): presumably built per scan from a `StaticLangConfig` — confirm at the construction site.
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Markers that introduce a line comment; may be empty.
    line_comments: &'static [&'static str],
    /// Opening and closing delimiters of a block comment, if there is one.
    block_comment: Option<(&'static str, &'static str)>,
    /// String-literal toggle for single-quoted strings.
    allow_single_quote_strings: bool,
    /// String-literal toggle for double-quoted strings.
    allow_double_quote_strings: bool,
    /// String-literal toggle for triple-quoted strings.
    allow_triple_quote_strings: bool,
    /// String-literal toggle for C# verbatim strings.
    allow_csharp_verbatim_strings: bool,
    /// Set of line numbers to skip.
    /// NOTE(review): indexing base (0- or 1-based) is not visible here — confirm against the scanner.
    skip_lines: HashSet<usize>,
    /// Symbol pattern table used for this scan.
    symbol_patterns: SymbolPatterns,
    /// Keywords treated as branches.
    /// NOTE(review): presumably feeds a complexity metric — confirm at the use site.
    branch_keywords: &'static [&'static str],
    /// Strategy used for logical-SLOC counting (see `LslocStrategy`).
    lsloc_strategy: LslocStrategy,
}
2234
2235const C_SLASH_BASE: StaticLangConfig = StaticLangConfig {
2245 line_comments: &["//"],
2246 block_comment: Some(("/*", "*/")),
2247 allow_single_quote_strings: true,
2248 allow_double_quote_strings: true,
2249 allow_triple_quote_strings: false,
2250 allow_csharp_verbatim_strings: false,
2251 symbol_patterns: SP_NONE,
2252 has_preprocessor: false,
2253};
2254
2255const HASH_BASE: StaticLangConfig = StaticLangConfig {
2259 line_comments: &["#"],
2260 block_comment: None,
2261 allow_single_quote_strings: true,
2262 allow_double_quote_strings: true,
2263 allow_triple_quote_strings: false,
2264 allow_csharp_verbatim_strings: false,
2265 symbol_patterns: SP_NONE,
2266 has_preprocessor: false,
2267};
2268
2269static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
2273 (
2275 Language::C,
2276 StaticLangConfig {
2277 symbol_patterns: SP_C,
2278 has_preprocessor: true,
2279 ..C_SLASH_BASE
2280 },
2281 ),
2282 (
2283 Language::Cpp,
2284 StaticLangConfig {
2285 symbol_patterns: SP_CPP,
2286 has_preprocessor: true,
2287 ..C_SLASH_BASE
2288 },
2289 ),
2290 (
2291 Language::ObjectiveC,
2292 StaticLangConfig {
2293 symbol_patterns: SP_OBJECTIVEC,
2294 has_preprocessor: true,
2295 ..C_SLASH_BASE
2296 },
2297 ),
2298 (
2300 Language::CSharp,
2301 StaticLangConfig {
2302 symbol_patterns: SP_CSHARP,
2303 allow_csharp_verbatim_strings: true,
2304 ..C_SLASH_BASE
2305 },
2306 ),
2307 (
2308 Language::Go,
2309 StaticLangConfig {
2310 symbol_patterns: SP_GO,
2311 ..C_SLASH_BASE
2312 },
2313 ),
2314 (
2315 Language::Java,
2316 StaticLangConfig {
2317 symbol_patterns: SP_JAVA,
2318 ..C_SLASH_BASE
2319 },
2320 ),
2321 (
2322 Language::JavaScript,
2323 StaticLangConfig {
2324 symbol_patterns: SP_JS,
2325 ..C_SLASH_BASE
2326 },
2327 ),
2328 (
2329 Language::TypeScript,
2330 StaticLangConfig {
2331 symbol_patterns: SP_TS,
2332 ..C_SLASH_BASE
2333 },
2334 ),
2335 (
2336 Language::Svelte,
2337 StaticLangConfig {
2338 symbol_patterns: SP_JS,
2339 ..C_SLASH_BASE
2340 },
2341 ),
2342 (
2343 Language::Vue,
2344 StaticLangConfig {
2345 symbol_patterns: SP_JS,
2346 ..C_SLASH_BASE
2347 },
2348 ),
2349 (
2350 Language::Dart,
2351 StaticLangConfig {
2352 symbol_patterns: SP_DART,
2353 ..C_SLASH_BASE
2354 },
2355 ),
2356 (
2357 Language::Groovy,
2358 StaticLangConfig {
2359 symbol_patterns: SP_GROOVY,
2360 ..C_SLASH_BASE
2361 },
2362 ),
2363 (
2364 Language::Kotlin,
2365 StaticLangConfig {
2366 symbol_patterns: SP_KOTLIN,
2367 ..C_SLASH_BASE
2368 },
2369 ),
2370 (
2371 Language::Scala,
2372 StaticLangConfig {
2373 symbol_patterns: SP_SCALA,
2374 ..C_SLASH_BASE
2375 },
2376 ),
2377 (
2378 Language::Scss,
2379 StaticLangConfig {
2380 symbol_patterns: SP_NONE,
2381 ..C_SLASH_BASE
2382 },
2383 ),
2384 (
2386 Language::Rust,
2387 StaticLangConfig {
2388 symbol_patterns: SP_RUST,
2389 allow_single_quote_strings: false,
2390 ..C_SLASH_BASE
2391 },
2392 ),
2393 (
2395 Language::Swift,
2396 StaticLangConfig {
2397 symbol_patterns: SP_SWIFT,
2398 allow_single_quote_strings: false,
2399 ..C_SLASH_BASE
2400 },
2401 ),
2402 (
2404 Language::Zig,
2405 StaticLangConfig {
2406 symbol_patterns: SP_ZIG,
2407 block_comment: None,
2408 ..C_SLASH_BASE
2409 },
2410 ),
2411 (
2413 Language::FSharp,
2414 StaticLangConfig {
2415 line_comments: &["//"],
2416 block_comment: Some(("(*", "*)")),
2417 allow_single_quote_strings: false,
2418 allow_double_quote_strings: true,
2419 symbol_patterns: SP_FSHARP,
2420 ..C_SLASH_BASE
2421 },
2422 ),
2423 (
2425 Language::Shell,
2426 StaticLangConfig {
2427 symbol_patterns: SP_SHELL,
2428 ..HASH_BASE
2429 },
2430 ),
2431 (
2432 Language::Elixir,
2433 StaticLangConfig {
2434 symbol_patterns: SP_ELIXIR,
2435 ..HASH_BASE
2436 },
2437 ),
2438 (
2439 Language::Perl,
2440 StaticLangConfig {
2441 symbol_patterns: SP_PERL,
2442 ..HASH_BASE
2443 },
2444 ),
2445 (
2446 Language::R,
2447 StaticLangConfig {
2448 symbol_patterns: SP_R,
2449 ..HASH_BASE
2450 },
2451 ),
2452 (
2453 Language::Ruby,
2454 StaticLangConfig {
2455 symbol_patterns: SP_RUBY,
2456 ..HASH_BASE
2457 },
2458 ),
2459 (
2461 Language::Python,
2462 StaticLangConfig {
2463 symbol_patterns: SP_PYTHON,
2464 allow_triple_quote_strings: true,
2465 ..HASH_BASE
2466 },
2467 ),
2468 (
2470 Language::PowerShell,
2471 StaticLangConfig {
2472 symbol_patterns: SP_POWERSHELL,
2473 block_comment: Some(("<#", "#>")),
2474 ..HASH_BASE
2475 },
2476 ),
2477 (
2479 Language::Nim,
2480 StaticLangConfig {
2481 symbol_patterns: SP_NIM,
2482 block_comment: Some(("#[", "]#")),
2483 ..HASH_BASE
2484 },
2485 ),
2486 (
2488 Language::Makefile,
2489 StaticLangConfig {
2490 symbol_patterns: SP_NONE,
2491 allow_single_quote_strings: false,
2492 allow_double_quote_strings: false,
2493 ..HASH_BASE
2494 },
2495 ),
2496 (
2497 Language::Dockerfile,
2498 StaticLangConfig {
2499 symbol_patterns: SP_NONE,
2500 allow_single_quote_strings: false,
2501 allow_double_quote_strings: false,
2502 ..HASH_BASE
2503 },
2504 ),
2505 (
2508 Language::Css,
2509 StaticLangConfig {
2510 line_comments: &[],
2511 block_comment: Some(("/*", "*/")),
2512 symbol_patterns: SP_NONE,
2513 ..C_SLASH_BASE
2514 },
2515 ),
2516 (
2518 Language::Html,
2519 StaticLangConfig {
2520 line_comments: &[],
2521 block_comment: Some(("<!--", "-->")),
2522 allow_single_quote_strings: false,
2523 allow_double_quote_strings: false,
2524 symbol_patterns: SP_NONE,
2525 ..C_SLASH_BASE
2526 },
2527 ),
2528 (
2529 Language::Xml,
2530 StaticLangConfig {
2531 line_comments: &[],
2532 block_comment: Some(("<!--", "-->")),
2533 allow_single_quote_strings: false,
2534 allow_double_quote_strings: false,
2535 symbol_patterns: SP_NONE,
2536 ..C_SLASH_BASE
2537 },
2538 ),
2539 (
2541 Language::Lua,
2542 StaticLangConfig {
2543 line_comments: &["--"],
2544 block_comment: Some(("--[[", "]]")),
2545 symbol_patterns: SP_LUA,
2546 ..C_SLASH_BASE
2547 },
2548 ),
2549 (
2551 Language::Haskell,
2552 StaticLangConfig {
2553 line_comments: &["--"],
2554 block_comment: Some(("{-", "-}")),
2555 symbol_patterns: SP_HASKELL,
2556 ..C_SLASH_BASE
2557 },
2558 ),
2559 (
2561 Language::Sql,
2562 StaticLangConfig {
2563 line_comments: &["--"],
2564 block_comment: Some(("/*", "*/")),
2565 allow_single_quote_strings: true,
2566 allow_double_quote_strings: false,
2567 symbol_patterns: SP_SQL,
2568 ..C_SLASH_BASE
2569 },
2570 ),
2571 (
2573 Language::Ocaml,
2574 StaticLangConfig {
2575 line_comments: &[],
2576 block_comment: Some(("(*", "*)")),
2577 allow_single_quote_strings: false,
2578 symbol_patterns: SP_OCAML,
2579 ..C_SLASH_BASE
2580 },
2581 ),
2582 (
2588 Language::Assembly,
2589 StaticLangConfig {
2590 line_comments: &[";"],
2591 block_comment: Some(("/*", "*/")),
2592 allow_single_quote_strings: false,
2593 allow_double_quote_strings: true,
2594 symbol_patterns: SP_ASSEMBLY,
2595 ..C_SLASH_BASE
2596 },
2597 ),
2598 (
2599 Language::Clojure,
2600 StaticLangConfig {
2601 line_comments: &[";"],
2602 block_comment: None,
2603 allow_single_quote_strings: false,
2604 symbol_patterns: SP_CLOJURE,
2605 ..C_SLASH_BASE
2606 },
2607 ),
2608 (
2610 Language::Erlang,
2611 StaticLangConfig {
2612 line_comments: &["%"],
2613 block_comment: None,
2614 allow_single_quote_strings: false,
2615 symbol_patterns: SP_ERLANG,
2616 ..C_SLASH_BASE
2617 },
2618 ),
2619 (
2621 Language::Php,
2622 StaticLangConfig {
2623 line_comments: &["//", "#"],
2624 block_comment: Some(("/*", "*/")),
2625 symbol_patterns: SP_PHP,
2626 ..C_SLASH_BASE
2627 },
2628 ),
2629 (
2631 Language::Julia,
2632 StaticLangConfig {
2633 line_comments: &["#"],
2634 block_comment: Some(("#=", "=#")),
2635 allow_single_quote_strings: false,
2636 allow_triple_quote_strings: true,
2637 symbol_patterns: SP_JULIA,
2638 ..C_SLASH_BASE
2639 },
2640 ),
2641 (
2644 Language::Solidity,
2645 StaticLangConfig {
2646 symbol_patterns: SP_SOLIDITY,
2647 ..C_SLASH_BASE
2648 },
2649 ),
2650 (
2652 Language::Protobuf,
2653 StaticLangConfig {
2654 symbol_patterns: SP_PROTOBUF,
2655 ..C_SLASH_BASE
2656 },
2657 ),
2658 (
2660 Language::Hcl,
2661 StaticLangConfig {
2662 line_comments: &["#", "//"],
2663 allow_single_quote_strings: false,
2664 symbol_patterns: SP_NONE,
2665 ..C_SLASH_BASE
2666 },
2667 ),
2668 (
2670 Language::GraphQl,
2671 StaticLangConfig {
2672 allow_single_quote_strings: false,
2673 allow_triple_quote_strings: true,
2674 symbol_patterns: SP_NONE,
2675 ..HASH_BASE
2676 },
2677 ),
2678 (
2681 Language::Ada,
2682 StaticLangConfig {
2683 line_comments: &["--"],
2684 block_comment: None,
2685 allow_single_quote_strings: false,
2686 symbol_patterns: SP_ADA,
2687 ..C_SLASH_BASE
2688 },
2689 ),
2690 (
2692 Language::Vhdl,
2693 StaticLangConfig {
2694 line_comments: &["--"],
2695 block_comment: None,
2696 allow_single_quote_strings: false,
2697 symbol_patterns: SP_VHDL,
2698 ..C_SLASH_BASE
2699 },
2700 ),
2701 (
2703 Language::Verilog,
2704 StaticLangConfig {
2705 allow_single_quote_strings: false,
2706 symbol_patterns: SP_VERILOG,
2707 ..C_SLASH_BASE
2708 },
2709 ),
2710 (
2712 Language::Tcl,
2713 StaticLangConfig {
2714 allow_single_quote_strings: false,
2715 symbol_patterns: SP_TCL,
2716 ..HASH_BASE
2717 },
2718 ),
2719 (
2721 Language::Pascal,
2722 StaticLangConfig {
2723 line_comments: &["//"],
2724 block_comment: Some(("{", "}")),
2725 allow_single_quote_strings: true,
2726 allow_double_quote_strings: false,
2727 symbol_patterns: SP_PASCAL,
2728 ..C_SLASH_BASE
2729 },
2730 ),
2731 (
2733 Language::VisualBasic,
2734 StaticLangConfig {
2735 line_comments: &["'"],
2736 block_comment: None,
2737 allow_single_quote_strings: false,
2738 allow_double_quote_strings: true,
2739 symbol_patterns: SP_VB,
2740 ..C_SLASH_BASE
2741 },
2742 ),
2743 (
2745 Language::Lisp,
2746 StaticLangConfig {
2747 line_comments: &[";"],
2748 block_comment: Some(("#|", "|#")),
2749 allow_single_quote_strings: false,
2750 symbol_patterns: SP_LISP,
2751 ..C_SLASH_BASE
2752 },
2753 ),
2754 (
2757 Language::Fortran,
2758 StaticLangConfig {
2759 line_comments: &["!"],
2760 block_comment: None,
2761 symbol_patterns: SP_FORTRAN,
2762 ..C_SLASH_BASE
2763 },
2764 ),
2765 (
2767 Language::Nix,
2768 StaticLangConfig {
2769 block_comment: Some(("/*", "*/")),
2770 allow_single_quote_strings: false,
2771 symbol_patterns: SP_NONE,
2772 ..HASH_BASE
2773 },
2774 ),
2775 (
2777 Language::Crystal,
2778 StaticLangConfig {
2779 symbol_patterns: SP_CRYSTAL,
2780 ..HASH_BASE
2781 },
2782 ),
2783 (
2785 Language::D,
2786 StaticLangConfig {
2787 symbol_patterns: SP_D,
2788 ..C_SLASH_BASE
2789 },
2790 ),
2791 (
2793 Language::Glsl,
2794 StaticLangConfig {
2795 allow_single_quote_strings: false,
2796 symbol_patterns: SP_NONE,
2797 ..C_SLASH_BASE
2798 },
2799 ),
2800 (
2802 Language::Cmake,
2803 StaticLangConfig {
2804 block_comment: Some(("#[[", "]]")),
2805 allow_single_quote_strings: false,
2806 symbol_patterns: SP_CMAKE,
2807 ..HASH_BASE
2808 },
2809 ),
2810 (
2812 Language::Elm,
2813 StaticLangConfig {
2814 line_comments: &["--"],
2815 block_comment: Some(("{-", "-}")),
2816 allow_single_quote_strings: false,
2817 symbol_patterns: SP_ELM,
2818 ..C_SLASH_BASE
2819 },
2820 ),
2821 (
2823 Language::Awk,
2824 StaticLangConfig {
2825 allow_single_quote_strings: false,
2826 symbol_patterns: SP_AWK,
2827 ..HASH_BASE
2828 },
2829 ),
2830];
2831
/// Per-language switches that adjust how physical lines are classified.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// When set, a code-only line whose trimmed text starts with `#` is also
    /// tallied in `compiler_directive_lines`.
    has_preprocessor_directives: bool,
    /// When set, a blank line that starts inside an open block comment is
    /// flagged as a block-comment line; otherwise only non-blank lines are.
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside any block comment or string) is
    /// merged with the following line(s) and classified once.
    collapse_continuation_lines: bool,
}
2843
/// Kind of string literal the scanner is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Ordinary quoted string; holds the closing quote character. A backslash
    /// inside it skips the character that follows.
    Single(char),
    /// Triple-quoted string; holds the closing delimiter (`"""` or `'''`).
    Triple(&'static str),
    /// Verbatim string opened by `@"`: `""` is an escaped quote and a lone `"`
    /// closes the literal.
    VerbatimDouble,
}
2850
/// What the scanner observed on one physical line (or on a run of collapsed
/// continuation lines).
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// Code was seen; string-literal content counts as code.
    has_code: bool,
    /// A line-comment prefix was seen.
    has_single_comment: bool,
    /// Some part of the line lies inside a block comment.
    has_multi_comment: bool,
    /// Docstring marker. NOTE(review): nothing in this part of the file sets
    /// it; confirm where (or whether) it is populated.
    has_docstring: bool,
}
2859
2860fn process_string_char(
2864 state: StringState,
2865 chars: &[char],
2866 i: usize,
2867) -> (Option<StringState>, usize) {
2868 match state {
2869 StringState::Single(delim) => {
2870 if chars[i] == '\\' {
2871 return (Some(state), 2); }
2873 if chars[i] == delim {
2874 (None, 1)
2875 } else {
2876 (Some(state), 1)
2877 }
2878 }
2879 StringState::Triple(delim) => {
2880 if starts_with(chars, i, delim) {
2881 (None, delim.len())
2882 } else {
2883 (Some(state), 1)
2884 }
2885 }
2886 StringState::VerbatimDouble => {
2887 if starts_with(chars, i, "\"\"") {
2888 return (Some(state), 2); }
2890 if chars[i] == '"' {
2891 (None, 1)
2892 } else {
2893 (Some(state), 1)
2894 }
2895 }
2896 }
2897}
2898
2899fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2903 if starts_with(chars, i, close) {
2904 (false, close.len())
2905 } else {
2906 (true, 1)
2907 }
2908}
2909
2910fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2914 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2915 return Some((StringState::VerbatimDouble, 2));
2916 }
2917 if config.allow_triple_quote_strings {
2918 if starts_with(chars, i, "\"\"\"") {
2919 return Some((StringState::Triple("\"\"\""), 3));
2920 }
2921 if starts_with(chars, i, "'''") {
2922 return Some((StringState::Triple("'''"), 3));
2923 }
2924 }
2925 if config.allow_single_quote_strings && chars[i] == '\'' {
2926 return Some((StringState::Single('\''), 1));
2927 }
2928 if config.allow_double_quote_strings && chars[i] == '"' {
2929 return Some((StringState::Single('"'), 1));
2930 }
2931 None
2932}
2933
2934fn step_through_block_comment(
2940 chars: &[char],
2941 i: usize,
2942 block_comment: Option<(&'static str, &'static str)>,
2943 in_block_comment: &mut bool,
2944) -> usize {
2945 if let Some((_, close)) = block_comment {
2946 let (still_in, advance) = process_block_comment_char(chars, i, close);
2947 *in_block_comment = still_in;
2948 return advance;
2949 }
2950 0
2951}
2952
2953fn try_open_block_comment(
2956 chars: &[char],
2957 i: usize,
2958 block_comment: Option<(&'static str, &'static str)>,
2959) -> Option<usize> {
2960 let (open, _) = block_comment?;
2961 starts_with(chars, i, open).then_some(open.len())
2962}
2963
/// Scans one physical line and records what it contains in `facts`.
///
/// `in_block_comment` and `string_state` carry scanner state across lines: a
/// block comment or string opened on an earlier line is continued here, and
/// one left open at the end of this line is reported back through them.
/// The order of the checks below is significant: open strings and comments
/// are continued first, then new strings, block comments and line comments
/// are tried in that order.
fn scan_line(
    chars: &[char],
    config: &ScanConfig,
    facts: &mut LineFacts,
    in_block_comment: &mut bool,
    string_state: &mut Option<StringState>,
) {
    let mut i = 0usize;
    while i < chars.len() {
        if let Some(state) = *string_state {
            // Still inside a string literal: its content counts as code.
            facts.has_code = true;
            let (new_state, advance) = process_string_char(state, chars, i);
            *string_state = new_state;
            i += advance;
            continue;
        }

        if *in_block_comment {
            // Still inside a block comment: skip ahead, watching for the closer.
            facts.has_multi_comment = true;
            i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
            continue;
        }

        if chars[i].is_whitespace() {
            // Whitespace outside strings and comments carries no information.
            i += 1;
            continue;
        }

        if let Some((new_state, advance)) = try_open_string(chars, i, config) {
            // Strings are tried before comments so that comment markers inside
            // a literal are not mistaken for comments.
            facts.has_code = true;
            *string_state = Some(new_state);
            i += advance;
            continue;
        }

        if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
            // Block openers are tried before line-comment prefixes.
            facts.has_multi_comment = true;
            *in_block_comment = true;
            i += advance;
            continue;
        }

        if config
            .line_comments
            .iter()
            .any(|prefix| starts_with(chars, i, prefix))
        {
            // A line comment swallows the rest of the line.
            facts.has_single_comment = true;
            break;
        }

        // Any other non-whitespace character is code.
        facts.has_code = true;
        i += 1;
    }
}
3029
3030fn finalize_line_facts(
3035 facts: LineFacts,
3036 trimmed: &str,
3037 raw: &mut RawLineCounts,
3038 ieee: IeeeFlags,
3039 in_block_comment: bool,
3040 string_state: Option<StringState>,
3041 pending_continuation: &mut Option<LineFacts>,
3042) -> Option<LineFacts> {
3043 if ieee.has_preprocessor_directives
3047 && facts.has_code
3048 && !facts.has_single_comment
3049 && !facts.has_multi_comment
3050 && trimmed.starts_with('#')
3051 {
3052 raw.compiler_directive_lines += 1;
3053 }
3054
3055 let is_continuation = ieee.collapse_continuation_lines
3058 && !in_block_comment
3059 && string_state.is_none()
3060 && trimmed.ends_with('\\');
3061
3062 if is_continuation {
3063 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
3064 pending.has_code |= facts.has_code;
3065 pending.has_single_comment |= facts.has_single_comment;
3066 pending.has_multi_comment |= facts.has_multi_comment;
3067 pending.has_docstring |= facts.has_docstring;
3068 return None; }
3070
3071 let emit = if let Some(pending) = pending_continuation.take() {
3073 LineFacts {
3074 has_code: pending.has_code | facts.has_code,
3075 has_single_comment: pending.has_single_comment | facts.has_single_comment,
3076 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
3077 has_docstring: pending.has_docstring | facts.has_docstring,
3078 }
3079 } else {
3080 facts
3081 };
3082 Some(emit)
3083}
3084
3085#[allow(clippy::needless_pass_by_value)]
3090#[allow(clippy::too_many_arguments)]
3091#[allow(clippy::many_single_char_names)] fn process_physical_line(
3093 line: &str,
3094 line_idx: usize,
3095 config: &ScanConfig,
3096 raw: &mut RawLineCounts,
3097 in_block_comment: &mut bool,
3098 string_state: &mut Option<StringState>,
3099 pending_continuation: &mut Option<LineFacts>,
3100 ieee: IeeeFlags,
3101) {
3102 raw.total_physical_lines += 1;
3103
3104 if config.skip_lines.contains(&line_idx) {
3105 raw.docstring_comment_lines += 1;
3106 return;
3107 }
3108
3109 let trimmed = line.trim();
3110 let mut facts = LineFacts::default();
3111
3112 if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
3116 facts.has_multi_comment = true;
3117 }
3118
3119 let chars: Vec<char> = line.chars().collect();
3120 scan_line(&chars, config, &mut facts, in_block_comment, string_state);
3121
3122 let Some(emit) = finalize_line_facts(
3123 facts,
3124 trimmed,
3125 raw,
3126 ieee,
3127 *in_block_comment,
3128 *string_state,
3129 pending_continuation,
3130 ) else {
3131 return;
3132 };
3133
3134 classify_line(raw, &emit, trimmed);
3135
3136 if emit.has_code {
3137 use std::hash::{DefaultHasher, Hash, Hasher};
3138 let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
3139 raw.functions += f;
3140 raw.classes += c;
3141 raw.variables += v;
3142 raw.imports += i;
3143 raw.test_count += t;
3144 raw.test_assertion_count += a;
3145 raw.test_suite_count += s;
3146
3147 raw.cyclomatic_complexity +=
3149 count_branch_in_line(trimmed.as_bytes(), config.branch_keywords);
3150
3151 match config.lsloc_strategy {
3153 LslocStrategy::Semicolons => {
3154 let semi = u32::try_from(trimmed.bytes().filter(|&b| b == b';').count())
3155 .unwrap_or(u32::MAX);
3156 *raw.lsloc.get_or_insert(0) += semi;
3157 }
3158 LslocStrategy::NonContinuationNewlines => {
3159 let cont = trimmed.ends_with('\\')
3160 || trimmed.ends_with(',')
3161 || trimmed.ends_with('(')
3162 || trimmed.ends_with('[')
3163 || trimmed.ends_with('{');
3164 if !cont {
3165 *raw.lsloc.get_or_insert(0) += 1;
3166 }
3167 }
3168 LslocStrategy::Unsupported => {}
3169 }
3170
3171 let mut h = DefaultHasher::new();
3173 trimmed.hash(&mut h);
3174 raw.code_line_hashes.push(h.finish());
3175 }
3176}
3177
3178#[allow(clippy::needless_pass_by_value)]
3179fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
3180 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3181 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3182
3183 let mut raw = RawLineCounts::default();
3184 let mut warnings = Vec::new();
3185
3186 let mut in_block_comment = false;
3187 let mut string_state: Option<StringState> = None;
3188 let mut pending_continuation: Option<LineFacts> = None;
3190
3191 for (line_idx, line) in lines.iter().enumerate() {
3192 process_physical_line(
3193 line,
3194 line_idx,
3195 &config,
3196 &mut raw,
3197 &mut in_block_comment,
3198 &mut string_state,
3199 &mut pending_continuation,
3200 ieee,
3201 );
3202 }
3203
3204 if let Some(pending) = pending_continuation.take() {
3206 classify_line(&mut raw, &pending, "");
3207 }
3208
3209 if in_block_comment {
3210 warnings.push("unclosed block comment detected; result is best effort".into());
3211 }
3212 if string_state.is_some() {
3213 warnings.push("unclosed string literal detected; result is best effort".into());
3214 }
3215
3216 RawFileAnalysis {
3217 raw,
3218 parse_mode: if warnings.is_empty() {
3219 ParseMode::Lexical
3220 } else {
3221 ParseMode::LexicalBestEffort
3222 },
3223 warnings,
3224 style_analysis: None,
3225 }
3226}
3227
3228const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
3229 if facts.has_docstring {
3230 raw.docstring_comment_lines += 1;
3231 } else if !facts.has_code
3232 && !facts.has_single_comment
3233 && !facts.has_multi_comment
3234 && trimmed.is_empty()
3235 {
3236 raw.blank_only_lines += 1;
3237 } else if facts.has_code && facts.has_single_comment {
3238 raw.mixed_code_single_comment_lines += 1;
3239 } else if facts.has_code && facts.has_multi_comment {
3240 raw.mixed_code_multi_comment_lines += 1;
3241 } else if facts.has_code {
3242 raw.code_only_lines += 1;
3243 } else if facts.has_single_comment {
3244 raw.single_comment_only_lines += 1;
3245 } else if facts.has_multi_comment {
3246 raw.multi_comment_only_lines += 1;
3247 } else if trimmed.is_empty() {
3248 raw.blank_only_lines += 1;
3249 } else {
3250 raw.skipped_unknown_lines += 1;
3251 }
3252}
3253
3254fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
3255 let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
3256 let fn_pp = if patterns.functions_prefix_paren.is_empty() {
3259 0
3260 } else if let Some(paren_pos) = trimmed.find('(') {
3261 if trimmed[..paren_pos].contains('=') {
3262 0
3263 } else {
3264 hit(patterns.functions_prefix_paren)
3265 }
3266 } else {
3267 0
3268 };
3269 let test_hit = hit(patterns.tests);
3270 let fn_hit = if test_hit == 0 {
3277 hit(patterns.functions) | fn_pp
3278 } else {
3279 0
3280 };
3281 let class_hit = if test_hit == 0 {
3282 hit(patterns.classes)
3283 } else {
3284 0
3285 };
3286 let var_pnp: u64 = if !patterns.variables_prefix_no_paren.is_empty()
3289 && hit(patterns.variables_prefix_no_paren) != 0
3290 {
3291 if let Some(pp) = trimmed.find('(') {
3292 if trimmed[..pp].contains('=') {
3293 1
3294 } else {
3295 0
3296 }
3297 } else {
3298 1
3299 }
3300 } else {
3301 0
3302 };
3303 (
3304 fn_hit,
3305 class_hit,
3306 hit(patterns.variables) | var_pnp,
3307 hit(patterns.imports),
3308 test_hit,
3309 hit(patterns.assertions),
3310 hit(patterns.test_suites),
3311 )
3312}
3313
/// Reports whether `line[start..end]` is delimited on both sides by something
/// other than an identifier character (ASCII letter, digit or `_`).
///
/// The start and end of the line count as boundaries.
fn is_word_boundary(line: &[u8], start: usize, end: usize) -> bool {
    let is_ident_byte = |byte: u8| byte == b'_' || byte.is_ascii_alphanumeric();
    let clear_before = start == 0 || !is_ident_byte(line[start - 1]);
    let clear_after = end >= line.len() || !is_ident_byte(line[end]);
    clear_before && clear_after
}
3321
3322fn keyword_matches_at(line: &[u8], i: usize, kw_bytes: &[u8], word_kw: bool) -> bool {
3324 if &line[i..i + kw_bytes.len()] != kw_bytes {
3325 return false;
3326 }
3327 !word_kw || is_word_boundary(line, i, i + kw_bytes.len())
3328}
3329
3330fn count_branch_in_line(line: &[u8], keywords: &[&str]) -> u32 {
3335 if keywords.is_empty() || line.is_empty() {
3336 return 0;
3337 }
3338 let mut total = 0u32;
3339 for &kw in keywords {
3340 let kw_bytes = kw.as_bytes();
3341 let word_kw = kw.bytes().all(|b| b.is_ascii_alphabetic() || b == b'_');
3342 let mut i = 0usize;
3343 while i + kw_bytes.len() <= line.len() {
3344 if keyword_matches_at(line, i, kw_bytes, word_kw) {
3345 total += 1;
3346 i += kw_bytes.len();
3347 } else {
3348 i += 1;
3349 }
3350 }
3351 }
3352 total
3353}
3354
/// Reports whether the characters of `needle` appear in `chars` starting at
/// position `index`.
///
/// An `index` past the end of `chars` never matches; an empty `needle` matches
/// at every position up to and including `chars.len()`.
///
/// The comparison walks both sequences in lockstep, so no temporary buffer is
/// allocated; this function is called for every scanned character.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
3359
/// One entry on the indentation stack used while looking for Python docstrings.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation of the block body, measured as the number of leading
    /// whitespace characters.
    indent: usize,
    /// True until the first significant line of the block has been examined;
    /// only that line may start a docstring.
    expect_docstring: bool,
}
3365
3366fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
3368 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
3369 contexts.pop();
3370 }
3371}
3372
3373fn py_handle_pending_indent(
3376 pending_block_indent: &mut Option<usize>,
3377 contexts: &mut Vec<PyContext>,
3378 indent: usize,
3379 trimmed: &str,
3380) {
3381 let Some(base_indent) = *pending_block_indent else {
3382 return;
3383 };
3384 if indent > base_indent {
3385 contexts.push(PyContext {
3386 indent,
3387 expect_docstring: true,
3388 });
3389 *pending_block_indent = None;
3390 } else if !trimmed.starts_with('@') {
3391 *pending_block_indent = None;
3392 }
3393}
3394
3395fn py_try_record_docstring(
3401 ctx: &mut PyContext,
3402 trimmed: &str,
3403 idx: usize,
3404 docstring_lines: &mut HashSet<usize>,
3405 active_docstring: &mut Option<(&'static str, usize)>,
3406) -> bool {
3407 if !ctx.expect_docstring {
3408 return false;
3409 }
3410 if let Some(delim) = docstring_delimiter(trimmed) {
3411 docstring_lines.insert(idx);
3412 ctx.expect_docstring = false;
3413 if !closes_triple_docstring(trimmed, delim, true) {
3414 *active_docstring = Some((delim, idx));
3415 }
3416 return true;
3417 }
3418 ctx.expect_docstring = false;
3419 false
3420}
3421
3422fn track_active_docstring(
3426 active_docstring: &mut Option<(&'static str, usize)>,
3427 docstring_lines: &mut HashSet<usize>,
3428 idx: usize,
3429 trimmed: &str,
3430) -> bool {
3431 let Some((delim, start_line)) = *active_docstring else {
3432 return false;
3433 };
3434 docstring_lines.insert(idx);
3435 if closes_triple_docstring(trimmed, delim, idx == start_line) {
3436 *active_docstring = None;
3437 }
3438 true
3439}
3440
3441fn try_record_docstring_if_context(
3444 contexts: &mut [PyContext],
3445 trimmed: &str,
3446 idx: usize,
3447 docstring_lines: &mut HashSet<usize>,
3448 active_docstring: &mut Option<(&'static str, usize)>,
3449) -> bool {
3450 let Some(ctx) = contexts.last_mut() else {
3451 return false;
3452 };
3453 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
3454}
3455
/// Marks every line from the start of a still-open docstring to the end of
/// the file as a docstring line. Does nothing when no docstring is open.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, opened_at)) = active_docstring else {
        return;
    };
    docstring_lines.extend(opened_at..num_lines);
}
3468
3469fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
3470 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3471 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3472
3473 let mut docstring_lines = HashSet::new();
3474 let mut contexts = vec![PyContext {
3475 indent: 0,
3476 expect_docstring: true,
3477 }];
3478 let mut pending_block_indent: Option<usize> = None;
3479 let mut active_docstring: Option<(&'static str, usize)> = None;
3480
3481 for (idx, line) in lines.iter().enumerate() {
3482 let trimmed = line.trim();
3483 let indent = leading_indent(line);
3484
3485 if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
3486 continue;
3487 }
3488
3489 if trimmed.is_empty() || trimmed.starts_with('#') {
3491 continue;
3492 }
3493
3494 py_pop_outdented_contexts(&mut contexts, indent);
3495 py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
3496
3497 if try_record_docstring_if_context(
3498 &mut contexts,
3499 trimmed,
3500 idx,
3501 &mut docstring_lines,
3502 &mut active_docstring,
3503 ) {
3504 continue;
3505 }
3506
3507 if is_python_block_header(trimmed) {
3508 pending_block_indent = Some(indent);
3509 }
3510 }
3511
3512 mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
3513
3514 docstring_lines
3515}
3516
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, so a tab is one unit.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .position(|c| !c.is_whitespace())
        .unwrap_or_else(|| line.chars().count())
}
3520
/// Reports whether `trimmed` is a single-line `def`, `async def` or `class`
/// header, i.e. starts with one of those keywords and ends with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];
    trimmed.ends_with(':')
        && HEADER_KEYWORDS
            .iter()
            .any(|keyword| trimmed.starts_with(keyword))
}
3527
/// Returns the triple-quote delimiter (`"""` or `'''`) that `trimmed` opens
/// with, if any.
///
/// Any run of string-prefix letters (`r`, `u`, `b`, `f`, either case) in
/// front of the quotes is skipped first.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    let after_prefix = trimmed.trim_start_matches(|c: char| {
        matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F')
    });
    ["\"\"\"", "'''"]
        .iter()
        .copied()
        .find(|quote| after_prefix.starts_with(*quote))
}
3549
/// Reports whether a triple-quoted docstring delimited by `delim` is closed on
/// the line `trimmed`.
///
/// On the line that opened the docstring the first occurrence of `delim` is
/// the opener, so two non-overlapping occurrences are required; on any later
/// line a single occurrence closes it.
///
/// `delim` is expected to be non-empty. Counting is delegated to
/// `str::matches`, which always terminates, and stops as soon as enough
/// occurrences have been seen.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    let required = if same_line_as_start { 2 } else { 1 };
    trimmed.matches(delim).take(required).count() == required
}
3564
/// Tree-sitter based analysers; compiled only when the `tree-sitter` feature
/// is enabled.
#[cfg(feature = "tree-sitter")]
pub mod ts {
    use tree_sitter::Node;

    use super::{ParseMode, RawFileAnalysis, RawLineCounts};

    /// Grammar-specific node kinds and name prefixes used when counting
    /// symbols. An empty string disables the corresponding check.
    struct SymbolKinds {
        /// Node kind counted as a function definition.
        function_def: &'static str,
        /// Node kind counted as a class definition.
        class_def: &'static str,
        /// Functions whose name starts with this prefix are counted as tests
        /// instead of functions.
        test_fn_prefix: &'static str,
        /// Classes whose name starts with this prefix are counted as tests
        /// instead of classes.
        test_class_prefix: &'static str,
        /// `call` nodes on an `attribute` whose attribute name starts with
        /// this prefix are counted as assertions.
        assertion_attr_prefix: &'static str,
    }

    impl SymbolKinds {
        /// Creates a value with every field empty; with it, `analyze_lines`
        /// skips symbol counting altogether.
        const fn none() -> Self {
            Self {
                function_def: "",
                class_def: "",
                test_fn_prefix: "",
                test_class_prefix: "",
                assertion_attr_prefix: "",
            }
        }
    }

    /// Parses `text` with the given grammar and classifies every line.
    ///
    /// * `comment_node_kinds`: node kinds treated as comments.
    /// * `docstring_stmt_kind`: statement kind that is a docstring when its
    ///   only named child is a `string` node; `None` disables the check.
    /// * `symbols`: node kinds and prefixes for symbol counting.
    ///
    /// Returns `None` when the grammar cannot be loaded or parsing fails.
    fn analyze_lines(
        text: &str,
        ts_language: &tree_sitter::Language,
        comment_node_kinds: &[&str],
        docstring_stmt_kind: Option<&str>,
        symbols: &SymbolKinds,
    ) -> Option<RawFileAnalysis> {
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(ts_language).ok()?;
        let tree = parser.parse(text, None)?;

        let lines: Vec<&str> = text.split_terminator('\n').collect();
        let n = lines.len();

        // One flag per physical line, filled in while walking the tree.
        let mut has_code = vec![false; n];
        let mut has_comment = vec![false; n];
        let mut comment_is_block = vec![false; n];
        let mut has_docstring = vec![false; n];

        let mut ctx = VisitCtx {
            source: text.as_bytes(),
            comment_kinds: comment_node_kinds,
            docstring_stmt_kind,
            has_code: &mut has_code,
            has_comment: &mut has_comment,
            comment_is_block: &mut comment_is_block,
            has_docstring: &mut has_docstring,
        };
        visit(tree.root_node(), &mut ctx);

        let mut raw = RawLineCounts::default();
        classify_ts_lines(
            &lines,
            &has_code,
            &has_comment,
            &comment_is_block,
            &has_docstring,
            &mut raw,
        );

        // Symbol counting only makes sense when at least one definition kind
        // is configured.
        if !symbols.function_def.is_empty() || !symbols.class_def.is_empty() {
            count_symbols(tree.root_node(), text.as_bytes(), symbols, &mut raw);
        }

        Some(RawFileAnalysis {
            raw,
            parse_mode: ParseMode::TreeSitter,
            warnings: Vec::new(),
            style_analysis: None,
        })
    }

    /// Applies `count_symbols` to every direct child of `node`.
    fn recurse_children(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
        for i in 0..node.child_count() {
            #[allow(clippy::cast_possible_truncation)]
            if let Some(child) = node.child(i as u32) {
                count_symbols(child, source, kinds, raw);
            }
        }
    }

    /// Counts `node` as a function (or as a test when its name carries the
    /// test prefix) if it is a function definition, then descends into it.
    ///
    /// Returns `true` when the node was handled here.
    fn try_count_function(
        node: Node,
        source: &[u8],
        kinds: &SymbolKinds,
        raw: &mut RawLineCounts,
    ) -> bool {
        if kinds.function_def.is_empty() || node.kind() != kinds.function_def {
            return false;
        }
        // A missing or non-UTF-8 name is treated as an empty name.
        let name = node
            .child_by_field_name("name")
            .and_then(|n| n.utf8_text(source).ok())
            .unwrap_or("");
        if !kinds.test_fn_prefix.is_empty() && name.starts_with(kinds.test_fn_prefix) {
            raw.test_count += 1;
        } else {
            raw.functions += 1;
        }
        recurse_children(node, source, kinds, raw);
        true
    }

    /// Counts `node` as a class (or as a test when its name carries the test
    /// prefix) if it is a class definition, then descends into it.
    ///
    /// Returns `true` when the node was handled here.
    fn try_count_class(
        node: Node,
        source: &[u8],
        kinds: &SymbolKinds,
        raw: &mut RawLineCounts,
    ) -> bool {
        if kinds.class_def.is_empty() || node.kind() != kinds.class_def {
            return false;
        }
        // A missing or non-UTF-8 name is treated as an empty name.
        let name = node
            .child_by_field_name("name")
            .and_then(|n| n.utf8_text(source).ok())
            .unwrap_or("");
        if !kinds.test_class_prefix.is_empty() && name.starts_with(kinds.test_class_prefix) {
            raw.test_count += 1;
        } else {
            raw.classes += 1;
        }
        recurse_children(node, source, kinds, raw);
        true
    }

    /// Counts `node` as a test assertion when it is a `call` whose `function`
    /// is an `attribute` node with an attribute name starting with the
    /// configured prefix.
    ///
    /// Returns `true` when the node was counted; its children are then not
    /// visited.
    fn try_count_assertion(
        node: Node,
        source: &[u8],
        kinds: &SymbolKinds,
        raw: &mut RawLineCounts,
    ) -> bool {
        if kinds.assertion_attr_prefix.is_empty() || node.kind() != "call" {
            return false;
        }
        let Some(func) = node.child_by_field_name("function") else {
            return false;
        };
        if func.kind() != "attribute" {
            return false;
        }
        let attr_text = func
            .child_by_field_name("attribute")
            .and_then(|n| n.utf8_text(source).ok())
            .unwrap_or("");
        if !attr_text.starts_with(kinds.assertion_attr_prefix) {
            return false;
        }
        raw.test_assertion_count += 1;
        true
    }

    /// Recursively counts functions, classes, tests and assertions in the
    /// subtree rooted at `node`.
    fn count_symbols(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
        // Each helper reports whether it consumed the node.
        if try_count_function(node, source, kinds, raw) {
            return;
        }
        if try_count_class(node, source, kinds, raw) {
            return;
        }
        if try_count_assertion(node, source, kinds, raw) {
            return;
        }
        recurse_children(node, source, kinds, raw);
    }

    /// Per-line flags derived from the syntax tree.
    #[allow(clippy::struct_excessive_bools)]
    #[derive(Clone, Copy)]
    struct TsLineFlags {
        /// A non-comment leaf node touches the line.
        has_code: bool,
        /// A comment node touches the line.
        has_comment: bool,
        /// The comment on the line starts with `/*` or `<#`.
        comment_is_block: bool,
        /// A docstring statement touches the line.
        has_docstring: bool,
    }

    /// Increments exactly one line-category counter for a single line.
    ///
    /// Precedence: blank, docstring without code, mixed code and comment,
    /// comment only; everything else is code only.
    const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
        if trimmed.is_empty() {
            raw.blank_only_lines += 1;
        } else if flags.has_docstring && !flags.has_code {
            raw.docstring_comment_lines += 1;
        } else if flags.has_code && flags.has_comment {
            if flags.comment_is_block {
                raw.mixed_code_multi_comment_lines += 1;
            } else {
                raw.mixed_code_single_comment_lines += 1;
            }
        } else if flags.has_comment {
            if flags.comment_is_block {
                raw.multi_comment_only_lines += 1;
            } else {
                raw.single_comment_only_lines += 1;
            }
        } else {
            raw.code_only_lines += 1;
        }
    }

    /// Classifies every line from the per-line flag arrays, counting each as a
    /// physical line. All slices are indexed by line number and must be at
    /// least as long as `lines`.
    fn classify_ts_lines(
        lines: &[&str],
        has_code: &[bool],
        has_comment: &[bool],
        comment_is_block: &[bool],
        has_docstring: &[bool],
        raw: &mut RawLineCounts,
    ) {
        for i in 0..lines.len() {
            raw.total_physical_lines += 1;
            classify_ts_line(
                lines[i].trim(),
                TsLineFlags {
                    has_code: has_code[i],
                    has_comment: has_comment[i],
                    comment_is_block: comment_is_block[i],
                    has_docstring: has_docstring[i],
                },
                raw,
            );
        }
    }

    /// Inputs and per-line output arrays shared by the tree walk.
    struct VisitCtx<'a> {
        /// Source text as bytes, used to read node text.
        source: &'a [u8],
        /// Node kinds treated as comments.
        comment_kinds: &'a [&'a str],
        /// Statement kind that may be a docstring, if the language has one.
        docstring_stmt_kind: Option<&'a str>,
        /// Set for every row touched by a code leaf.
        has_code: &'a mut Vec<bool>,
        /// Set for every row touched by a comment node.
        has_comment: &'a mut Vec<bool>,
        /// Set for every row touched by a block comment.
        comment_is_block: &'a mut Vec<bool>,
        /// Set for every row touched by a docstring.
        has_docstring: &'a mut Vec<bool>,
    }

    /// Marks every row spanned by a comment node, and additionally flags the
    /// rows as block comment when the comment text starts with `/*` or `<#`.
    fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
        let start_row = node.start_position().row;
        let end_row = node.end_position().row;
        // `get(..2)` yields `None` for short text or a split multi-byte
        // character; both fall back to "".
        let first_two = node
            .utf8_text(ctx.source)
            .unwrap_or("")
            .get(..2)
            .unwrap_or("");
        let is_block = first_two == "/*" || first_two == "<#";
        for row in start_row..=end_row {
            // Rows beyond the line arrays are ignored.
            if row < ctx.has_comment.len() {
                ctx.has_comment[row] = true;
                if is_block {
                    ctx.comment_is_block[row] = true;
                }
            }
        }
    }

    /// Marks the rows of `node` as docstring when it is a statement of the
    /// configured kind whose only named child is a `string` node.
    ///
    /// Returns `true` when the node was recognised as a docstring.
    fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
        let Some(stmt_kind) = ctx.docstring_stmt_kind else {
            return false;
        };
        if kind != stmt_kind || node.named_child_count() != 1 {
            return false;
        }
        let Some(child) = node.named_child(0) else {
            return false;
        };
        if child.kind() != "string" {
            return false;
        }
        let child_start = child.start_position().row;
        let child_end = child.end_position().row;
        for row in child_start..=child_end {
            if row < ctx.has_docstring.len() {
                ctx.has_docstring[row] = true;
            }
        }
        true
    }

    /// Marks every row spanned by a leaf node as containing code.
    fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
        let start_row = node.start_position().row;
        let end_row = node.end_position().row;
        for row in start_row..=end_row {
            if row < ctx.has_code.len() {
                ctx.has_code[row] = true;
            }
        }
    }

    /// Walks the subtree rooted at `node` and fills the per-line flags.
    ///
    /// Comment nodes and docstring statements are handled as a whole and not
    /// descended into; childless nodes that are not "extra" count as code.
    #[allow(clippy::too_many_lines)]
    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
        let kind = node.kind();

        if ctx.comment_kinds.contains(&kind) {
            visit_comment_node(node, ctx);
            return;
        }

        if visit_maybe_docstring(node, kind, ctx) {
            return;
        }

        if node.child_count() == 0 && !node.is_extra() {
            visit_leaf_code(node, ctx);
            return;
        }

        for i in 0..node.child_count() {
            #[allow(clippy::cast_possible_truncation)]
            if let Some(child) = node.child(i as u32) {
                visit(child, ctx);
            }
        }
    }

    /// C analysis does not count symbols through the syntax tree.
    const C_SYMBOLS: SymbolKinds = SymbolKinds::none();

    /// Node kinds and naming prefixes used for Python symbol counting.
    const PYTHON_SYMBOLS: SymbolKinds = SymbolKinds {
        function_def: "function_definition",
        class_def: "class_definition",
        test_fn_prefix: "test_",
        test_class_prefix: "Test",
        assertion_attr_prefix: "assert",
    };

    /// Analyses C source text with the tree-sitter C grammar.
    ///
    /// Returns `None` when the grammar cannot be loaded or parsing fails.
    #[must_use]
    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
        analyze_lines(text, &lang, &["comment"], None, &C_SYMBOLS)
    }

    /// Analyses Python source text with the tree-sitter Python grammar,
    /// treating an `expression_statement` that consists solely of a string as
    /// a docstring.
    ///
    /// Returns `None` when the grammar cannot be loaded or parsing fails.
    #[must_use]
    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        analyze_lines(
            text,
            &lang,
            &["comment"],
            Some("expression_statement"),
            &PYTHON_SYMBOLS,
        )
    }
}
3948
#[cfg(test)]
mod tests {
    use super::*;

    use std::collections::BTreeMap;
    use std::path::Path;

    // ------------------------------------------------------------------
    // Helpers
    // ------------------------------------------------------------------

    /// Analyzes a single `line` of `lang` source (a trailing newline is
    /// appended) with default options and returns the symbol counters as
    /// `(functions, classes, variables, imports, test_count,
    /// test_assertion_count, test_suite_count)`.
    fn sym(lang: Language, line: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
        let source = format!("{line}\n");
        let result = analyze_text(lang, &source, AnalysisOptions::default());
        let r = &result.raw;
        (
            r.functions,
            r.classes,
            r.variables,
            r.imports,
            r.test_count,
            r.test_assertion_count,
            r.test_suite_count,
        )
    }

    /// Runs `detect_language` on `file_name` alone: no first line, no
    /// extension overrides, and `false` for the shebang flag.
    fn detect_by_path(file_name: &str) -> Option<Language> {
        detect_language(Path::new(file_name), None, &BTreeMap::new(), false)
    }

    /// Runs `detect_language` on an extensionless file named `script` whose
    /// first line is `first_line`, with no extension overrides.
    /// `shebang_enabled` is forwarded as the final flag of `detect_language`.
    fn detect_by_shebang(first_line: &str, shebang_enabled: bool) -> Option<Language> {
        detect_language(
            Path::new("script"),
            Some(first_line),
            &BTreeMap::new(),
            shebang_enabled,
        )
    }

    // ------------------------------------------------------------------
    // Line classification
    // ------------------------------------------------------------------

    /// Python docstrings are tallied separately from `#` comments and code.
    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
 """function docs"""
 value = 1 # trailing comment
 return value
"#;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        assert_eq!(result.raw.docstring_comment_lines, 2);
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.code_only_lines, 2);
    }

    /// A C line with code plus `//` is "mixed"; a lone `/* */` line is a
    /// multi-line-comment-only line.
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let result = analyze_text(Language::C, input, AnalysisOptions::default());
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.multi_comment_only_lines, 1);
    }

    /// An `env bash` shebang identifies an extensionless file as Shell.
    #[test]
    fn detect_language_by_shebang() {
        let language = detect_by_shebang("#!/usr/bin/env bash", true);
        assert_eq!(language, Some(Language::Shell));
    }

    // ------------------------------------------------------------------
    // Symbol counting: tests vs. regular definitions
    // ------------------------------------------------------------------

    #[test]
    fn python_test_fn_not_double_counted() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def test_foo():");
        assert_eq!(f, 0, "test fn must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(c, 0);
    }

    #[test]
    fn python_test_class_not_double_counted() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class TestFoo:");
        assert_eq!(c, 0, "test class must not also increment classes");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(f, 0);
    }

    #[test]
    fn python_regular_fn_counts_as_function() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def regular():");
        assert_eq!(f, 1, "regular function must be counted");
        assert_eq!(t, 0);
        assert_eq!(c, 0);
    }

    #[test]
    fn python_regular_class_counts_as_class() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class Regular:");
        assert_eq!(c, 1, "regular class must be counted");
        assert_eq!(t, 0);
        assert_eq!(f, 0);
    }

    #[test]
    fn go_test_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func TestFoo(t *testing.T) {");
        assert_eq!(f, 0, "Go test func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    #[test]
    fn go_benchmark_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func BenchmarkBar(b *testing.B) {");
        assert_eq!(f, 0, "Go benchmark func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    #[test]
    fn go_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func doSomething() {");
        assert_eq!(f, 1, "regular Go func must be counted");
        assert_eq!(t, 0);
    }

    /// The `#[test]` attribute line is what marks a Rust test.
    #[test]
    fn rust_test_attr_counts_as_test_not_function() {
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "#[test]");
        assert_eq!(t, 1, "#[test] must be counted as a test");
        assert_eq!(f, 0, "#[test] attribute must not be counted as a function");
    }

    /// A Rust `fn` line is a function even when its name starts with `test_`.
    #[test]
    fn rust_fn_line_counts_as_function_not_test() {
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "fn test_something() {");
        assert_eq!(f, 1, "fn declaration must count as a function");
        assert_eq!(
            t, 0,
            "fn declaration line must not be double-counted as a test"
        );
    }

    #[test]
    fn js_describe_counts_as_test_not_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "describe('suite', () => {");
        assert_eq!(t, 1, "describe must be counted as a test");
        assert_eq!(f, 0, "describe must not be counted as a function");
    }

    #[test]
    fn js_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "function doWork() {");
        assert_eq!(f, 1, "JS function declaration must be counted");
        assert_eq!(t, 0);
    }

    // ------------------------------------------------------------------
    // Language detection by extension / exact file name
    // ------------------------------------------------------------------

    #[test]
    fn detect_language_rs_extension() {
        assert_eq!(detect_by_path("foo.rs"), Some(Language::Rust));
    }

    #[test]
    fn detect_language_py_extension() {
        assert_eq!(detect_by_path("foo.py"), Some(Language::Python));
    }

    #[test]
    fn detect_language_ts_extension() {
        assert_eq!(detect_by_path("app.ts"), Some(Language::TypeScript));
    }

    #[test]
    fn detect_language_js_extension() {
        assert_eq!(detect_by_path("app.js"), Some(Language::JavaScript));
    }

    #[test]
    fn detect_language_go_extension() {
        assert_eq!(detect_by_path("main.go"), Some(Language::Go));
    }

    #[test]
    fn detect_language_c_extension() {
        assert_eq!(detect_by_path("main.c"), Some(Language::C));
    }

    #[test]
    fn detect_language_cpp_extension() {
        assert_eq!(detect_by_path("main.cpp"), Some(Language::Cpp));
    }

    #[test]
    fn detect_language_java_extension() {
        assert_eq!(detect_by_path("Main.java"), Some(Language::Java));
    }

    #[test]
    fn detect_language_makefile_exact_name() {
        assert_eq!(detect_by_path("Makefile"), Some(Language::Makefile));
    }

    #[test]
    fn detect_language_dockerfile_exact_name() {
        assert_eq!(detect_by_path("Dockerfile"), Some(Language::Dockerfile));
    }

    #[test]
    fn detect_language_rakefile() {
        assert_eq!(detect_by_path("Rakefile"), Some(Language::Ruby));
    }

    #[test]
    fn detect_language_gemfile() {
        assert_eq!(detect_by_path("Gemfile"), Some(Language::Ruby));
    }

    #[test]
    fn detect_language_unknown_extension_returns_none() {
        assert_eq!(detect_by_path("foo.xyz123"), None);
    }

    /// A user-supplied extension override (`h` -> `cpp`) decides the language.
    #[test]
    fn detect_language_extension_override() {
        let mut overrides = BTreeMap::new();
        overrides.insert("h".into(), "cpp".into());
        let lang = detect_language(Path::new("header.h"), None, &overrides, false);
        assert_eq!(lang, Some(Language::Cpp));
    }

    // ------------------------------------------------------------------
    // Language detection by shebang
    // ------------------------------------------------------------------

    #[test]
    fn detect_language_shebang_python() {
        let lang = detect_by_shebang("#!/usr/bin/env python3", true);
        assert_eq!(lang, Some(Language::Python));
    }

    #[test]
    fn detect_language_shebang_bash() {
        let lang = detect_by_shebang("#!/bin/bash", true);
        assert_eq!(lang, Some(Language::Shell));
    }

    #[test]
    fn detect_language_shebang_ruby() {
        let lang = detect_by_shebang("#!/usr/bin/env ruby", true);
        assert_eq!(lang, Some(Language::Ruby));
    }

    /// With the shebang flag off, a shebang line alone yields no language.
    #[test]
    fn detect_language_shebang_disabled() {
        let lang = detect_by_shebang("#!/usr/bin/env python3", false);
        assert_eq!(lang, None);
    }

    // ------------------------------------------------------------------
    // Language::from_name
    // ------------------------------------------------------------------

    #[test]
    fn from_name_rust() {
        assert_eq!(Language::from_name("rust"), Some(Language::Rust));
    }

    #[test]
    fn from_name_python() {
        assert_eq!(Language::from_name("python"), Some(Language::Python));
    }

    #[test]
    fn from_name_unknown() {
        assert_eq!(Language::from_name("brainfuck"), None);
    }

    /// Every `Language` variant must survive `as_slug` -> `from_name`.
    ///
    /// The list mirrors the enum declaration order; extend it whenever a
    /// variant is added so the "all" in the test name stays true.
    #[test]
    fn from_name_roundtrip_all() {
        let all_languages = [
            Language::C,
            Language::Cpp,
            Language::CSharp,
            Language::Go,
            Language::Java,
            Language::JavaScript,
            Language::Python,
            Language::Rust,
            Language::Shell,
            Language::PowerShell,
            Language::TypeScript,
            Language::Assembly,
            Language::Clojure,
            Language::Css,
            Language::Dart,
            Language::Dockerfile,
            Language::Elixir,
            Language::Erlang,
            Language::FSharp,
            Language::Groovy,
            Language::Haskell,
            Language::Html,
            Language::Julia,
            Language::Kotlin,
            Language::Lua,
            Language::Makefile,
            Language::Nim,
            Language::ObjectiveC,
            Language::Ocaml,
            Language::Perl,
            Language::Php,
            Language::R,
            Language::Ruby,
            Language::Scala,
            Language::Scss,
            Language::Sql,
            Language::Svelte,
            Language::Swift,
            Language::Vue,
            Language::Xml,
            Language::Zig,
            // Variants declared after `Zig`, previously missing from this test.
            Language::Solidity,
            Language::Protobuf,
            Language::Hcl,
            Language::GraphQl,
            Language::Ada,
            Language::Vhdl,
            Language::Verilog,
            Language::Tcl,
            Language::Pascal,
            Language::VisualBasic,
            Language::Lisp,
            Language::Fortran,
            Language::Nix,
            Language::Crystal,
            Language::D,
            Language::Glsl,
            Language::Cmake,
            Language::Elm,
            Language::Awk,
        ];

        for lang in all_languages {
            let slug = lang.as_slug();
            let roundtripped = Language::from_name(slug);
            assert_eq!(
                roundtripped,
                Some(lang),
                "from_name({slug:?}) should return {lang:?}"
            );
        }
    }

    // ------------------------------------------------------------------
    // Policy: blank lines inside block comments
    // ------------------------------------------------------------------

    /// With `blank_in_block_comment_as_comment` set, the empty line inside
    /// `/* ... */` is counted as a comment line.
    #[test]
    fn blank_in_block_comment_defaults_to_comment() {
        let input = "/*\n\n*/";
        let opts = AnalysisOptions {
            blank_in_block_comment_as_comment: true,
            ..Default::default()
        };
        let result = analyze_text(Language::C, input, opts);
        assert_eq!(
            result.raw.multi_comment_only_lines, 3,
            "all 3 block-comment lines must count as multi-comment with CountAsComment policy"
        );
        assert_eq!(
            result.raw.blank_only_lines, 0,
            "no blank lines expected with CountAsComment policy"
        );
    }

    /// With `blank_in_block_comment_as_comment` cleared, the empty line
    /// inside `/* ... */` is counted as blank.
    #[test]
    fn blank_in_block_comment_counted_as_blank_when_policy_false() {
        let input = "/*\n\n*/";
        let opts = AnalysisOptions {
            blank_in_block_comment_as_comment: false,
            ..Default::default()
        };
        let result = analyze_text(Language::C, input, opts);
        assert_eq!(
            result.raw.multi_comment_only_lines, 2,
            "opener and closer must count as multi-comment with CountAsBlank policy"
        );
        assert_eq!(
            result.raw.blank_only_lines, 1,
            "the blank line inside the block comment must count as blank with CountAsBlank policy"
        );
    }

    // ------------------------------------------------------------------
    // Policy: backslash continuation lines
    // ------------------------------------------------------------------

    /// With `collapse_continuation_lines` cleared, each physical line of a
    /// backslash-continued macro counts as its own code line.
    #[test]
    fn continuation_lines_each_physical_default() {
        let input = "#define FOO \\\n 1 \\\n + 2\n";
        let opts = AnalysisOptions {
            collapse_continuation_lines: false,
            ..Default::default()
        };
        let result = analyze_text(Language::C, input, opts);
        assert_eq!(
            result.raw.total_physical_lines, 3,
            "3 physical lines expected"
        );
        assert_eq!(
            result.raw.code_only_lines, 3,
            "each physical line must count as code with EachPhysicalLine policy"
        );
    }

    /// With `collapse_continuation_lines` set, the continued macro counts as
    /// one code line while the physical line total is unchanged.
    #[test]
    fn continuation_lines_collapse_to_logical() {
        let input = "#define FOO \\\n 1 \\\n + 2\n";
        let opts = AnalysisOptions {
            collapse_continuation_lines: true,
            ..Default::default()
        };
        let result = analyze_text(Language::C, input, opts);
        assert_eq!(
            result.raw.total_physical_lines, 3,
            "physical line count is always 3 regardless of policy"
        );
        assert_eq!(
            result.raw.code_only_lines, 1,
            "3 continuation lines must collapse to 1 logical code line"
        );
    }
}