1pub mod style;
5pub use style::{IndentStyle, StyleAnalysis, StyleGuideScore, StyleSignal};
6
7use std::collections::{BTreeMap, BTreeSet, HashSet};
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
12#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
13#[serde(rename_all = "snake_case")]
14pub enum Language {
15 C,
16 Cpp,
17 CSharp,
18 Go,
19 Java,
20 JavaScript,
21 Python,
22 Rust,
23 Shell,
24 PowerShell,
25 TypeScript,
26 Assembly,
28 Clojure,
29 Css,
30 Dart,
31 Dockerfile,
32 Elixir,
33 Erlang,
34 FSharp,
35 Groovy,
36 Haskell,
37 Html,
38 Julia,
39 Kotlin,
40 Lua,
41 Makefile,
42 Nim,
43 ObjectiveC,
44 Ocaml,
45 Perl,
46 Php,
47 R,
48 Ruby,
49 Scala,
50 Scss,
51 Sql,
52 Svelte,
53 Swift,
54 Vue,
55 Xml,
56 Zig,
57 Solidity,
59 Protobuf,
60 Hcl,
61 GraphQl,
62 Ada,
64 Vhdl,
65 Verilog,
66 Tcl,
67 Pascal,
68 VisualBasic,
69 Lisp,
70 Fortran,
72 Nix,
73 Crystal,
74 D,
75 Glsl,
76 Cmake,
77 Elm,
78 Awk,
79}
80
81impl Language {
82 #[must_use]
83 pub const fn display_name(&self) -> &'static str {
84 match self {
85 Self::C => "C",
86 Self::Cpp => "C++",
87 Self::CSharp => "C#",
88 Self::Go => "Go",
89 Self::Java => "Java",
90 Self::JavaScript => "JavaScript",
91 Self::Python => "Python",
92 Self::Rust => "Rust",
93 Self::Shell => "Shell",
94 Self::PowerShell => "PowerShell",
95 Self::TypeScript => "TypeScript",
96 Self::Assembly => "Assembly",
97 Self::Clojure => "Clojure",
98 Self::Css => "CSS",
99 Self::Dart => "Dart",
100 Self::Dockerfile => "Dockerfile",
101 Self::Elixir => "Elixir",
102 Self::Erlang => "Erlang",
103 Self::FSharp => "F#",
104 Self::Groovy => "Groovy",
105 Self::Haskell => "Haskell",
106 Self::Html => "HTML",
107 Self::Julia => "Julia",
108 Self::Kotlin => "Kotlin",
109 Self::Lua => "Lua",
110 Self::Makefile => "Makefile",
111 Self::Nim => "Nim",
112 Self::ObjectiveC => "Objective-C",
113 Self::Ocaml => "OCaml",
114 Self::Perl => "Perl",
115 Self::Php => "PHP",
116 Self::R => "R",
117 Self::Ruby => "Ruby",
118 Self::Scala => "Scala",
119 Self::Scss => "SCSS",
120 Self::Sql => "SQL",
121 Self::Svelte => "Svelte",
122 Self::Swift => "Swift",
123 Self::Vue => "Vue",
124 Self::Xml => "XML",
125 Self::Zig => "Zig",
126 Self::Solidity => "Solidity",
127 Self::Protobuf => "Protocol Buffers",
128 Self::Hcl => "HCL/Terraform",
129 Self::GraphQl => "GraphQL",
130 Self::Ada => "Ada",
131 Self::Vhdl => "VHDL",
132 Self::Verilog => "Verilog/SystemVerilog",
133 Self::Tcl => "Tcl",
134 Self::Pascal => "Pascal/Delphi",
135 Self::VisualBasic => "Visual Basic",
136 Self::Lisp => "Lisp/Scheme",
137 Self::Fortran => "Fortran",
138 Self::Nix => "Nix",
139 Self::Crystal => "Crystal",
140 Self::D => "D",
141 Self::Glsl => "GLSL/HLSL",
142 Self::Cmake => "CMake",
143 Self::Elm => "Elm",
144 Self::Awk => "Awk",
145 }
146 }
147
148 #[must_use]
149 pub const fn as_slug(&self) -> &'static str {
150 match self {
151 Self::C => "c",
152 Self::Cpp => "cpp",
153 Self::CSharp => "csharp",
154 Self::Go => "go",
155 Self::Java => "java",
156 Self::JavaScript => "javascript",
157 Self::Python => "python",
158 Self::Rust => "rust",
159 Self::Shell => "shell",
160 Self::PowerShell => "powershell",
161 Self::TypeScript => "typescript",
162 Self::Assembly => "assembly",
163 Self::Clojure => "clojure",
164 Self::Css => "css",
165 Self::Dart => "dart",
166 Self::Dockerfile => "dockerfile",
167 Self::Elixir => "elixir",
168 Self::Erlang => "erlang",
169 Self::FSharp => "fsharp",
170 Self::Groovy => "groovy",
171 Self::Haskell => "haskell",
172 Self::Html => "html",
173 Self::Julia => "julia",
174 Self::Kotlin => "kotlin",
175 Self::Lua => "lua",
176 Self::Makefile => "makefile",
177 Self::Nim => "nim",
178 Self::ObjectiveC => "objectivec",
179 Self::Ocaml => "ocaml",
180 Self::Perl => "perl",
181 Self::Php => "php",
182 Self::R => "r",
183 Self::Ruby => "ruby",
184 Self::Scala => "scala",
185 Self::Scss => "scss",
186 Self::Sql => "sql",
187 Self::Svelte => "svelte",
188 Self::Swift => "swift",
189 Self::Vue => "vue",
190 Self::Xml => "xml",
191 Self::Zig => "zig",
192 Self::Solidity => "solidity",
193 Self::Protobuf => "protobuf",
194 Self::Hcl => "hcl",
195 Self::GraphQl => "graphql",
196 Self::Ada => "ada",
197 Self::Vhdl => "vhdl",
198 Self::Verilog => "verilog",
199 Self::Tcl => "tcl",
200 Self::Pascal => "pascal",
201 Self::VisualBasic => "visualbasic",
202 Self::Lisp => "lisp",
203 Self::Fortran => "fortran",
204 Self::Nix => "nix",
205 Self::Crystal => "crystal",
206 Self::D => "d",
207 Self::Glsl => "glsl",
208 Self::Cmake => "cmake",
209 Self::Elm => "elm",
210 Self::Awk => "awk",
211 }
212 }
213
214 #[must_use]
215 pub fn from_name(name: &str) -> Option<Self> {
216 match name.trim().to_ascii_lowercase().as_str() {
217 "c" => Some(Self::C),
218 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
219 "csharp" | "c#" | "cs" => Some(Self::CSharp),
220 "go" | "golang" => Some(Self::Go),
221 "java" => Some(Self::Java),
222 "javascript" | "js" => Some(Self::JavaScript),
223 "python" | "py" => Some(Self::Python),
224 "rust" | "rs" => Some(Self::Rust),
225 "shell" | "sh" | "bash" => Some(Self::Shell),
226 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
227 "typescript" | "ts" => Some(Self::TypeScript),
228 "assembly" | "asm" => Some(Self::Assembly),
229 "clojure" | "clj" => Some(Self::Clojure),
230 "css" => Some(Self::Css),
231 "dart" => Some(Self::Dart),
232 "dockerfile" | "docker" => Some(Self::Dockerfile),
233 "elixir" | "ex" => Some(Self::Elixir),
234 "erlang" | "erl" => Some(Self::Erlang),
235 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
236 "groovy" => Some(Self::Groovy),
237 "haskell" | "hs" => Some(Self::Haskell),
238 "html" | "htm" => Some(Self::Html),
239 "julia" | "jl" => Some(Self::Julia),
240 "kotlin" | "kt" => Some(Self::Kotlin),
241 "lua" => Some(Self::Lua),
242 "makefile" | "make" | "mk" => Some(Self::Makefile),
243 "nim" => Some(Self::Nim),
244 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
245 "ocaml" | "ml" => Some(Self::Ocaml),
246 "perl" | "pl" => Some(Self::Perl),
247 "php" => Some(Self::Php),
248 "r" => Some(Self::R),
249 "ruby" | "rb" => Some(Self::Ruby),
250 "scala" => Some(Self::Scala),
251 "scss" | "sass" => Some(Self::Scss),
252 "sql" => Some(Self::Sql),
253 "svelte" => Some(Self::Svelte),
254 "swift" => Some(Self::Swift),
255 "vue" => Some(Self::Vue),
256 "xml" => Some(Self::Xml),
257 "zig" => Some(Self::Zig),
258 "solidity" | "sol" => Some(Self::Solidity),
259 "protobuf" | "proto" | "protocolbuffers" => Some(Self::Protobuf),
260 "hcl" | "terraform" | "tf" => Some(Self::Hcl),
261 "graphql" | "gql" => Some(Self::GraphQl),
262 "ada" => Some(Self::Ada),
263 "vhdl" => Some(Self::Vhdl),
264 "verilog" | "systemverilog" | "sv" => Some(Self::Verilog),
265 "tcl" => Some(Self::Tcl),
266 "pascal" | "delphi" | "pas" => Some(Self::Pascal),
267 "visualbasic" | "vb" | "vbnet" | "vb.net" => Some(Self::VisualBasic),
268 "lisp" | "scheme" | "racket" | "clisp" | "elisp" => Some(Self::Lisp),
269 "fortran" | "f90" | "f95" => Some(Self::Fortran),
270 "nix" => Some(Self::Nix),
271 "crystal" | "cr" => Some(Self::Crystal),
272 "d" | "dlang" => Some(Self::D),
273 "glsl" | "hlsl" | "shader" | "wgsl" => Some(Self::Glsl),
274 "cmake" => Some(Self::Cmake),
275 "elm" => Some(Self::Elm),
276 "awk" => Some(Self::Awk),
277 _ => None,
278 }
279 }
280}
281
282#[derive(Debug, Clone, Serialize, Deserialize, Default)]
283pub struct RawLineCounts {
284 pub total_physical_lines: u64,
285 pub blank_only_lines: u64,
286 pub code_only_lines: u64,
287 pub single_comment_only_lines: u64,
288 pub multi_comment_only_lines: u64,
289 pub mixed_code_single_comment_lines: u64,
290 pub mixed_code_multi_comment_lines: u64,
291 pub docstring_comment_lines: u64,
292 pub skipped_unknown_lines: u64,
293 #[serde(default)]
295 pub functions: u64,
296 #[serde(default)]
298 pub classes: u64,
299 #[serde(default)]
301 pub variables: u64,
302 #[serde(default)]
304 pub imports: u64,
305 #[serde(default)]
309 pub compiler_directive_lines: u64,
310 #[serde(default)]
313 pub test_count: u64,
314 #[serde(default)]
317 pub test_assertion_count: u64,
318 #[serde(default)]
321 pub test_suite_count: u64,
322 #[serde(default)]
325 pub cyclomatic_complexity: u32,
326 #[serde(default, skip_serializing_if = "Option::is_none")]
329 pub lsloc: Option<u32>,
330 #[serde(skip)]
333 pub code_line_hashes: Vec<u64>,
334}
335
336#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
337#[serde(rename_all = "snake_case")]
338pub enum ParseMode {
339 Lexical,
340 LexicalBestEffort,
341 TreeSitter,
342}
343
344#[derive(Debug, Clone, Serialize, Deserialize)]
345pub struct RawFileAnalysis {
346 pub raw: RawLineCounts,
347 pub parse_mode: ParseMode,
348 pub warnings: Vec<String>,
349 #[serde(default, skip_serializing_if = "Option::is_none")]
351 pub style_analysis: Option<StyleAnalysis>,
352}
353
354#[derive(Debug, Clone, Copy)]
359pub struct AnalysisOptions {
360 pub blank_in_block_comment_as_comment: bool,
363 pub collapse_continuation_lines: bool,
366 pub enable_style: bool,
369 pub style_lang_scope: StyleLangScope,
372}
373
/// Selects the set of languages for which style analysis is performed.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum StyleLangScope {
    /// Style-analyse every language.
    All,
    /// Style-analyse only C, C++ and Objective-C.
    CFamilyOnly,
}
380
/// How logical source lines (LSLOC) are derived for a language.
///
/// The per-language choice is made in `language_complexity_config`.
// NOTE(review): the variant descriptions below are inferred from the names;
// the counting logic itself lives outside this section — confirm there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LslocStrategy {
    /// Statement count based on semicolons.
    Semicolons,
    /// Statement count based on newlines that do not continue a statement.
    NonContinuationNewlines,
    /// No LSLOC figure is produced for the language.
    Unsupported,
}
393
394impl Default for AnalysisOptions {
395 fn default() -> Self {
396 Self {
397 blank_in_block_comment_as_comment: true,
398 collapse_continuation_lines: false,
399 enable_style: true,
400 style_lang_scope: StyleLangScope::All,
401 }
402 }
403}
404
405#[must_use]
406pub fn supported_languages() -> BTreeSet<Language> {
407 [
408 Language::Assembly,
409 Language::C,
410 Language::Clojure,
411 Language::Cpp,
412 Language::CSharp,
413 Language::Css,
414 Language::Dart,
415 Language::Dockerfile,
416 Language::Elixir,
417 Language::Erlang,
418 Language::FSharp,
419 Language::Go,
420 Language::Groovy,
421 Language::Haskell,
422 Language::Html,
423 Language::Java,
424 Language::JavaScript,
425 Language::Julia,
426 Language::Kotlin,
427 Language::Lua,
428 Language::Makefile,
429 Language::Nim,
430 Language::ObjectiveC,
431 Language::Ocaml,
432 Language::Perl,
433 Language::Php,
434 Language::PowerShell,
435 Language::Python,
436 Language::R,
437 Language::Ruby,
438 Language::Rust,
439 Language::Scala,
440 Language::Scss,
441 Language::Shell,
442 Language::Sql,
443 Language::Svelte,
444 Language::Swift,
445 Language::TypeScript,
446 Language::Vue,
447 Language::Xml,
448 Language::Zig,
449 Language::Solidity,
450 Language::Protobuf,
451 Language::Hcl,
452 Language::GraphQl,
453 Language::Ada,
454 Language::Vhdl,
455 Language::Verilog,
456 Language::Tcl,
457 Language::Pascal,
458 Language::VisualBasic,
459 Language::Lisp,
460 Language::Fortran,
461 Language::Nix,
462 Language::Crystal,
463 Language::D,
464 Language::Glsl,
465 Language::Cmake,
466 Language::Elm,
467 Language::Awk,
468 ]
469 .into_iter()
470 .collect()
471}
472
473fn detect_by_shebang(line: &str) -> Option<Language> {
475 let lower = line.to_ascii_lowercase();
476 if !lower.starts_with("#!") {
477 return None;
478 }
479 if lower.contains("python") {
480 return Some(Language::Python);
481 }
482 if lower.contains("pwsh") || lower.contains("powershell") {
483 return Some(Language::PowerShell);
484 }
485 if lower.contains("bash")
486 || lower.contains("/sh")
487 || lower.contains("zsh")
488 || lower.contains("ksh")
489 {
490 return Some(Language::Shell);
491 }
492 if lower.contains("ruby") {
493 return Some(Language::Ruby);
494 }
495 if lower.contains("perl") {
496 return Some(Language::Perl);
497 }
498 if lower.contains("php") {
499 return Some(Language::Php);
500 }
501 if lower.contains("node") || lower.contains("nodejs") {
502 return Some(Language::JavaScript);
503 }
504 None
505}
506
507#[allow(clippy::too_many_lines)]
509fn detect_by_extension(ext: &str) -> Option<Language> {
510 static EXT_MAP: &[(&str, Language)] = &[
512 ("c", Language::C),
513 ("h", Language::C),
514 ("cc", Language::Cpp),
515 ("cp", Language::Cpp),
516 ("cpp", Language::Cpp),
517 ("cxx", Language::Cpp),
518 ("hh", Language::Cpp),
519 ("hpp", Language::Cpp),
520 ("hxx", Language::Cpp),
521 ("cs", Language::CSharp),
522 ("go", Language::Go),
523 ("java", Language::Java),
524 ("js", Language::JavaScript),
525 ("mjs", Language::JavaScript),
526 ("cjs", Language::JavaScript),
527 ("py", Language::Python),
528 ("rs", Language::Rust),
529 ("sh", Language::Shell),
530 ("bash", Language::Shell),
531 ("zsh", Language::Shell),
532 ("ksh", Language::Shell),
533 ("ps1", Language::PowerShell),
534 ("psm1", Language::PowerShell),
535 ("psd1", Language::PowerShell),
536 ("ts", Language::TypeScript),
537 ("mts", Language::TypeScript),
538 ("cts", Language::TypeScript),
539 ("tsx", Language::TypeScript),
540 ("jsx", Language::JavaScript),
541 ("asm", Language::Assembly),
542 ("s", Language::Assembly),
543 ("clj", Language::Clojure),
544 ("cljs", Language::Clojure),
545 ("cljc", Language::Clojure),
546 ("edn", Language::Clojure),
547 ("css", Language::Css),
548 ("dart", Language::Dart),
549 ("ex", Language::Elixir),
550 ("exs", Language::Elixir),
551 ("erl", Language::Erlang),
552 ("hrl", Language::Erlang),
553 ("fs", Language::FSharp),
554 ("fsi", Language::FSharp),
555 ("fsx", Language::FSharp),
556 ("groovy", Language::Groovy),
557 ("gradle", Language::Groovy),
558 ("hs", Language::Haskell),
559 ("lhs", Language::Haskell),
560 ("html", Language::Html),
561 ("htm", Language::Html),
562 ("xhtml", Language::Html),
563 ("jl", Language::Julia),
564 ("kt", Language::Kotlin),
565 ("kts", Language::Kotlin),
566 ("lua", Language::Lua),
567 ("mk", Language::Makefile),
568 ("nim", Language::Nim),
569 ("nims", Language::Nim),
570 ("m", Language::ObjectiveC),
571 ("mm", Language::ObjectiveC),
572 ("ml", Language::Ocaml),
573 ("mli", Language::Ocaml),
574 ("pl", Language::Perl),
575 ("pm", Language::Perl),
576 ("t", Language::Perl),
577 ("php", Language::Php),
578 ("php3", Language::Php),
579 ("php4", Language::Php),
580 ("php5", Language::Php),
581 ("php7", Language::Php),
582 ("phtml", Language::Php),
583 ("r", Language::R),
584 ("rb", Language::Ruby),
585 ("rake", Language::Ruby),
586 ("scala", Language::Scala),
587 ("sc", Language::Scala),
588 ("scss", Language::Scss),
589 ("sass", Language::Scss),
590 ("sql", Language::Sql),
591 ("svelte", Language::Svelte),
592 ("swift", Language::Swift),
593 ("vue", Language::Vue),
594 ("xml", Language::Xml),
595 ("xsd", Language::Xml),
596 ("xsl", Language::Xml),
597 ("xslt", Language::Xml),
598 ("svg", Language::Xml),
599 ("zig", Language::Zig),
600 ("sol", Language::Solidity),
601 ("proto", Language::Protobuf),
602 ("tf", Language::Hcl),
603 ("tfvars", Language::Hcl),
604 ("hcl", Language::Hcl),
605 ("graphql", Language::GraphQl),
606 ("gql", Language::GraphQl),
607 ("adb", Language::Ada),
608 ("ads", Language::Ada),
609 ("ada", Language::Ada),
610 ("vhd", Language::Vhdl),
611 ("vhdl", Language::Vhdl),
612 ("v", Language::Verilog),
613 ("sv", Language::Verilog),
614 ("svh", Language::Verilog),
615 ("vh", Language::Verilog),
616 ("tcl", Language::Tcl),
617 ("pas", Language::Pascal),
618 ("dpr", Language::Pascal),
619 ("vb", Language::VisualBasic),
620 ("bas", Language::VisualBasic),
621 ("lisp", Language::Lisp),
622 ("lsp", Language::Lisp),
623 ("el", Language::Lisp),
624 ("scm", Language::Lisp),
625 ("ss", Language::Lisp),
626 ("rkt", Language::Lisp),
627 ("f90", Language::Fortran),
628 ("f95", Language::Fortran),
629 ("f03", Language::Fortran),
630 ("f08", Language::Fortran),
631 ("f", Language::Fortran),
632 ("for", Language::Fortran),
633 ("nix", Language::Nix),
634 ("cr", Language::Crystal),
635 ("d", Language::D),
636 ("glsl", Language::Glsl),
637 ("vert", Language::Glsl),
638 ("frag", Language::Glsl),
639 ("comp", Language::Glsl),
640 ("geom", Language::Glsl),
641 ("tesc", Language::Glsl),
642 ("tese", Language::Glsl),
643 ("hlsl", Language::Glsl),
644 ("wgsl", Language::Glsl),
645 ("cmake", Language::Cmake),
646 ("elm", Language::Elm),
647 ("awk", Language::Awk),
648 ];
649 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
650}
651
652fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
654 if filename == "Dockerfile"
656 || filename.starts_with("Dockerfile.")
657 || filename_lower == "dockerfile"
658 {
659 return Some(Language::Dockerfile);
660 }
661 if matches!(
663 filename,
664 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
665 ) {
666 return Some(Language::Makefile);
667 }
668 if matches!(
670 filename,
671 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
672 ) {
673 return Some(Language::Ruby);
674 }
675 if filename == "CMakeLists.txt" || filename_lower == "cmakelists.txt" {
678 return Some(Language::Cmake);
679 }
680 None
681}
682
683#[must_use]
684#[allow(clippy::too_many_lines)]
685pub fn detect_language(
686 path: &Path,
687 first_line: Option<&str>,
688 extension_overrides: &BTreeMap<String, String>,
689 shebang_detection: bool,
690) -> Option<Language> {
691 let extension = path
692 .extension()
693 .and_then(|ext| ext.to_str())
694 .map(str::to_ascii_lowercase);
695
696 if let Some(ext) = extension.as_ref() {
698 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
699 if let Some(lang) = Language::from_name(override_name) {
700 return Some(lang);
701 }
702 }
703 }
704
705 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
707 let filename_lower = filename.to_ascii_lowercase();
708
709 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
710 return Some(lang);
711 }
712
713 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
715 return Some(lang);
716 }
717
718 if shebang_detection {
720 if let Some(line) = first_line {
721 if let Some(lang) = detect_by_shebang(line) {
722 return Some(lang);
723 }
724 }
725 }
726
727 None
728}
729
730#[must_use]
731pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
732 #[cfg(feature = "tree-sitter")]
734 {
735 match language {
736 Language::C | Language::Cpp => {
737 if let Some(mut result) = ts::analyze_c(text) {
738 if options.enable_style
739 && should_style_analyse(language, options.style_lang_scope)
740 {
741 result.style_analysis = style::analyze_style(language, text);
742 }
743 return result;
744 }
745 }
746 Language::Python => {
747 if let Some(result) = ts::analyze_python(text) {
748 return result;
749 }
750 }
751 _ => {}
752 }
753 }
754
755 let (mut config, has_preprocessor) = language_scan_config(language);
756
757 if language == Language::Python {
759 config.skip_lines = detect_python_docstring_lines(text);
760 }
761
762 let flags = IeeeFlags {
765 has_preprocessor_directives: has_preprocessor,
766 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
767 collapse_continuation_lines: options.collapse_continuation_lines,
768 };
769 let mut result = analyze_generic(text, config, flags);
770 if options.enable_style && should_style_analyse(language, options.style_lang_scope) {
771 result.style_analysis = style::analyze_style(language, text);
772 }
773 result
774}
775
776const fn should_style_analyse(language: Language, scope: StyleLangScope) -> bool {
778 match scope {
779 StyleLangScope::CFamilyOnly => {
780 matches!(language, Language::C | Language::Cpp | Language::ObjectiveC)
781 }
782 StyleLangScope::All => true,
783 }
784}
785
786fn language_scan_config(language: Language) -> (ScanConfig, bool) {
794 let cfg = LANG_SCAN_TABLE
795 .iter()
796 .find_map(|&(l, c)| (l == language).then_some(c))
797 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
798 let (branch_keywords, lsloc_strategy) = language_complexity_config(language);
799 (
800 ScanConfig {
801 line_comments: cfg.line_comments,
802 block_comment: cfg.block_comment,
803 allow_single_quote_strings: cfg.allow_single_quote_strings,
804 allow_double_quote_strings: cfg.allow_double_quote_strings,
805 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
806 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
807 skip_lines: HashSet::new(),
808 symbol_patterns: cfg.symbol_patterns,
809 branch_keywords,
810 lsloc_strategy,
811 },
812 cfg.has_preprocessor,
813 )
814}
815
// Branch-keyword tables, one per language family. `language_complexity_config`
// selects which table applies to which language.
// NOTE(review): these presumably feed the cyclomatic-complexity count, and
// element order may matter to the matcher — keep it stable unless verified.

/// C-style branch tokens without the ternary operator.
const BRANCH_C_FAMILY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&",
];

/// C-style branch tokens including the ternary `?`.
const BRANCH_C_TERNARY: &[&str] = &[
    "if", "else", "for", "while", "switch", "case", "catch", "||", "&&", "?",
];

/// Go.
const BRANCH_GO: &[&str] = &["if", "else", "for", "switch", "case", "select", "||", "&&"];

/// Rust.
const BRANCH_RUST: &[&str] = &["if", "else", "for", "while", "match", "||", "&&"];

/// Zig.
const BRANCH_ZIG: &[&str] = &["if", "else", "for", "while", "switch", "catch", "||", "&&"];

/// F#.
const BRANCH_FSHARP: &[&str] = &["if", "then", "else", "elif", "match", "when", "||", "&&"];

/// Lua.
const BRANCH_LUA: &[&str] = &[
    "if", "elseif", "else", "for", "while", "repeat", "and", "or",
];

/// Haskell.
const BRANCH_HASKELL: &[&str] = &["if", "then", "else", "case", "otherwise"];

/// SQL, listed in both upper and lower case.
const BRANCH_SQL: &[&str] = &["CASE", "WHEN", "IF", "ELSE", "case", "when", "if", "else"];

/// OCaml.
const BRANCH_OCAML: &[&str] = &["if", "then", "else", "match", "when", "||", "&&"];

/// Clojure.
const BRANCH_CLOJURE: &[&str] = &["if", "when", "cond", "case", "and", "or"];

/// PHP.
const BRANCH_PHP: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "case", "catch", "match", "||", "&&", "?",
];

/// Julia.
const BRANCH_JULIA: &[&str] = &["if", "elseif", "else", "for", "while", "catch", "||", "&&"];

/// Python.
const BRANCH_PYTHON: &[&str] = &["if", "elif", "else", "for", "while", "except", "or", "and"];

/// Ruby (also used for Crystal).
const BRANCH_RUBY: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "while", "case", "when", "rescue", "||", "&&",
];

/// Shell.
const BRANCH_SHELL: &[&str] = &["if", "elif", "else", "while", "until", "case", "||", "&&"];

/// Elixir.
const BRANCH_ELIXIR: &[&str] = &[
    "if", "else", "cond", "case", "when", "rescue", "||", "&&", "and", "or",
];

/// PowerShell.
const BRANCH_POWERSHELL: &[&str] = &[
    "if", "elseif", "else", "for", "while", "switch", "foreach", "||", "&&",
];

/// Nim.
const BRANCH_NIM: &[&str] = &[
    "if", "elif", "else", "for", "while", "case", "of", "except", "and", "or",
];

/// Perl.
const BRANCH_PERL: &[&str] = &[
    "if", "elsif", "else", "unless", "until", "for", "while", "foreach", "||", "&&",
];

/// R.
const BRANCH_R: &[&str] = &["if", "else", "for", "while", "repeat", "||", "&&"];

/// Ada.
const BRANCH_ADA: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or",
];

/// VHDL.
const BRANCH_VHDL: &[&str] = &[
    "if", "elsif", "else", "case", "when", "loop", "while", "for", "and", "or", "nand", "nor",
    "xor",
];

/// Verilog / SystemVerilog.
const BRANCH_VERILOG: &[&str] = &[
    "if", "else", "case", "casex", "casez", "for", "while", "&&", "||",
];

/// Tcl.
const BRANCH_TCL: &[&str] = &["if", "elseif", "else", "switch", "while", "for", "foreach"];

/// Pascal / Delphi.
const BRANCH_PASCAL: &[&str] = &[
    "if", "then", "else", "case", "while", "for", "repeat", "until", "and", "or",
];

/// Visual Basic, listed in capitalised form only.
const BRANCH_VB: &[&str] = &[
    "If", "Then", "ElseIf", "Else", "Select", "Case", "While", "For", "Do", "And", "Or",
];

/// Lisp dialects.
const BRANCH_LISP: &[&str] = &["if", "when", "unless", "cond", "case", "and", "or"];

/// Fortran.
const BRANCH_FORTRAN: &[&str] = &[
    "if", "then", "else", "elseif", "case", "do", "while", "where",
];

/// Nix.
const BRANCH_NIX: &[&str] = &["if", "then", "else"];

/// CMake; each entry includes the opening parenthesis of the command.
const BRANCH_CMAKE: &[&str] = &["if(", "elseif(", "else(", "while(", "foreach("];

/// Elm.
const BRANCH_ELM: &[&str] = &["if", "then", "else", "case", "of"];

/// Awk.
const BRANCH_AWK: &[&str] = &["if", "else", "while", "for", "do"];
886
887const fn language_complexity_config(
890 language: Language,
891) -> (&'static [&'static str], LslocStrategy) {
892 match language {
893 Language::C
895 | Language::Cpp
896 | Language::ObjectiveC
897 | Language::CSharp
898 | Language::JavaScript
899 | Language::TypeScript
900 | Language::Svelte
901 | Language::Vue
902 | Language::Dart
903 | Language::Groovy
904 | Language::Swift
905 | Language::Solidity => (BRANCH_C_TERNARY, LslocStrategy::Semicolons),
906 Language::Java | Language::Kotlin | Language::Scala | Language::D | Language::Glsl => {
908 (BRANCH_C_FAMILY, LslocStrategy::Semicolons)
909 }
910 Language::Go => (BRANCH_GO, LslocStrategy::Semicolons),
911 Language::Rust => (BRANCH_RUST, LslocStrategy::Semicolons),
912 Language::Zig => (BRANCH_ZIG, LslocStrategy::Semicolons),
913 Language::FSharp => (BRANCH_FSHARP, LslocStrategy::Unsupported),
914 Language::Shell => (BRANCH_SHELL, LslocStrategy::NonContinuationNewlines),
916 Language::Elixir => (BRANCH_ELIXIR, LslocStrategy::NonContinuationNewlines),
917 Language::Perl => (BRANCH_PERL, LslocStrategy::Semicolons),
918 Language::R => (BRANCH_R, LslocStrategy::NonContinuationNewlines),
919 Language::Ruby | Language::Crystal => (BRANCH_RUBY, LslocStrategy::NonContinuationNewlines),
920 Language::Python => (BRANCH_PYTHON, LslocStrategy::NonContinuationNewlines),
921 Language::PowerShell => (BRANCH_POWERSHELL, LslocStrategy::Unsupported),
922 Language::Nim => (BRANCH_NIM, LslocStrategy::NonContinuationNewlines),
923 Language::Lua => (BRANCH_LUA, LslocStrategy::Unsupported),
925 Language::Haskell => (BRANCH_HASKELL, LslocStrategy::Unsupported),
926 Language::Sql => (BRANCH_SQL, LslocStrategy::Semicolons),
927 Language::Ocaml => (BRANCH_OCAML, LslocStrategy::Semicolons),
928 Language::Clojure => (BRANCH_CLOJURE, LslocStrategy::Unsupported),
929 Language::Php => (BRANCH_PHP, LslocStrategy::Semicolons),
930 Language::Julia => (BRANCH_JULIA, LslocStrategy::NonContinuationNewlines),
931 Language::Protobuf => (&[], LslocStrategy::Semicolons),
932 Language::Hcl => (&[], LslocStrategy::NonContinuationNewlines),
933 Language::Ada => (BRANCH_ADA, LslocStrategy::Semicolons),
935 Language::Vhdl => (BRANCH_VHDL, LslocStrategy::Semicolons),
936 Language::Verilog => (BRANCH_VERILOG, LslocStrategy::Semicolons),
937 Language::Tcl => (BRANCH_TCL, LslocStrategy::NonContinuationNewlines),
938 Language::Pascal => (BRANCH_PASCAL, LslocStrategy::Semicolons),
939 Language::VisualBasic => (BRANCH_VB, LslocStrategy::NonContinuationNewlines),
940 Language::Lisp => (BRANCH_LISP, LslocStrategy::Unsupported),
941 Language::Fortran => (BRANCH_FORTRAN, LslocStrategy::NonContinuationNewlines),
943 Language::Nix => (BRANCH_NIX, LslocStrategy::Unsupported),
944 Language::Cmake => (BRANCH_CMAKE, LslocStrategy::Unsupported),
945 Language::Elm => (BRANCH_ELM, LslocStrategy::Unsupported),
946 Language::Awk => (BRANCH_AWK, LslocStrategy::NonContinuationNewlines),
947 Language::Makefile
949 | Language::Dockerfile
950 | Language::Css
951 | Language::Html
952 | Language::Xml
953 | Language::Assembly
954 | Language::Erlang
955 | Language::GraphQl
956 | Language::Scss => (&[], LslocStrategy::Unsupported),
957 }
958}
959
/// Static pattern lists used to recognise symbols (functions, classes,
/// imports, tests, ...) in a language's source text.
///
/// Every field is a `'static` slice of string patterns, so the whole struct
/// is `Copy` and usable in `const` tables.
// NOTE(review): how each pattern is matched (line prefix vs. substring) is
// decided by the scanner, which is outside this section — confirm there.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Patterns marking a function definition.
    functions: &'static [&'static str],
    /// Additional function patterns.
    // NOTE(review): presumably a prefix that must be followed by `(` — verify.
    functions_prefix_paren: &'static [&'static str],
    /// Patterns marking a class or other type-like definition.
    classes: &'static [&'static str],
    /// Patterns marking a variable declaration.
    variables: &'static [&'static str],
    /// Patterns marking an import.
    imports: &'static [&'static str],
    /// Patterns marking a test.
    tests: &'static [&'static str],
    /// Patterns marking a test assertion.
    assertions: &'static [&'static str],
    /// Patterns marking a test suite.
    test_suites: &'static [&'static str],
}

impl SymbolPatterns {
    /// Returns a pattern set in which every list is empty.
    const fn none() -> Self {
        const NOTHING: &[&str] = &[];
        Self {
            functions: NOTHING,
            functions_prefix_paren: NOTHING,
            classes: NOTHING,
            variables: NOTHING,
            imports: NOTHING,
            tests: NOTHING,
            assertions: NOTHING,
            test_suites: NOTHING,
        }
    }
}
1000
1001const SP_NONE: SymbolPatterns = SymbolPatterns::none(); const SP_SOLIDITY: SymbolPatterns = SymbolPatterns {
1006 functions: &[
1007 "function ",
1008 "modifier ",
1009 "constructor",
1010 "receive ",
1011 "fallback ",
1012 ],
1013 functions_prefix_paren: &[],
1014 classes: &["contract ", "interface ", "library ", "struct ", "enum "],
1015 variables: &[],
1016 imports: &["import "],
1017 tests: &[],
1018 assertions: &[],
1019 test_suites: &[],
1020};
1021
1022const SP_PROTOBUF: SymbolPatterns = SymbolPatterns {
1025 functions: &["rpc "],
1026 functions_prefix_paren: &[],
1027 classes: &["message ", "service ", "enum "],
1028 variables: &[],
1029 imports: &["import "],
1030 tests: &[],
1031 assertions: &[],
1032 test_suites: &[],
1033};
1034
1035const SP_ADA: SymbolPatterns = SymbolPatterns {
1037 functions: &["procedure ", "function "],
1038 functions_prefix_paren: &[],
1039 classes: &["package ", "type ", "task ", "protected "],
1040 variables: &[],
1041 imports: &["with ", "use "],
1042 tests: &[],
1043 assertions: &[],
1044 test_suites: &[],
1045};
1046
1047const SP_VHDL: SymbolPatterns = SymbolPatterns {
1048 functions: &["function ", "procedure ", "process "],
1049 functions_prefix_paren: &[],
1050 classes: &["entity ", "architecture ", "package ", "component "],
1051 variables: &[],
1052 imports: &["library ", "use "],
1053 tests: &[],
1054 assertions: &[],
1055 test_suites: &[],
1056};
1057
1058const SP_VERILOG: SymbolPatterns = SymbolPatterns {
1059 functions: &["function ", "task "],
1060 functions_prefix_paren: &[],
1061 classes: &["module ", "interface ", "class ", "package "],
1062 variables: &[],
1063 imports: &["import ", "`include"],
1064 tests: &[],
1065 assertions: &[],
1066 test_suites: &[],
1067};
1068
1069const SP_TCL: SymbolPatterns = SymbolPatterns {
1070 functions: &["proc "],
1071 functions_prefix_paren: &[],
1072 classes: &[],
1073 variables: &[],
1074 imports: &["source ", "package require "],
1075 tests: &[],
1076 assertions: &[],
1077 test_suites: &[],
1078};
1079
1080const SP_PASCAL: SymbolPatterns = SymbolPatterns {
1081 functions: &["procedure ", "function "],
1082 functions_prefix_paren: &[],
1083 classes: &["type ", "class ", "record "],
1084 variables: &[],
1085 imports: &["uses "],
1086 tests: &[],
1087 assertions: &[],
1088 test_suites: &[],
1089};
1090
1091const SP_VB: SymbolPatterns = SymbolPatterns {
1092 functions: &[
1093 "Sub ",
1094 "Function ",
1095 "Private Sub ",
1096 "Public Sub ",
1097 "Private Function ",
1098 "Public Function ",
1099 ],
1100 functions_prefix_paren: &[],
1101 classes: &["Class ", "Module ", "Structure "],
1102 variables: &[],
1103 imports: &["Imports "],
1104 tests: &[],
1105 assertions: &[],
1106 test_suites: &[],
1107};
1108
1109const SP_LISP: SymbolPatterns = SymbolPatterns {
1110 functions: &["(defun ", "(defmacro ", "(define ", "(defmethod ", "(defn "],
1111 functions_prefix_paren: &[],
1112 classes: &["(defclass ", "(defstruct "],
1113 variables: &[],
1114 imports: &["(require ", "(import ", "(use-package "],
1115 tests: &[],
1116 assertions: &[],
1117 test_suites: &[],
1118};
1119
1120const SP_FORTRAN: SymbolPatterns = SymbolPatterns {
1122 functions: &["subroutine ", "function "],
1123 functions_prefix_paren: &[],
1124 classes: &["module ", "program ", "type "],
1125 variables: &[],
1126 imports: &["use ", "include "],
1127 tests: &[],
1128 assertions: &[],
1129 test_suites: &[],
1130};
1131
1132const SP_CRYSTAL: SymbolPatterns = SymbolPatterns {
1133 functions: &["def "],
1134 functions_prefix_paren: &[],
1135 classes: &["class ", "module ", "struct ", "enum "],
1136 variables: &[],
1137 imports: &["require "],
1138 tests: &[],
1139 assertions: &[],
1140 test_suites: &[],
1141};
1142
1143const SP_D: SymbolPatterns = SymbolPatterns {
1144 functions: &[],
1145 functions_prefix_paren: &[],
1146 classes: &["class ", "struct ", "interface ", "enum ", "template "],
1147 variables: &[],
1148 imports: &["import "],
1149 tests: &[],
1150 assertions: &[],
1151 test_suites: &[],
1152};
1153
1154const SP_CMAKE: SymbolPatterns = SymbolPatterns {
1155 functions: &["function(", "macro("],
1156 functions_prefix_paren: &[],
1157 classes: &[],
1158 variables: &[],
1159 imports: &["include(", "add_subdirectory("],
1160 tests: &[],
1161 assertions: &[],
1162 test_suites: &[],
1163};
1164
1165const SP_ELM: SymbolPatterns = SymbolPatterns {
1166 functions: &[],
1167 functions_prefix_paren: &[],
1168 classes: &["type "],
1169 variables: &[],
1170 imports: &["import "],
1171 tests: &[],
1172 assertions: &[],
1173 test_suites: &[],
1174};
1175
1176const SP_AWK: SymbolPatterns = SymbolPatterns {
1177 functions: &["function "],
1178 functions_prefix_paren: &[],
1179 classes: &[],
1180 variables: &[],
1181 imports: &[],
1182 tests: &[],
1183 assertions: &[],
1184 test_suites: &[],
1185};
1186
1187const SP_RUST: SymbolPatterns = SymbolPatterns {
1188 functions: &[
1189 "fn ",
1190 "pub fn ",
1191 "pub(crate) fn ",
1192 "pub(super) fn ",
1193 "async fn ",
1194 "pub async fn ",
1195 "pub(crate) async fn ",
1196 "unsafe fn ",
1197 "pub unsafe fn ",
1198 "pub(crate) unsafe fn ",
1199 "const fn ",
1200 "pub const fn ",
1201 "pub(crate) const fn ",
1202 "extern fn ",
1203 "pub extern fn ",
1204 ],
1205 functions_prefix_paren: &[],
1206 classes: &[
1207 "struct ",
1208 "pub struct ",
1209 "pub(crate) struct ",
1210 "enum ",
1211 "pub enum ",
1212 "pub(crate) enum ",
1213 "trait ",
1214 "pub trait ",
1215 "pub(crate) trait ",
1216 "impl ",
1217 "impl<",
1218 "type ",
1219 "pub type ",
1220 "pub(crate) type ",
1221 ],
1222 variables: &["let ", "let mut "],
1223 imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
1224 tests: &[
1226 "#[test]",
1227 "#[tokio::test]",
1228 "#[actix_web::test]",
1229 "#[rstest]",
1230 "#[test_case",
1231 ],
1232 assertions: &[
1233 "assert_eq!(",
1234 "assert_ne!(",
1235 "assert!(",
1236 "assert_matches!(",
1237 "assert_err!(",
1238 "assert_ok!(",
1239 ],
1240 test_suites: &[],
1241};
1242
1243const SP_PYTHON: SymbolPatterns = SymbolPatterns {
1244 functions: &["def ", "async def "],
1245 functions_prefix_paren: &[],
1246 classes: &["class "],
1247 variables: &[],
1248 imports: &["import ", "from "],
1249 tests: &["def test_", "async def test_", "class Test"],
1251 assertions: &[
1252 "self.assertEqual(",
1253 "self.assertNotEqual(",
1254 "self.assertTrue(",
1255 "self.assertFalse(",
1256 "self.assertIsNone(",
1257 "self.assertIsNotNone(",
1258 "self.assertIn(",
1259 "self.assertNotIn(",
1260 "self.assertRaises(",
1261 "self.assertAlmostEqual(",
1262 ],
1263 test_suites: &[],
1264};
1265
1266const SP_JS: SymbolPatterns = SymbolPatterns {
1267 functions: &[
1268 "function ",
1269 "async function ",
1270 "export function ",
1271 "export async function ",
1272 "export default function ",
1273 ],
1274 functions_prefix_paren: &[],
1275 classes: &["class ", "export class ", "export default class "],
1276 variables: &[
1277 "var ",
1278 "let ",
1279 "const ",
1280 "export var ",
1281 "export let ",
1282 "export const ",
1283 ],
1284 imports: &["import "],
1285 tests: &[
1287 "describe(",
1288 "it(",
1289 "test(",
1290 "it.each(",
1291 "test.each(",
1292 "describe.each(",
1293 ],
1294 assertions: &["expect("],
1295 test_suites: &[],
1296};
1297
1298const SP_TS: SymbolPatterns = SymbolPatterns {
1299 functions: &[
1300 "function ",
1301 "async function ",
1302 "export function ",
1303 "export async function ",
1304 "export default function ",
1305 ],
1306 functions_prefix_paren: &[],
1307 classes: &[
1308 "class ",
1309 "export class ",
1310 "export default class ",
1311 "abstract class ",
1312 "export abstract class ",
1313 "interface ",
1314 "export interface ",
1315 "declare class ",
1316 "declare interface ",
1317 ],
1318 variables: &[
1319 "var ",
1320 "let ",
1321 "const ",
1322 "export var ",
1323 "export let ",
1324 "export const ",
1325 ],
1326 imports: &["import "],
1327 tests: &[
1329 "describe(",
1330 "it(",
1331 "test(",
1332 "it.each(",
1333 "test.each(",
1334 "describe.each(",
1335 ],
1336 assertions: &["expect("],
1337 test_suites: &[],
1338};
1339
1340const SP_GO: SymbolPatterns = SymbolPatterns {
1341 functions: &["func "],
1342 functions_prefix_paren: &[],
1343 classes: &["type "],
1344 variables: &["var "],
1345 imports: &["import "],
1346 tests: &["func Test", "func Benchmark", "func Fuzz"],
1348 assertions: &[],
1349 test_suites: &[],
1350};
1351
1352const SP_JAVA: SymbolPatterns = SymbolPatterns {
1353 functions: &[],
1354 functions_prefix_paren: &[],
1355 classes: &[
1356 "class ",
1357 "public class ",
1358 "private class ",
1359 "protected class ",
1360 "abstract class ",
1361 "final class ",
1362 "public abstract class ",
1363 "public final class ",
1364 "interface ",
1365 "public interface ",
1366 "enum ",
1367 "public enum ",
1368 "record ",
1369 "public record ",
1370 "@interface ",
1371 ],
1372 variables: &[],
1373 imports: &["import "],
1374 tests: &[
1376 "@Test",
1377 "@ParameterizedTest",
1378 "@RepeatedTest",
1379 "@TestFactory",
1380 "@TestTemplate",
1381 ],
1382 assertions: &[
1383 "assertEquals(",
1384 "assertNotEquals(",
1385 "assertTrue(",
1386 "assertFalse(",
1387 "assertNull(",
1388 "assertNotNull(",
1389 "assertThat(",
1390 "assertThrows(",
1391 "assertAll(",
1392 "assertArrayEquals(",
1393 "assertIterableEquals(",
1394 "assertLinesMatch(",
1395 ],
1396 test_suites: &[],
1397};
1398
1399const SP_CSHARP: SymbolPatterns = SymbolPatterns {
1400 functions: &[],
1401 functions_prefix_paren: &[],
1402 classes: &[
1403 "class ",
1404 "public class ",
1405 "private class ",
1406 "protected class ",
1407 "internal class ",
1408 "abstract class ",
1409 "sealed class ",
1410 "static class ",
1411 "partial class ",
1412 "public abstract class ",
1413 "public sealed class ",
1414 "public static class ",
1415 "interface ",
1416 "public interface ",
1417 "internal interface ",
1418 "enum ",
1419 "public enum ",
1420 "struct ",
1421 "public struct ",
1422 "record ",
1423 "public record ",
1424 ],
1425 variables: &["var "],
1426 imports: &["using "],
1427 tests: &[
1429 "[TestMethod]",
1430 "[Test]",
1431 "[Fact]",
1432 "[Theory]",
1433 "[TestCase(",
1434 "[DataRow(",
1435 "[InlineData(",
1436 "[MemberData(",
1437 ],
1438 assertions: &[
1439 "Assert.AreEqual(",
1440 "Assert.AreNotEqual(",
1441 "Assert.IsTrue(",
1442 "Assert.IsFalse(",
1443 "Assert.IsNull(",
1444 "Assert.IsNotNull(",
1445 "Assert.Equal(",
1446 "Assert.NotEqual(",
1447 "Assert.True(",
1448 "Assert.False(",
1449 "Assert.That(",
1450 "Assert.Contains(",
1451 "Assert.Throws(",
1452 "Assert.ThrowsAsync(",
1453 "Assert.IsInstanceOfType(",
1454 ],
1455 test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
1456};
1457
/// Test-definition macros and calls shared by the C and C++ symbol tables (`SP_C`, `SP_CPP`).
///
/// Entries are grouped by the framework they come from; the order is kept as-is.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2-style
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
1498
/// Assertion macros and calls shared by the C and C++ symbol tables (`SP_C`, `SP_CPP`).
///
/// Entries are grouped by the framework they come from; the order is kept as-is.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest fatal assertions
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // GoogleTest non-fatal expectations
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2-style REQUIRE / CHECK families
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1572
/// Test-suite / test-group macros shared by the C and C++ symbol tables (`SP_C`, `SP_CPP`).
// NOTE(review): several of these also appear in `TEST_PATTERNS_C_CPP`; how the scanner
// resolves a line that matches both lists is not visible from here.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    // CppUTest
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    // Boost.Test
    "BOOST_AUTO_TEST_SUITE(",
    // CppUnit
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1581
1582const SP_C: SymbolPatterns = SymbolPatterns {
1583 functions: &[],
1585 functions_prefix_paren: &[
1586 "void ",
1587 "int ",
1588 "char ",
1589 "float ",
1590 "double ",
1591 "long ",
1592 "unsigned ",
1593 "size_t ",
1594 "static ",
1595 "inline ",
1596 "const ",
1597 "extern ",
1598 ],
1599 classes: &[
1600 "struct ",
1601 "typedef struct ",
1602 "union ",
1603 "typedef union ",
1604 "typedef enum ",
1605 ],
1606 variables: &[],
1607 imports: &["#include "],
1608 tests: TEST_PATTERNS_C_CPP,
1609 assertions: ASSERT_PATTERNS_C_CPP,
1610 test_suites: SUITE_PATTERNS_C_CPP,
1611};
1612
1613const SP_CPP: SymbolPatterns = SymbolPatterns {
1614 functions: &[
1616 "virtual ", "explicit ", "~", "operator", ],
1621 functions_prefix_paren: &[
1622 "void ",
1623 "bool ",
1624 "int ",
1625 "char ",
1626 "float ",
1627 "double ",
1628 "long ",
1629 "unsigned ",
1630 "size_t ",
1631 "auto ",
1632 "static ",
1633 "inline ",
1634 "constexpr ",
1635 "const ",
1636 "extern ",
1637 ],
1638 classes: &["class ", "struct ", "namespace ", "template ", "template<"],
1640 variables: &[],
1641 imports: &["#include "],
1642 tests: TEST_PATTERNS_C_CPP,
1643 assertions: ASSERT_PATTERNS_C_CPP,
1644 test_suites: SUITE_PATTERNS_C_CPP,
1645};
1646
1647const SP_SHELL: SymbolPatterns = SymbolPatterns {
1648 functions: &["function "],
1649 functions_prefix_paren: &[],
1650 classes: &[],
1651 variables: &["declare ", "local ", "export "],
1652 imports: &["source ", ". "],
1653 tests: &[],
1654 assertions: &[],
1655 test_suites: &[],
1656};
1657
1658const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
1659 functions: &["function ", "Function "],
1660 functions_prefix_paren: &[],
1661 classes: &["class "],
1662 variables: &[],
1663 imports: &["Import-Module ", "using "],
1664 tests: &["Describe ", "It ", "Context "],
1666 assertions: &[],
1667 test_suites: &[],
1668};
1669
1670const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
1671 functions: &[
1672 "fun ",
1673 "private fun ",
1674 "public fun ",
1675 "protected fun ",
1676 "internal fun ",
1677 "override fun ",
1678 "suspend fun ",
1679 "abstract fun ",
1680 "open fun ",
1681 "private suspend fun ",
1682 "public suspend fun ",
1683 ],
1684 functions_prefix_paren: &[],
1685 classes: &[
1686 "class ",
1687 "data class ",
1688 "sealed class ",
1689 "abstract class ",
1690 "open class ",
1691 "object ",
1692 "companion object",
1693 "interface ",
1694 "enum class ",
1695 "annotation class ",
1696 ],
1697 variables: &["val ", "var ", "private val ", "private var ", "const val "],
1698 imports: &["import "],
1699 tests: &[
1701 "@Test",
1702 "@ParameterizedTest",
1703 "@RepeatedTest",
1704 "\"should ",
1705 "\"it ",
1706 ],
1707 assertions: &[
1708 "assertEquals(",
1709 "assertNotEquals(",
1710 "assertTrue(",
1711 "assertFalse(",
1712 "assertNull(",
1713 "assertNotNull(",
1714 "assertThat(",
1715 "assertThrows(",
1716 "shouldBe(",
1717 "shouldNotBe(",
1718 "shouldThrow(",
1719 ],
1720 test_suites: &[],
1721};
1722
1723const SP_SWIFT: SymbolPatterns = SymbolPatterns {
1724 functions: &[
1725 "func ",
1726 "private func ",
1727 "public func ",
1728 "internal func ",
1729 "override func ",
1730 "open func ",
1731 "static func ",
1732 "class func ",
1733 "mutating func ",
1734 "private static func ",
1735 "public static func ",
1736 ],
1737 functions_prefix_paren: &[],
1738 classes: &[
1739 "class ",
1740 "struct ",
1741 "protocol ",
1742 "enum ",
1743 "extension ",
1744 "actor ",
1745 "public class ",
1746 "private class ",
1747 "open class ",
1748 "final class ",
1749 "public struct ",
1750 "private struct ",
1751 "public protocol ",
1752 ],
1753 variables: &[
1754 "var ",
1755 "let ",
1756 "private var ",
1757 "private let ",
1758 "static var ",
1759 "static let ",
1760 ],
1761 imports: &["import "],
1762 tests: &["func test", "func Test", "@Test"],
1764 assertions: &[
1765 "XCTAssertEqual(",
1766 "XCTAssertNotEqual(",
1767 "XCTAssertTrue(",
1768 "XCTAssertFalse(",
1769 "XCTAssertNil(",
1770 "XCTAssertNotNil(",
1771 "XCTAssertGreaterThan(",
1772 "XCTAssertLessThan(",
1773 "XCTAssertThrowsError(",
1774 "XCTAssertNoThrow(",
1775 "#expect(",
1776 ],
1777 test_suites: &[],
1778};
1779
1780const SP_RUBY: SymbolPatterns = SymbolPatterns {
1781 functions: &["def ", "private def ", "protected def "],
1782 functions_prefix_paren: &[],
1783 classes: &["class ", "module "],
1784 variables: &[],
1785 imports: &["require ", "require_relative "],
1786 tests: &["it ", "it(", "describe ", "context ", "test "],
1788 assertions: &[],
1789 test_suites: &[],
1790};
1791
1792const SP_SCALA: SymbolPatterns = SymbolPatterns {
1793 functions: &["def ", "private def ", "protected def ", "override def "],
1794 functions_prefix_paren: &[],
1795 classes: &[
1796 "class ",
1797 "case class ",
1798 "abstract class ",
1799 "sealed class ",
1800 "object ",
1801 "trait ",
1802 ],
1803 variables: &["val ", "var ", "lazy val "],
1804 imports: &["import "],
1805 tests: &["test(", "it(", "describe("],
1807 assertions: &[],
1808 test_suites: &[],
1809};
1810
1811const SP_PHP: SymbolPatterns = SymbolPatterns {
1812 functions: &[
1813 "function ",
1814 "public function ",
1815 "private function ",
1816 "protected function ",
1817 "static function ",
1818 "abstract function ",
1819 "final function ",
1820 "public static function ",
1821 "private static function ",
1822 "protected static function ",
1823 ],
1824 functions_prefix_paren: &[],
1825 classes: &[
1826 "class ",
1827 "abstract class ",
1828 "final class ",
1829 "interface ",
1830 "trait ",
1831 "enum ",
1832 ],
1833 variables: &[],
1834 imports: &[
1835 "use ",
1836 "require ",
1837 "require_once ",
1838 "include ",
1839 "include_once ",
1840 ],
1841 tests: &[
1843 "public function test",
1844 "function test",
1845 "#[Test]",
1846 "#[DataProvider(",
1847 ],
1848 assertions: &[],
1849 test_suites: &[],
1850};
1851
1852const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
1853 functions: &[
1854 "def ",
1855 "defp ",
1856 "defmacro ",
1857 "defmacrop ",
1858 "defguard ",
1859 "defguardp ",
1860 ],
1861 functions_prefix_paren: &[],
1862 classes: &["defmodule ", "defprotocol ", "defimpl "],
1863 variables: &[],
1864 imports: &["import ", "alias ", "use ", "require "],
1865 tests: &["test ", "describe "],
1867 assertions: &[],
1868 test_suites: &[],
1869};
1870
1871const SP_ERLANG: SymbolPatterns = SymbolPatterns {
1872 functions: &[],
1873 functions_prefix_paren: &[],
1874 classes: &["-module("],
1875 variables: &[],
1876 imports: &["-import(", "-include(", "-include_lib("],
1877 tests: &[],
1878 assertions: &[],
1879 test_suites: &[],
1880};
1881
1882const SP_FSHARP: SymbolPatterns = SymbolPatterns {
1883 functions: &[
1884 "let ",
1885 "let rec ",
1886 "member ",
1887 "override ",
1888 "abstract member ",
1889 ],
1890 functions_prefix_paren: &[],
1891 classes: &["type "],
1892 variables: &["let mutable "],
1893 imports: &["open "],
1894 tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
1896 assertions: &[],
1897 test_suites: &[],
1898};
1899
1900const SP_GROOVY: SymbolPatterns = SymbolPatterns {
1901 functions: &["def ", "private def ", "public def ", "protected def "],
1902 functions_prefix_paren: &[],
1903 classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
1904 variables: &[],
1905 imports: &["import "],
1906 tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
1908 assertions: &[],
1909 test_suites: &[],
1910};
1911
1912const SP_HASKELL: SymbolPatterns = SymbolPatterns {
1913 functions: &[],
1914 functions_prefix_paren: &[],
1915 classes: &["class ", "data ", "newtype ", "type "],
1916 variables: &[],
1917 imports: &["import "],
1918 tests: &[],
1919 assertions: &[],
1920 test_suites: &[],
1921};
1922
1923const SP_LUA: SymbolPatterns = SymbolPatterns {
1924 functions: &["function ", "local function "],
1925 functions_prefix_paren: &[],
1926 classes: &[],
1927 variables: &["local "],
1928 imports: &[],
1929 tests: &["it(", "describe(", "pending("],
1931 assertions: &[],
1932 test_suites: &[],
1933};
1934
1935const SP_NIM: SymbolPatterns = SymbolPatterns {
1936 functions: &[
1937 "proc ",
1938 "func ",
1939 "method ",
1940 "iterator ",
1941 "converter ",
1942 "template ",
1943 "macro ",
1944 ],
1945 functions_prefix_paren: &[],
1946 classes: &["type "],
1947 variables: &["var ", "let ", "const "],
1948 imports: &["import ", "from "],
1949 tests: &["test "],
1951 assertions: &[],
1952 test_suites: &[],
1953};
1954
1955const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
1956 functions: &["- (", "+ ("],
1957 functions_prefix_paren: &[],
1958 classes: &["@interface ", "@implementation ", "@protocol "],
1959 variables: &[],
1960 imports: &["#import ", "#include "],
1961 tests: &["- (void)test"],
1963 assertions: &[
1964 "XCTAssertEqual(",
1965 "XCTAssertNotEqual(",
1966 "XCTAssertTrue(",
1967 "XCTAssertFalse(",
1968 "XCTAssertNil(",
1969 "XCTAssertNotNil(",
1970 "XCTAssertGreaterThan(",
1971 "XCTAssertLessThan(",
1972 "XCTAssertThrowsError(",
1973 "XCTAssertNoThrow(",
1974 ],
1975 test_suites: &[],
1976};
1977
1978const SP_OCAML: SymbolPatterns = SymbolPatterns {
1979 functions: &["let ", "let rec "],
1980 functions_prefix_paren: &[],
1981 classes: &["type ", "module ", "class "],
1982 variables: &[],
1983 imports: &["open "],
1984 tests: &[],
1985 assertions: &[],
1986 test_suites: &[],
1987};
1988
1989const SP_PERL: SymbolPatterns = SymbolPatterns {
1990 functions: &["sub "],
1991 functions_prefix_paren: &[],
1992 classes: &["package "],
1993 variables: &["my ", "our ", "local "],
1994 imports: &["use ", "require "],
1995 tests: &[],
1996 assertions: &[],
1997 test_suites: &[],
1998};
1999
2000const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
2001 functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
2002 functions_prefix_paren: &[],
2003 classes: &[
2004 "(defrecord ",
2005 "(defprotocol ",
2006 "(deftype ",
2007 "(definterface ",
2008 ],
2009 variables: &["(def ", "(defonce "],
2010 imports: &["(ns ", "(require "],
2011 tests: &["(deftest ", "(testing "],
2013 assertions: &[],
2014 test_suites: &[],
2015};
2016
2017const SP_JULIA: SymbolPatterns = SymbolPatterns {
2018 functions: &["function ", "macro "],
2019 functions_prefix_paren: &[],
2020 classes: &[
2021 "struct ",
2022 "mutable struct ",
2023 "abstract type ",
2024 "primitive type ",
2025 ],
2026 variables: &["const "],
2027 imports: &["import ", "using "],
2028 tests: &["@test ", "@testset "],
2030 assertions: &[],
2031 test_suites: &[],
2032};
2033
2034const SP_DART: SymbolPatterns = SymbolPatterns {
2035 functions: &[],
2036 functions_prefix_paren: &[],
2037 classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
2038 variables: &["var ", "final ", "const ", "late "],
2039 imports: &["import "],
2040 tests: &["test(", "testWidgets(", "group("],
2042 assertions: &[],
2043 test_suites: &[],
2044};
2045
2046const SP_R: SymbolPatterns = SymbolPatterns {
2047 functions: &[],
2048 functions_prefix_paren: &[],
2049 classes: &[],
2050 variables: &[],
2051 imports: &["library(", "source("],
2052 tests: &["test_that(", "it(", "describe(", "expect_"],
2054 assertions: &[],
2055 test_suites: &[],
2056};
2057
2058const SP_SQL: SymbolPatterns = SymbolPatterns {
2059 functions: &[
2060 "create function ",
2061 "create or replace function ",
2062 "create procedure ",
2063 "create or replace procedure ",
2064 "CREATE FUNCTION ",
2065 "CREATE OR REPLACE FUNCTION ",
2066 "CREATE PROCEDURE ",
2067 "CREATE OR REPLACE PROCEDURE ",
2068 ],
2069 functions_prefix_paren: &[],
2070 classes: &[
2071 "create table ",
2072 "create view ",
2073 "create schema ",
2074 "CREATE TABLE ",
2075 "CREATE VIEW ",
2076 "CREATE SCHEMA ",
2077 ],
2078 variables: &["declare ", "DECLARE "],
2079 imports: &[],
2080 tests: &[],
2081 assertions: &[],
2082 test_suites: &[],
2083};
2084
2085const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
2086 functions: &["proc ", "PROC "],
2087 functions_prefix_paren: &[],
2088 classes: &[],
2089 variables: &[],
2090 imports: &["include ", "INCLUDE ", "%include "],
2091 tests: &[],
2092 assertions: &[],
2093 test_suites: &[],
2094};
2095
2096const SP_ZIG: SymbolPatterns = SymbolPatterns {
2097 functions: &[
2098 "fn ",
2099 "pub fn ",
2100 "export fn ",
2101 "inline fn ",
2102 "pub inline fn ",
2103 ],
2104 functions_prefix_paren: &[],
2105 classes: &[],
2106 variables: &["var ", "pub var "],
2107 imports: &[],
2108 tests: &["test \"", "test{"],
2110 assertions: &[],
2111 test_suites: &[],
2112};
2113
2114#[allow(clippy::struct_excessive_bools)]
2118#[derive(Clone, Copy)]
2119struct StaticLangConfig {
2120 line_comments: &'static [&'static str],
2121 block_comment: Option<(&'static str, &'static str)>,
2122 allow_single_quote_strings: bool,
2123 allow_double_quote_strings: bool,
2124 allow_triple_quote_strings: bool,
2125 allow_csharp_verbatim_strings: bool,
2126 symbol_patterns: SymbolPatterns,
2127 has_preprocessor: bool,
2129}
2130
2131#[allow(clippy::struct_excessive_bools)]
2132#[derive(Debug, Clone)]
2133struct ScanConfig {
2134 line_comments: &'static [&'static str],
2135 block_comment: Option<(&'static str, &'static str)>,
2136 allow_single_quote_strings: bool,
2137 allow_double_quote_strings: bool,
2138 allow_triple_quote_strings: bool,
2139 allow_csharp_verbatim_strings: bool,
2140 skip_lines: HashSet<usize>,
2141 symbol_patterns: SymbolPatterns,
2142 branch_keywords: &'static [&'static str],
2144 lsloc_strategy: LslocStrategy,
2146}
2147
2148const C_SLASH_BASE: StaticLangConfig = StaticLangConfig {
2158 line_comments: &["//"],
2159 block_comment: Some(("/*", "*/")),
2160 allow_single_quote_strings: true,
2161 allow_double_quote_strings: true,
2162 allow_triple_quote_strings: false,
2163 allow_csharp_verbatim_strings: false,
2164 symbol_patterns: SP_NONE,
2165 has_preprocessor: false,
2166};
2167
2168const HASH_BASE: StaticLangConfig = StaticLangConfig {
2172 line_comments: &["#"],
2173 block_comment: None,
2174 allow_single_quote_strings: true,
2175 allow_double_quote_strings: true,
2176 allow_triple_quote_strings: false,
2177 allow_csharp_verbatim_strings: false,
2178 symbol_patterns: SP_NONE,
2179 has_preprocessor: false,
2180};
2181
2182static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
2186 (
2188 Language::C,
2189 StaticLangConfig {
2190 symbol_patterns: SP_C,
2191 has_preprocessor: true,
2192 ..C_SLASH_BASE
2193 },
2194 ),
2195 (
2196 Language::Cpp,
2197 StaticLangConfig {
2198 symbol_patterns: SP_CPP,
2199 has_preprocessor: true,
2200 ..C_SLASH_BASE
2201 },
2202 ),
2203 (
2204 Language::ObjectiveC,
2205 StaticLangConfig {
2206 symbol_patterns: SP_OBJECTIVEC,
2207 has_preprocessor: true,
2208 ..C_SLASH_BASE
2209 },
2210 ),
2211 (
2213 Language::CSharp,
2214 StaticLangConfig {
2215 symbol_patterns: SP_CSHARP,
2216 allow_csharp_verbatim_strings: true,
2217 ..C_SLASH_BASE
2218 },
2219 ),
2220 (
2221 Language::Go,
2222 StaticLangConfig {
2223 symbol_patterns: SP_GO,
2224 ..C_SLASH_BASE
2225 },
2226 ),
2227 (
2228 Language::Java,
2229 StaticLangConfig {
2230 symbol_patterns: SP_JAVA,
2231 ..C_SLASH_BASE
2232 },
2233 ),
2234 (
2235 Language::JavaScript,
2236 StaticLangConfig {
2237 symbol_patterns: SP_JS,
2238 ..C_SLASH_BASE
2239 },
2240 ),
2241 (
2242 Language::TypeScript,
2243 StaticLangConfig {
2244 symbol_patterns: SP_TS,
2245 ..C_SLASH_BASE
2246 },
2247 ),
2248 (
2249 Language::Svelte,
2250 StaticLangConfig {
2251 symbol_patterns: SP_JS,
2252 ..C_SLASH_BASE
2253 },
2254 ),
2255 (
2256 Language::Vue,
2257 StaticLangConfig {
2258 symbol_patterns: SP_JS,
2259 ..C_SLASH_BASE
2260 },
2261 ),
2262 (
2263 Language::Dart,
2264 StaticLangConfig {
2265 symbol_patterns: SP_DART,
2266 ..C_SLASH_BASE
2267 },
2268 ),
2269 (
2270 Language::Groovy,
2271 StaticLangConfig {
2272 symbol_patterns: SP_GROOVY,
2273 ..C_SLASH_BASE
2274 },
2275 ),
2276 (
2277 Language::Kotlin,
2278 StaticLangConfig {
2279 symbol_patterns: SP_KOTLIN,
2280 ..C_SLASH_BASE
2281 },
2282 ),
2283 (
2284 Language::Scala,
2285 StaticLangConfig {
2286 symbol_patterns: SP_SCALA,
2287 ..C_SLASH_BASE
2288 },
2289 ),
2290 (
2291 Language::Scss,
2292 StaticLangConfig {
2293 symbol_patterns: SP_NONE,
2294 ..C_SLASH_BASE
2295 },
2296 ),
2297 (
2299 Language::Rust,
2300 StaticLangConfig {
2301 symbol_patterns: SP_RUST,
2302 allow_single_quote_strings: false,
2303 ..C_SLASH_BASE
2304 },
2305 ),
2306 (
2308 Language::Swift,
2309 StaticLangConfig {
2310 symbol_patterns: SP_SWIFT,
2311 allow_single_quote_strings: false,
2312 ..C_SLASH_BASE
2313 },
2314 ),
2315 (
2317 Language::Zig,
2318 StaticLangConfig {
2319 symbol_patterns: SP_ZIG,
2320 block_comment: None,
2321 ..C_SLASH_BASE
2322 },
2323 ),
2324 (
2326 Language::FSharp,
2327 StaticLangConfig {
2328 line_comments: &["//"],
2329 block_comment: Some(("(*", "*)")),
2330 allow_single_quote_strings: false,
2331 allow_double_quote_strings: true,
2332 symbol_patterns: SP_FSHARP,
2333 ..C_SLASH_BASE
2334 },
2335 ),
2336 (
2338 Language::Shell,
2339 StaticLangConfig {
2340 symbol_patterns: SP_SHELL,
2341 ..HASH_BASE
2342 },
2343 ),
2344 (
2345 Language::Elixir,
2346 StaticLangConfig {
2347 symbol_patterns: SP_ELIXIR,
2348 ..HASH_BASE
2349 },
2350 ),
2351 (
2352 Language::Perl,
2353 StaticLangConfig {
2354 symbol_patterns: SP_PERL,
2355 ..HASH_BASE
2356 },
2357 ),
2358 (
2359 Language::R,
2360 StaticLangConfig {
2361 symbol_patterns: SP_R,
2362 ..HASH_BASE
2363 },
2364 ),
2365 (
2366 Language::Ruby,
2367 StaticLangConfig {
2368 symbol_patterns: SP_RUBY,
2369 ..HASH_BASE
2370 },
2371 ),
2372 (
2374 Language::Python,
2375 StaticLangConfig {
2376 symbol_patterns: SP_PYTHON,
2377 allow_triple_quote_strings: true,
2378 ..HASH_BASE
2379 },
2380 ),
2381 (
2383 Language::PowerShell,
2384 StaticLangConfig {
2385 symbol_patterns: SP_POWERSHELL,
2386 block_comment: Some(("<#", "#>")),
2387 ..HASH_BASE
2388 },
2389 ),
2390 (
2392 Language::Nim,
2393 StaticLangConfig {
2394 symbol_patterns: SP_NIM,
2395 block_comment: Some(("#[", "]#")),
2396 ..HASH_BASE
2397 },
2398 ),
2399 (
2401 Language::Makefile,
2402 StaticLangConfig {
2403 symbol_patterns: SP_NONE,
2404 allow_single_quote_strings: false,
2405 allow_double_quote_strings: false,
2406 ..HASH_BASE
2407 },
2408 ),
2409 (
2410 Language::Dockerfile,
2411 StaticLangConfig {
2412 symbol_patterns: SP_NONE,
2413 allow_single_quote_strings: false,
2414 allow_double_quote_strings: false,
2415 ..HASH_BASE
2416 },
2417 ),
2418 (
2421 Language::Css,
2422 StaticLangConfig {
2423 line_comments: &[],
2424 block_comment: Some(("/*", "*/")),
2425 symbol_patterns: SP_NONE,
2426 ..C_SLASH_BASE
2427 },
2428 ),
2429 (
2431 Language::Html,
2432 StaticLangConfig {
2433 line_comments: &[],
2434 block_comment: Some(("<!--", "-->")),
2435 allow_single_quote_strings: false,
2436 allow_double_quote_strings: false,
2437 symbol_patterns: SP_NONE,
2438 ..C_SLASH_BASE
2439 },
2440 ),
2441 (
2442 Language::Xml,
2443 StaticLangConfig {
2444 line_comments: &[],
2445 block_comment: Some(("<!--", "-->")),
2446 allow_single_quote_strings: false,
2447 allow_double_quote_strings: false,
2448 symbol_patterns: SP_NONE,
2449 ..C_SLASH_BASE
2450 },
2451 ),
2452 (
2454 Language::Lua,
2455 StaticLangConfig {
2456 line_comments: &["--"],
2457 block_comment: Some(("--[[", "]]")),
2458 symbol_patterns: SP_LUA,
2459 ..C_SLASH_BASE
2460 },
2461 ),
2462 (
2464 Language::Haskell,
2465 StaticLangConfig {
2466 line_comments: &["--"],
2467 block_comment: Some(("{-", "-}")),
2468 symbol_patterns: SP_HASKELL,
2469 ..C_SLASH_BASE
2470 },
2471 ),
2472 (
2474 Language::Sql,
2475 StaticLangConfig {
2476 line_comments: &["--"],
2477 block_comment: Some(("/*", "*/")),
2478 allow_single_quote_strings: true,
2479 allow_double_quote_strings: false,
2480 symbol_patterns: SP_SQL,
2481 ..C_SLASH_BASE
2482 },
2483 ),
2484 (
2486 Language::Ocaml,
2487 StaticLangConfig {
2488 line_comments: &[],
2489 block_comment: Some(("(*", "*)")),
2490 allow_single_quote_strings: false,
2491 symbol_patterns: SP_OCAML,
2492 ..C_SLASH_BASE
2493 },
2494 ),
2495 (
2501 Language::Assembly,
2502 StaticLangConfig {
2503 line_comments: &[";"],
2504 block_comment: Some(("/*", "*/")),
2505 allow_single_quote_strings: false,
2506 allow_double_quote_strings: true,
2507 symbol_patterns: SP_ASSEMBLY,
2508 ..C_SLASH_BASE
2509 },
2510 ),
2511 (
2512 Language::Clojure,
2513 StaticLangConfig {
2514 line_comments: &[";"],
2515 block_comment: None,
2516 allow_single_quote_strings: false,
2517 symbol_patterns: SP_CLOJURE,
2518 ..C_SLASH_BASE
2519 },
2520 ),
2521 (
2523 Language::Erlang,
2524 StaticLangConfig {
2525 line_comments: &["%"],
2526 block_comment: None,
2527 allow_single_quote_strings: false,
2528 symbol_patterns: SP_ERLANG,
2529 ..C_SLASH_BASE
2530 },
2531 ),
2532 (
2534 Language::Php,
2535 StaticLangConfig {
2536 line_comments: &["//", "#"],
2537 block_comment: Some(("/*", "*/")),
2538 symbol_patterns: SP_PHP,
2539 ..C_SLASH_BASE
2540 },
2541 ),
2542 (
2544 Language::Julia,
2545 StaticLangConfig {
2546 line_comments: &["#"],
2547 block_comment: Some(("#=", "=#")),
2548 allow_single_quote_strings: false,
2549 allow_triple_quote_strings: true,
2550 symbol_patterns: SP_JULIA,
2551 ..C_SLASH_BASE
2552 },
2553 ),
2554 (
2557 Language::Solidity,
2558 StaticLangConfig {
2559 symbol_patterns: SP_SOLIDITY,
2560 ..C_SLASH_BASE
2561 },
2562 ),
2563 (
2565 Language::Protobuf,
2566 StaticLangConfig {
2567 symbol_patterns: SP_PROTOBUF,
2568 ..C_SLASH_BASE
2569 },
2570 ),
2571 (
2573 Language::Hcl,
2574 StaticLangConfig {
2575 line_comments: &["#", "//"],
2576 allow_single_quote_strings: false,
2577 symbol_patterns: SP_NONE,
2578 ..C_SLASH_BASE
2579 },
2580 ),
2581 (
2583 Language::GraphQl,
2584 StaticLangConfig {
2585 allow_single_quote_strings: false,
2586 allow_triple_quote_strings: true,
2587 symbol_patterns: SP_NONE,
2588 ..HASH_BASE
2589 },
2590 ),
2591 (
2594 Language::Ada,
2595 StaticLangConfig {
2596 line_comments: &["--"],
2597 block_comment: None,
2598 allow_single_quote_strings: false,
2599 symbol_patterns: SP_ADA,
2600 ..C_SLASH_BASE
2601 },
2602 ),
2603 (
2605 Language::Vhdl,
2606 StaticLangConfig {
2607 line_comments: &["--"],
2608 block_comment: None,
2609 allow_single_quote_strings: false,
2610 symbol_patterns: SP_VHDL,
2611 ..C_SLASH_BASE
2612 },
2613 ),
2614 (
2616 Language::Verilog,
2617 StaticLangConfig {
2618 allow_single_quote_strings: false,
2619 symbol_patterns: SP_VERILOG,
2620 ..C_SLASH_BASE
2621 },
2622 ),
2623 (
2625 Language::Tcl,
2626 StaticLangConfig {
2627 allow_single_quote_strings: false,
2628 symbol_patterns: SP_TCL,
2629 ..HASH_BASE
2630 },
2631 ),
2632 (
2634 Language::Pascal,
2635 StaticLangConfig {
2636 line_comments: &["//"],
2637 block_comment: Some(("{", "}")),
2638 allow_single_quote_strings: true,
2639 allow_double_quote_strings: false,
2640 symbol_patterns: SP_PASCAL,
2641 ..C_SLASH_BASE
2642 },
2643 ),
2644 (
2646 Language::VisualBasic,
2647 StaticLangConfig {
2648 line_comments: &["'"],
2649 block_comment: None,
2650 allow_single_quote_strings: false,
2651 allow_double_quote_strings: true,
2652 symbol_patterns: SP_VB,
2653 ..C_SLASH_BASE
2654 },
2655 ),
2656 (
2658 Language::Lisp,
2659 StaticLangConfig {
2660 line_comments: &[";"],
2661 block_comment: Some(("#|", "|#")),
2662 allow_single_quote_strings: false,
2663 symbol_patterns: SP_LISP,
2664 ..C_SLASH_BASE
2665 },
2666 ),
2667 (
2670 Language::Fortran,
2671 StaticLangConfig {
2672 line_comments: &["!"],
2673 block_comment: None,
2674 symbol_patterns: SP_FORTRAN,
2675 ..C_SLASH_BASE
2676 },
2677 ),
2678 (
2680 Language::Nix,
2681 StaticLangConfig {
2682 block_comment: Some(("/*", "*/")),
2683 allow_single_quote_strings: false,
2684 symbol_patterns: SP_NONE,
2685 ..HASH_BASE
2686 },
2687 ),
2688 (
2690 Language::Crystal,
2691 StaticLangConfig {
2692 symbol_patterns: SP_CRYSTAL,
2693 ..HASH_BASE
2694 },
2695 ),
2696 (
2698 Language::D,
2699 StaticLangConfig {
2700 symbol_patterns: SP_D,
2701 ..C_SLASH_BASE
2702 },
2703 ),
2704 (
2706 Language::Glsl,
2707 StaticLangConfig {
2708 allow_single_quote_strings: false,
2709 symbol_patterns: SP_NONE,
2710 ..C_SLASH_BASE
2711 },
2712 ),
2713 (
2715 Language::Cmake,
2716 StaticLangConfig {
2717 block_comment: Some(("#[[", "]]")),
2718 allow_single_quote_strings: false,
2719 symbol_patterns: SP_CMAKE,
2720 ..HASH_BASE
2721 },
2722 ),
2723 (
2725 Language::Elm,
2726 StaticLangConfig {
2727 line_comments: &["--"],
2728 block_comment: Some(("{-", "-}")),
2729 allow_single_quote_strings: false,
2730 symbol_patterns: SP_ELM,
2731 ..C_SLASH_BASE
2732 },
2733 ),
2734 (
2736 Language::Awk,
2737 StaticLangConfig {
2738 allow_single_quote_strings: false,
2739 symbol_patterns: SP_AWK,
2740 ..HASH_BASE
2741 },
2742 ),
2743];
2744
/// Per-language switches that adjust how physical lines are classified.
///
/// Read by `process_physical_line` and `finalize_line_facts`.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// When set, a code-only line whose trimmed text starts with `#` is also
    /// tallied in `compiler_directive_lines`.
    has_preprocessor_directives: bool,
    /// When set, blank lines inside an open block comment are flagged as
    /// comment lines; otherwise only non-blank lines are.
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside any string or block comment)
    /// is folded into the following line and classified together with it.
    collapse_continuation_lines: bool,
}
2756
/// The kind of string literal the scanner is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// An ordinary quoted string; the payload is its closing quote character.
    /// A backslash inside it skips the character that follows.
    Single(char),
    /// A triple-quoted string; the payload is the delimiter that closes it.
    Triple(&'static str),
    /// A string opened by `@"`: a doubled `""` stays inside the literal and a
    /// lone `"` closes it.
    VerbatimDouble,
}
2763
/// What the scanner observed on one line (or on several lines merged by
/// continuation collapsing).
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// The line contains code, including any part of a string literal.
    has_code: bool,
    /// The line contains a line comment.
    has_single_comment: bool,
    /// The line contains, or lies inside, a block comment.
    has_multi_comment: bool,
    /// The line is a docstring. `scan_line` never sets this; it is merged by
    /// `finalize_line_facts` and takes priority in `classify_line`.
    has_docstring: bool,
}
2772
2773fn process_string_char(
2777 state: StringState,
2778 chars: &[char],
2779 i: usize,
2780) -> (Option<StringState>, usize) {
2781 match state {
2782 StringState::Single(delim) => {
2783 if chars[i] == '\\' {
2784 return (Some(state), 2); }
2786 if chars[i] == delim {
2787 (None, 1)
2788 } else {
2789 (Some(state), 1)
2790 }
2791 }
2792 StringState::Triple(delim) => {
2793 if starts_with(chars, i, delim) {
2794 (None, delim.len())
2795 } else {
2796 (Some(state), 1)
2797 }
2798 }
2799 StringState::VerbatimDouble => {
2800 if starts_with(chars, i, "\"\"") {
2801 return (Some(state), 2); }
2803 if chars[i] == '"' {
2804 (None, 1)
2805 } else {
2806 (Some(state), 1)
2807 }
2808 }
2809 }
2810}
2811
2812fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2816 if starts_with(chars, i, close) {
2817 (false, close.len())
2818 } else {
2819 (true, 1)
2820 }
2821}
2822
2823fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2827 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2828 return Some((StringState::VerbatimDouble, 2));
2829 }
2830 if config.allow_triple_quote_strings {
2831 if starts_with(chars, i, "\"\"\"") {
2832 return Some((StringState::Triple("\"\"\""), 3));
2833 }
2834 if starts_with(chars, i, "'''") {
2835 return Some((StringState::Triple("'''"), 3));
2836 }
2837 }
2838 if config.allow_single_quote_strings && chars[i] == '\'' {
2839 return Some((StringState::Single('\''), 1));
2840 }
2841 if config.allow_double_quote_strings && chars[i] == '"' {
2842 return Some((StringState::Single('"'), 1));
2843 }
2844 None
2845}
2846
2847fn step_through_block_comment(
2853 chars: &[char],
2854 i: usize,
2855 block_comment: Option<(&'static str, &'static str)>,
2856 in_block_comment: &mut bool,
2857) -> usize {
2858 if let Some((_, close)) = block_comment {
2859 let (still_in, advance) = process_block_comment_char(chars, i, close);
2860 *in_block_comment = still_in;
2861 return advance;
2862 }
2863 0
2864}
2865
2866fn try_open_block_comment(
2869 chars: &[char],
2870 i: usize,
2871 block_comment: Option<(&'static str, &'static str)>,
2872) -> Option<usize> {
2873 let (open, _) = block_comment?;
2874 starts_with(chars, i, open).then_some(open.len())
2875}
2876
/// Scans one physical line character by character and records what it holds.
///
/// `in_block_comment` and `string_state` carry lexer state across lines: they
/// are read on entry and left describing the state at the end of the line.
/// `facts` accumulates the code / comment flags for this line.
fn scan_line(
    chars: &[char],
    config: &ScanConfig,
    facts: &mut LineFacts,
    in_block_comment: &mut bool,
    string_state: &mut Option<StringState>,
) {
    let mut i = 0usize;
    while i < chars.len() {
        // Inside a string literal: everything counts as code until it closes.
        if let Some(state) = *string_state {
            facts.has_code = true;
            let (new_state, advance) = process_string_char(state, chars, i);
            *string_state = new_state;
            i += advance;
            continue;
        }

        // Inside a block comment: consume until the closing delimiter.
        if *in_block_comment {
            facts.has_multi_comment = true;
            i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
            continue;
        }

        // Whitespace outside strings and comments carries no information.
        if chars[i].is_whitespace() {
            i += 1;
            continue;
        }

        // String openers are tested before comment openers, so comment
        // markers inside a literal are not treated as comments.
        if let Some((new_state, advance)) = try_open_string(chars, i, config) {
            facts.has_code = true;
            *string_state = Some(new_state);
            i += advance;
            continue;
        }

        if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
            facts.has_multi_comment = true;
            *in_block_comment = true;
            i += advance;
            continue;
        }

        // A line comment swallows the rest of the line, so scanning stops.
        if config
            .line_comments
            .iter()
            .any(|prefix| starts_with(chars, i, prefix))
        {
            facts.has_single_comment = true;
            break;
        }

        // Anything else is an ordinary code character.
        facts.has_code = true;
        i += 1;
    }
}
2942
2943fn finalize_line_facts(
2948 facts: LineFacts,
2949 trimmed: &str,
2950 raw: &mut RawLineCounts,
2951 ieee: IeeeFlags,
2952 in_block_comment: bool,
2953 string_state: Option<StringState>,
2954 pending_continuation: &mut Option<LineFacts>,
2955) -> Option<LineFacts> {
2956 if ieee.has_preprocessor_directives
2960 && facts.has_code
2961 && !facts.has_single_comment
2962 && !facts.has_multi_comment
2963 && trimmed.starts_with('#')
2964 {
2965 raw.compiler_directive_lines += 1;
2966 }
2967
2968 let is_continuation = ieee.collapse_continuation_lines
2971 && !in_block_comment
2972 && string_state.is_none()
2973 && trimmed.ends_with('\\');
2974
2975 if is_continuation {
2976 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2977 pending.has_code |= facts.has_code;
2978 pending.has_single_comment |= facts.has_single_comment;
2979 pending.has_multi_comment |= facts.has_multi_comment;
2980 pending.has_docstring |= facts.has_docstring;
2981 return None; }
2983
2984 let emit = if let Some(pending) = pending_continuation.take() {
2986 LineFacts {
2987 has_code: pending.has_code | facts.has_code,
2988 has_single_comment: pending.has_single_comment | facts.has_single_comment,
2989 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2990 has_docstring: pending.has_docstring | facts.has_docstring,
2991 }
2992 } else {
2993 facts
2994 };
2995 Some(emit)
2996}
2997
/// Classifies one physical line and updates every counter in `raw`.
///
/// Lexer state (`in_block_comment`, `string_state`) and the continuation
/// accumulator (`pending_continuation`) are carried from line to line by the
/// caller. Lines listed in `config.skip_lines` are counted as docstring lines
/// without being scanned.
#[allow(clippy::needless_pass_by_value)]
#[allow(clippy::too_many_arguments)]
#[allow(clippy::many_single_char_names)]
fn process_physical_line(
    line: &str,
    line_idx: usize,
    config: &ScanConfig,
    raw: &mut RawLineCounts,
    in_block_comment: &mut bool,
    string_state: &mut Option<StringState>,
    pending_continuation: &mut Option<LineFacts>,
    ieee: IeeeFlags,
) {
    raw.total_physical_lines += 1;

    // Skipped lines bypass the scanner entirely, so they leave the lexer
    // state untouched.
    if config.skip_lines.contains(&line_idx) {
        raw.docstring_comment_lines += 1;
        return;
    }

    let trimmed = line.trim();
    let mut facts = LineFacts::default();

    // A line that starts inside a block comment is a comment line even if the
    // scanner sees no characters on it; blank ones only count when the flag
    // asks for it.
    if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
        facts.has_multi_comment = true;
    }

    let chars: Vec<char> = line.chars().collect();
    scan_line(&chars, config, &mut facts, in_block_comment, string_state);

    // `None` means the line was absorbed into a pending continuation and will
    // be classified together with a later line.
    let Some(emit) = finalize_line_facts(
        facts,
        trimmed,
        raw,
        ieee,
        *in_block_comment,
        *string_state,
        pending_continuation,
    ) else {
        return;
    };

    classify_line(raw, &emit, trimmed);

    if emit.has_code {
        use std::hash::{DefaultHasher, Hash, Hasher};
        // Symbol counts, in order: functions, classes, variables, imports,
        // tests, assertions, test suites.
        let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
        raw.functions += f;
        raw.classes += c;
        raw.variables += v;
        raw.imports += i;
        raw.test_count += t;
        raw.test_assertion_count += a;
        raw.test_suite_count += s;

        raw.cyclomatic_complexity +=
            count_branch_in_line(trimmed.as_bytes(), config.branch_keywords);

        match config.lsloc_strategy {
            // Every `;` byte on the trimmed line counts as one logical line.
            LslocStrategy::Semicolons => {
                let semi = u32::try_from(trimmed.bytes().filter(|&b| b == b';').count())
                    .unwrap_or(u32::MAX);
                *raw.lsloc.get_or_insert(0) += semi;
            }
            // One logical line per physical line, unless the line visibly
            // continues (trailing `\`, `,`, `(`, `[` or `{`).
            LslocStrategy::NonContinuationNewlines => {
                let cont = trimmed.ends_with('\\')
                    || trimmed.ends_with(',')
                    || trimmed.ends_with('(')
                    || trimmed.ends_with('[')
                    || trimmed.ends_with('{');
                if !cont {
                    *raw.lsloc.get_or_insert(0) += 1;
                }
            }
            LslocStrategy::Unsupported => {}
        }

        // Record a hash of the trimmed text of every code line.
        let mut h = DefaultHasher::new();
        trimmed.hash(&mut h);
        raw.code_line_hashes.push(h.finish());
    }
}
3090
3091#[allow(clippy::needless_pass_by_value)]
3092fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
3093 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3094 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3095
3096 let mut raw = RawLineCounts::default();
3097 let mut warnings = Vec::new();
3098
3099 let mut in_block_comment = false;
3100 let mut string_state: Option<StringState> = None;
3101 let mut pending_continuation: Option<LineFacts> = None;
3103
3104 for (line_idx, line) in lines.iter().enumerate() {
3105 process_physical_line(
3106 line,
3107 line_idx,
3108 &config,
3109 &mut raw,
3110 &mut in_block_comment,
3111 &mut string_state,
3112 &mut pending_continuation,
3113 ieee,
3114 );
3115 }
3116
3117 if let Some(pending) = pending_continuation.take() {
3119 classify_line(&mut raw, &pending, "");
3120 }
3121
3122 if in_block_comment {
3123 warnings.push("unclosed block comment detected; result is best effort".into());
3124 }
3125 if string_state.is_some() {
3126 warnings.push("unclosed string literal detected; result is best effort".into());
3127 }
3128
3129 RawFileAnalysis {
3130 raw,
3131 parse_mode: if warnings.is_empty() {
3132 ParseMode::Lexical
3133 } else {
3134 ParseMode::LexicalBestEffort
3135 },
3136 warnings,
3137 style_analysis: None,
3138 }
3139}
3140
3141const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
3142 if facts.has_docstring {
3143 raw.docstring_comment_lines += 1;
3144 } else if !facts.has_code
3145 && !facts.has_single_comment
3146 && !facts.has_multi_comment
3147 && trimmed.is_empty()
3148 {
3149 raw.blank_only_lines += 1;
3150 } else if facts.has_code && facts.has_single_comment {
3151 raw.mixed_code_single_comment_lines += 1;
3152 } else if facts.has_code && facts.has_multi_comment {
3153 raw.mixed_code_multi_comment_lines += 1;
3154 } else if facts.has_code {
3155 raw.code_only_lines += 1;
3156 } else if facts.has_single_comment {
3157 raw.single_comment_only_lines += 1;
3158 } else if facts.has_multi_comment {
3159 raw.multi_comment_only_lines += 1;
3160 } else if trimmed.is_empty() {
3161 raw.blank_only_lines += 1;
3162 } else {
3163 raw.skipped_unknown_lines += 1;
3164 }
3165}
3166
3167fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
3168 let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
3169 let fn_pp = if patterns.functions_prefix_paren.is_empty() {
3172 0
3173 } else if let Some(paren_pos) = trimmed.find('(') {
3174 if trimmed[..paren_pos].contains('=') {
3175 0
3176 } else {
3177 hit(patterns.functions_prefix_paren)
3178 }
3179 } else {
3180 0
3181 };
3182 let test_hit = hit(patterns.tests);
3183 let fn_hit = if test_hit == 0 {
3190 hit(patterns.functions) | fn_pp
3191 } else {
3192 0
3193 };
3194 let class_hit = if test_hit == 0 {
3195 hit(patterns.classes)
3196 } else {
3197 0
3198 };
3199 (
3200 fn_hit,
3201 class_hit,
3202 hit(patterns.variables),
3203 hit(patterns.imports),
3204 test_hit,
3205 hit(patterns.assertions),
3206 hit(patterns.test_suites),
3207 )
3208}
3209
/// Reports whether `line[start..end]` is delimited as a whole word.
///
/// The byte before `start` and the byte at `end` must each be either absent
/// (edge of the line) or something other than an ASCII letter, digit or `_`.
fn is_word_boundary(line: &[u8], start: usize, end: usize) -> bool {
    let is_word_byte = |byte: u8| byte.is_ascii_alphanumeric() || byte == b'_';

    let left_clear = start == 0 || !is_word_byte(line[start - 1]);
    let right_clear = end >= line.len() || !is_word_byte(line[end]);
    left_clear && right_clear
}
3217
3218fn keyword_matches_at(line: &[u8], i: usize, kw_bytes: &[u8], word_kw: bool) -> bool {
3220 if &line[i..i + kw_bytes.len()] != kw_bytes {
3221 return false;
3222 }
3223 !word_kw || is_word_boundary(line, i, i + kw_bytes.len())
3224}
3225
3226fn count_branch_in_line(line: &[u8], keywords: &[&str]) -> u32 {
3231 if keywords.is_empty() || line.is_empty() {
3232 return 0;
3233 }
3234 let mut total = 0u32;
3235 for &kw in keywords {
3236 let kw_bytes = kw.as_bytes();
3237 let word_kw = kw.bytes().all(|b| b.is_ascii_alphabetic() || b == b'_');
3238 let mut i = 0usize;
3239 while i + kw_bytes.len() <= line.len() {
3240 if keyword_matches_at(line, i, kw_bytes, word_kw) {
3241 total += 1;
3242 i += kw_bytes.len();
3243 } else {
3244 i += 1;
3245 }
3246 }
3247 }
3248 total
3249}
3250
/// Reports whether the characters of `needle` appear in `chars` starting at
/// `index`.
///
/// Returns `false` when `index` lies beyond the end of `chars` or the needle
/// does not fit. An empty needle matches at every index up to and including
/// `chars.len()`. The comparison streams over the needle and allocates
/// nothing, which matters because this runs for every scanned character.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
3255
/// One entry of the indentation stack kept while looking for Python docstrings.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation width of the block's body, as measured by `leading_indent`.
    indent: usize,
    /// True until the first statement of the block has been seen; only that
    /// statement may be recorded as the block's docstring.
    expect_docstring: bool,
}
3261
3262fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
3264 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
3265 contexts.pop();
3266 }
3267}
3268
3269fn py_handle_pending_indent(
3272 pending_block_indent: &mut Option<usize>,
3273 contexts: &mut Vec<PyContext>,
3274 indent: usize,
3275 trimmed: &str,
3276) {
3277 let Some(base_indent) = *pending_block_indent else {
3278 return;
3279 };
3280 if indent > base_indent {
3281 contexts.push(PyContext {
3282 indent,
3283 expect_docstring: true,
3284 });
3285 *pending_block_indent = None;
3286 } else if !trimmed.starts_with('@') {
3287 *pending_block_indent = None;
3288 }
3289}
3290
3291fn py_try_record_docstring(
3297 ctx: &mut PyContext,
3298 trimmed: &str,
3299 idx: usize,
3300 docstring_lines: &mut HashSet<usize>,
3301 active_docstring: &mut Option<(&'static str, usize)>,
3302) -> bool {
3303 if !ctx.expect_docstring {
3304 return false;
3305 }
3306 if let Some(delim) = docstring_delimiter(trimmed) {
3307 docstring_lines.insert(idx);
3308 ctx.expect_docstring = false;
3309 if !closes_triple_docstring(trimmed, delim, true) {
3310 *active_docstring = Some((delim, idx));
3311 }
3312 return true;
3313 }
3314 ctx.expect_docstring = false;
3315 false
3316}
3317
3318fn track_active_docstring(
3322 active_docstring: &mut Option<(&'static str, usize)>,
3323 docstring_lines: &mut HashSet<usize>,
3324 idx: usize,
3325 trimmed: &str,
3326) -> bool {
3327 let Some((delim, start_line)) = *active_docstring else {
3328 return false;
3329 };
3330 docstring_lines.insert(idx);
3331 if closes_triple_docstring(trimmed, delim, idx == start_line) {
3332 *active_docstring = None;
3333 }
3334 true
3335}
3336
3337fn try_record_docstring_if_context(
3340 contexts: &mut [PyContext],
3341 trimmed: &str,
3342 idx: usize,
3343 docstring_lines: &mut HashSet<usize>,
3344 active_docstring: &mut Option<(&'static str, usize)>,
3345) -> bool {
3346 let Some(ctx) = contexts.last_mut() else {
3347 return false;
3348 };
3349 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
3350}
3351
/// Marks every line from the start of a still-open docstring to the end of
/// the file as docstring lines.
///
/// Does nothing when no docstring is open.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    // With nothing open the range below is empty.
    let first_line = active_docstring.map_or(num_lines, |&(_, start_line)| start_line);
    docstring_lines.extend(first_line..num_lines);
}
3364
3365fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
3366 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
3367 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
3368
3369 let mut docstring_lines = HashSet::new();
3370 let mut contexts = vec![PyContext {
3371 indent: 0,
3372 expect_docstring: true,
3373 }];
3374 let mut pending_block_indent: Option<usize> = None;
3375 let mut active_docstring: Option<(&'static str, usize)> = None;
3376
3377 for (idx, line) in lines.iter().enumerate() {
3378 let trimmed = line.trim();
3379 let indent = leading_indent(line);
3380
3381 if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
3382 continue;
3383 }
3384
3385 if trimmed.is_empty() || trimmed.starts_with('#') {
3387 continue;
3388 }
3389
3390 py_pop_outdented_contexts(&mut contexts, indent);
3391 py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
3392
3393 if try_record_docstring_if_context(
3394 &mut contexts,
3395 trimmed,
3396 idx,
3397 &mut docstring_lines,
3398 &mut active_docstring,
3399 ) {
3400 continue;
3401 }
3402
3403 if is_python_block_header(trimmed) {
3404 pending_block_indent = Some(indent);
3405 }
3406 }
3407
3408 mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
3409
3410 docstring_lines
3411}
3412
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character, tabs included, counts as one.
fn leading_indent(line: &str) -> usize {
    let mut width = 0;
    for ch in line.chars() {
        if !ch.is_whitespace() {
            break;
        }
        width += 1;
    }
    width
}
3416
/// Reports whether `trimmed` is a `def`, `async def` or `class` header line,
/// i.e. starts with one of those keywords and ends with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];

    let opens_block = HEADER_KEYWORDS
        .iter()
        .any(|keyword| trimmed.starts_with(*keyword));
    opens_block && trimmed.ends_with(':')
}
3423
/// Returns the triple-quote delimiter that `trimmed` opens with, if any.
///
/// Any run of leading string-prefix letters (`r`, `u`, `b`, `f` in either
/// case) is skipped before looking for `"""` or `'''`.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    const DELIMITERS: [&str; 2] = ["\"\"\"", "'''"];

    let after_prefix = trimmed
        .trim_start_matches(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F'));
    DELIMITERS
        .iter()
        .copied()
        .find(|delim| after_prefix.starts_with(*delim))
}
3445
/// Reports whether a triple-quoted string is closed on this line.
///
/// On the line that opened the string the delimiter must occur twice (opener
/// and closer); on any later line a single occurrence closes it. Occurrences
/// are counted without overlap. An empty delimiter never closes anything
/// (and, unlike a manual `find` loop, cannot hang the scan).
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    if delim.is_empty() {
        return false;
    }
    let required = if same_line_as_start { 2 } else { 1 };
    // Stop counting as soon as enough delimiters have been seen.
    trimmed.matches(delim).take(required).count() == required
}
3460
3461#[cfg(feature = "tree-sitter")]
3466pub mod ts {
3467 use tree_sitter::Node;
3468
3469 use super::{ParseMode, RawFileAnalysis, RawLineCounts};
3470
    /// Node kinds and name prefixes used to count symbols in a syntax tree.
    ///
    /// An empty string disables the corresponding check.
    struct SymbolKinds {
        /// Node kind that represents a function definition.
        function_def: &'static str,
        /// Node kind that represents a class definition.
        class_def: &'static str,
        /// Functions whose name starts with this prefix are counted as tests
        /// instead of functions.
        test_fn_prefix: &'static str,
        /// Classes whose name starts with this prefix are counted as tests
        /// instead of classes.
        test_class_prefix: &'static str,
        /// A `call` node on an `attribute` whose attribute name starts with
        /// this prefix is counted as a test assertion.
        assertion_attr_prefix: &'static str,
    }

    impl SymbolKinds {
        /// Configuration with every check disabled (all fields empty).
        const fn none() -> Self {
            Self {
                function_def: "",
                class_def: "",
                test_fn_prefix: "",
                test_class_prefix: "",
                assertion_attr_prefix: "",
            }
        }
    }
3500
3501 fn analyze_lines(
3507 text: &str,
3508 ts_language: &tree_sitter::Language,
3509 comment_node_kinds: &[&str],
3510 docstring_stmt_kind: Option<&str>,
3511 symbols: &SymbolKinds,
3512 ) -> Option<RawFileAnalysis> {
3513 let mut parser = tree_sitter::Parser::new();
3514 parser.set_language(ts_language).ok()?;
3515 let tree = parser.parse(text, None)?;
3516
3517 let lines: Vec<&str> = text.split_terminator('\n').collect();
3518 let n = lines.len();
3519
3520 let mut has_code = vec![false; n];
3521 let mut has_comment = vec![false; n];
3522 let mut comment_is_block = vec![false; n];
3523 let mut has_docstring = vec![false; n];
3524
3525 let mut ctx = VisitCtx {
3527 source: text.as_bytes(),
3528 comment_kinds: comment_node_kinds,
3529 docstring_stmt_kind,
3530 has_code: &mut has_code,
3531 has_comment: &mut has_comment,
3532 comment_is_block: &mut comment_is_block,
3533 has_docstring: &mut has_docstring,
3534 };
3535 visit(tree.root_node(), &mut ctx);
3536
3537 let mut raw = RawLineCounts::default();
3538 classify_ts_lines(
3539 &lines,
3540 &has_code,
3541 &has_comment,
3542 &comment_is_block,
3543 &has_docstring,
3544 &mut raw,
3545 );
3546
3547 if !symbols.function_def.is_empty() || !symbols.class_def.is_empty() {
3549 count_symbols(tree.root_node(), text.as_bytes(), symbols, &mut raw);
3550 }
3551
3552 Some(RawFileAnalysis {
3553 raw,
3554 parse_mode: ParseMode::TreeSitter,
3555 warnings: Vec::new(),
3556 style_analysis: None,
3557 })
3558 }
3559
3560 fn recurse_children(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
3562 for i in 0..node.child_count() {
3563 #[allow(clippy::cast_possible_truncation)]
3564 if let Some(child) = node.child(i as u32) {
3565 count_symbols(child, source, kinds, raw);
3566 }
3567 }
3568 }
3569
3570 fn try_count_function(
3572 node: Node,
3573 source: &[u8],
3574 kinds: &SymbolKinds,
3575 raw: &mut RawLineCounts,
3576 ) -> bool {
3577 if kinds.function_def.is_empty() || node.kind() != kinds.function_def {
3578 return false;
3579 }
3580 let name = node
3581 .child_by_field_name("name")
3582 .and_then(|n| n.utf8_text(source).ok())
3583 .unwrap_or("");
3584 if !kinds.test_fn_prefix.is_empty() && name.starts_with(kinds.test_fn_prefix) {
3585 raw.test_count += 1;
3586 } else {
3587 raw.functions += 1;
3588 }
3589 recurse_children(node, source, kinds, raw);
3590 true
3591 }
3592
3593 fn try_count_class(
3595 node: Node,
3596 source: &[u8],
3597 kinds: &SymbolKinds,
3598 raw: &mut RawLineCounts,
3599 ) -> bool {
3600 if kinds.class_def.is_empty() || node.kind() != kinds.class_def {
3601 return false;
3602 }
3603 let name = node
3604 .child_by_field_name("name")
3605 .and_then(|n| n.utf8_text(source).ok())
3606 .unwrap_or("");
3607 if !kinds.test_class_prefix.is_empty() && name.starts_with(kinds.test_class_prefix) {
3608 raw.test_count += 1;
3609 } else {
3610 raw.classes += 1;
3611 }
3612 recurse_children(node, source, kinds, raw);
3613 true
3614 }
3615
3616 fn try_count_assertion(
3619 node: Node,
3620 source: &[u8],
3621 kinds: &SymbolKinds,
3622 raw: &mut RawLineCounts,
3623 ) -> bool {
3624 if kinds.assertion_attr_prefix.is_empty() || node.kind() != "call" {
3625 return false;
3626 }
3627 let Some(func) = node.child_by_field_name("function") else {
3628 return false;
3629 };
3630 if func.kind() != "attribute" {
3631 return false;
3632 }
3633 let attr_text = func
3634 .child_by_field_name("attribute")
3635 .and_then(|n| n.utf8_text(source).ok())
3636 .unwrap_or("");
3637 if !attr_text.starts_with(kinds.assertion_attr_prefix) {
3638 return false;
3639 }
3640 raw.test_assertion_count += 1;
3641 true
3642 }
3643
3644 fn count_symbols(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
3647 if try_count_function(node, source, kinds, raw) {
3648 return;
3649 }
3650 if try_count_class(node, source, kinds, raw) {
3651 return;
3652 }
3653 if try_count_assertion(node, source, kinds, raw) {
3654 return;
3655 }
3656 recurse_children(node, source, kinds, raw);
3657 }
3658
    /// Per-line flags gathered from the syntax tree, bundled for
    /// `classify_ts_line`.
    #[allow(clippy::struct_excessive_bools)]
    #[derive(Clone, Copy)]
    struct TsLineFlags {
        /// A non-comment leaf node touches this line.
        has_code: bool,
        /// A comment node touches this line.
        has_comment: bool,
        /// The comment on this line starts with `/*` or `<#`.
        comment_is_block: bool,
        /// A docstring statement covers this line.
        has_docstring: bool,
    }
3669
3670 const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
3672 if trimmed.is_empty() {
3673 raw.blank_only_lines += 1;
3674 } else if flags.has_docstring && !flags.has_code {
3675 raw.docstring_comment_lines += 1;
3676 } else if flags.has_code && flags.has_comment {
3677 if flags.comment_is_block {
3679 raw.mixed_code_multi_comment_lines += 1;
3680 } else {
3681 raw.mixed_code_single_comment_lines += 1;
3682 }
3683 } else if flags.has_comment {
3684 if flags.comment_is_block {
3685 raw.multi_comment_only_lines += 1;
3686 } else {
3687 raw.single_comment_only_lines += 1;
3688 }
3689 } else {
3690 raw.code_only_lines += 1;
3691 }
3692 }
3693
3694 fn classify_ts_lines(
3696 lines: &[&str],
3697 has_code: &[bool],
3698 has_comment: &[bool],
3699 comment_is_block: &[bool],
3700 has_docstring: &[bool],
3701 raw: &mut RawLineCounts,
3702 ) {
3703 for i in 0..lines.len() {
3704 raw.total_physical_lines += 1;
3705 classify_ts_line(
3706 lines[i].trim(),
3707 TsLineFlags {
3708 has_code: has_code[i],
3709 has_comment: has_comment[i],
3710 comment_is_block: comment_is_block[i],
3711 has_docstring: has_docstring[i],
3712 },
3713 raw,
3714 );
3715 }
3716 }
3717
    /// Shared state for the tree walk performed by `visit`.
    struct VisitCtx<'a> {
        /// Source bytes, used to read the text of comment nodes.
        source: &'a [u8],
        /// Node kinds that are treated as comments.
        comment_kinds: &'a [&'a str],
        /// Statement kind that forms a docstring when its only named child is
        /// a `string`; `None` disables docstring detection.
        docstring_stmt_kind: Option<&'a str>,
        /// Per-line flag: the line carries code.
        has_code: &'a mut Vec<bool>,
        /// Per-line flag: the line carries a comment.
        has_comment: &'a mut Vec<bool>,
        /// Per-line flag: the comment on the line is a block comment.
        comment_is_block: &'a mut Vec<bool>,
        /// Per-line flag: the line is covered by a docstring.
        has_docstring: &'a mut Vec<bool>,
    }
3727
3728 fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
3730 let start_row = node.start_position().row;
3731 let end_row = node.end_position().row;
3732 let first_two = node
3733 .utf8_text(ctx.source)
3734 .unwrap_or("")
3735 .get(..2)
3736 .unwrap_or("");
3737 let is_block = first_two == "/*" || first_two == "<#";
3738 for row in start_row..=end_row {
3739 if row < ctx.has_comment.len() {
3740 ctx.has_comment[row] = true;
3741 if is_block {
3742 ctx.comment_is_block[row] = true;
3743 }
3744 }
3745 }
3746 }
3747
3748 fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
3751 let Some(stmt_kind) = ctx.docstring_stmt_kind else {
3752 return false;
3753 };
3754 if kind != stmt_kind || node.named_child_count() != 1 {
3755 return false;
3756 }
3757 let Some(child) = node.named_child(0) else {
3758 return false;
3759 };
3760 if child.kind() != "string" {
3761 return false;
3762 }
3763 let child_start = child.start_position().row;
3764 let child_end = child.end_position().row;
3765 for row in child_start..=child_end {
3766 if row < ctx.has_docstring.len() {
3767 ctx.has_docstring[row] = true;
3768 }
3769 }
3770 true
3771 }
3772
3773 fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
3775 let start_row = node.start_position().row;
3776 let end_row = node.end_position().row;
3777 for row in start_row..=end_row {
3778 if row < ctx.has_code.len() {
3779 ctx.has_code[row] = true;
3780 }
3781 }
3782 }
3783
3784 #[allow(clippy::too_many_lines)]
3785 fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
3786 let kind = node.kind();
3787
3788 if ctx.comment_kinds.contains(&kind) {
3790 visit_comment_node(node, ctx);
3791 return;
3792 }
3793
3794 if visit_maybe_docstring(node, kind, ctx) {
3796 return;
3797 }
3798
3799 if node.child_count() == 0 && !node.is_extra() {
3801 visit_leaf_code(node, ctx);
3802 return;
3803 }
3804
3805 for i in 0..node.child_count() {
3806 #[allow(clippy::cast_possible_truncation)]
3807 if let Some(child) = node.child(i as u32) {
3809 visit(child, ctx);
3810 }
3811 }
3812 }
3813
    /// Symbol configuration for C: every check is disabled, so no symbols are
    /// counted from the tree.
    const C_SYMBOLS: SymbolKinds = SymbolKinds::none();

    /// Symbol configuration for Python: `test_`-prefixed functions and
    /// `Test`-prefixed classes count as tests, and attribute calls whose name
    /// starts with `assert` count as assertions.
    const PYTHON_SYMBOLS: SymbolKinds = SymbolKinds {
        function_def: "function_definition",
        class_def: "class_definition",
        test_fn_prefix: "test_",
        test_class_prefix: "Test",
        assertion_attr_prefix: "assert",
    };
3823
    /// Analyzes C source with the tree-sitter C grammar.
    ///
    /// `comment` nodes are treated as comments; there is no docstring
    /// detection and no symbol counting. Returns `None` when the grammar
    /// cannot be loaded or parsing yields no tree.
    #[must_use]
    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
        analyze_lines(text, &lang, &["comment"], None, &C_SYMBOLS)
    }
3830
    /// Analyzes Python source with the tree-sitter Python grammar.
    ///
    /// `comment` nodes are treated as comments and an `expression_statement`
    /// holding only a string is treated as a docstring. Returns `None` when
    /// the grammar cannot be loaded or parsing yields no tree.
    #[must_use]
    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        analyze_lines(
            text,
            &lang,
            &["comment"],
            Some("expression_statement"),
            &PYTHON_SYMBOLS,
        )
    }
3843}
3844
3845#[cfg(test)]
3846mod tests {
3847 use super::*;
3848
    /// Module and function docstrings are counted separately from code and
    /// from ordinary comments.
    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
    """function docs"""
    value = 1 # trailing comment
    return value
"#;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        // Two docstring lines: the module docstring and the function docstring.
        assert_eq!(result.raw.docstring_comment_lines, 2);
        // The assignment carries a trailing `#` comment.
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        // The `def` line and the `return` line.
        assert_eq!(result.raw.code_only_lines, 2);
    }
3865
3866 #[test]
3867 fn c_style_mixed_lines_are_captured() {
3868 let input = "int x = 1; // note\n/* block */\n";
3869 let result = analyze_text(Language::C, input, AnalysisOptions::default());
3870 assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
3871 assert_eq!(result.raw.multi_comment_only_lines, 1);
3872 }
3873
3874 #[test]
3875 fn detect_language_by_shebang() {
3876 let language = detect_language(
3877 Path::new("script"),
3878 Some("#!/usr/bin/env bash"),
3879 &BTreeMap::new(),
3880 true,
3881 );
3882 assert_eq!(language, Some(Language::Shell));
3883 }
3884
3885 fn sym(lang: Language, line: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
3888 let result = analyze_text(lang, &format!("{line}\n"), AnalysisOptions::default());
3889 let r = &result.raw;
3890 (
3891 r.functions,
3892 r.classes,
3893 r.variables,
3894 r.imports,
3895 r.test_count,
3896 r.test_assertion_count,
3897 r.test_suite_count,
3898 )
3899 }
3900
3901 #[test]
3902 fn python_test_fn_not_double_counted() {
3903 let (f, c, _, _, t, _, _) = sym(Language::Python, "def test_foo():");
3905 assert_eq!(f, 0, "test fn must not also increment functions");
3906 assert_eq!(t, 1, "must be counted as a test");
3907 assert_eq!(c, 0);
3908 }
3909
3910 #[test]
3911 fn python_test_class_not_double_counted() {
3912 let (f, c, _, _, t, _, _) = sym(Language::Python, "class TestFoo:");
3914 assert_eq!(c, 0, "test class must not also increment classes");
3915 assert_eq!(t, 1, "must be counted as a test");
3916 assert_eq!(f, 0);
3917 }
3918
3919 #[test]
3920 fn python_regular_fn_counts_as_function() {
3921 let (f, c, _, _, t, _, _) = sym(Language::Python, "def regular():");
3922 assert_eq!(f, 1, "regular function must be counted");
3923 assert_eq!(t, 0);
3924 assert_eq!(c, 0);
3925 }
3926
3927 #[test]
3928 fn python_regular_class_counts_as_class() {
3929 let (f, c, _, _, t, _, _) = sym(Language::Python, "class Regular:");
3930 assert_eq!(c, 1, "regular class must be counted");
3931 assert_eq!(t, 0);
3932 assert_eq!(f, 0);
3933 }
3934
3935 #[test]
3936 fn go_test_fn_not_double_counted() {
3937 let (f, _, _, _, t, _, _) = sym(Language::Go, "func TestFoo(t *testing.T) {");
3938 assert_eq!(f, 0, "Go test func must not also increment functions");
3939 assert_eq!(t, 1, "must be counted as a test");
3940 }
3941
3942 #[test]
3943 fn go_benchmark_fn_not_double_counted() {
3944 let (f, _, _, _, t, _, _) = sym(Language::Go, "func BenchmarkBar(b *testing.B) {");
3945 assert_eq!(f, 0, "Go benchmark func must not also increment functions");
3946 assert_eq!(t, 1, "must be counted as a test");
3947 }
3948
3949 #[test]
3950 fn go_regular_fn_counts_as_function() {
3951 let (f, _, _, _, t, _, _) = sym(Language::Go, "func doSomething() {");
3952 assert_eq!(f, 1, "regular Go func must be counted");
3953 assert_eq!(t, 0);
3954 }
3955
3956 #[test]
3957 fn rust_test_attr_counts_as_test_not_function() {
3958 let (f, _, _, _, t, _, _) = sym(Language::Rust, "#[test]");
3960 assert_eq!(t, 1, "#[test] must be counted as a test");
3961 assert_eq!(f, 0, "#[test] attribute must not be counted as a function");
3962 }
3963
// A Rust `fn` line counts as a function even when its name starts with
// `test_`; the test counter must not move for that line.
#[test]
fn rust_fn_line_counts_as_function_not_test() {
    let line = "fn test_something() {";
    let (fn_count, _, _, _, test_count, _, _) = sym(Language::Rust, line);
    assert_eq!(fn_count, 1, "fn declaration must count as a function");
    assert_eq!(
        test_count, 0,
        "fn declaration line must not be double-counted as a test"
    );
}
3974
// A JavaScript `describe(...)` line is tallied as a test, not as a function,
// even though it contains an arrow function.
#[test]
fn js_describe_counts_as_test_not_function() {
    // Tuple slots used here: 0 = functions, 4 = tests.
    let tally = sym(Language::JavaScript, "describe('suite', () => {");
    assert_eq!(tally.4, 1, "describe must be counted as a test");
    assert_eq!(tally.0, 0, "describe must not be counted as a function");
}
3981
// A JavaScript `function` declaration bumps the function counter only.
#[test]
fn js_regular_fn_counts_as_function() {
    let line = "function doWork() {";
    let (fn_count, _, _, _, test_count, _, _) = sym(Language::JavaScript, line);
    assert_eq!(fn_count, 1, "JS function declaration must be counted");
    assert_eq!(test_count, 0);
}
3988
3989 use std::collections::BTreeMap;
3992 use std::path::Path;
3993
// `.rs` resolves to Rust with no text hint and an empty override map.
#[test]
fn detect_language_rs_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("foo.rs"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Rust));
}
3999
// `.py` resolves to Python with no text hint and an empty override map.
#[test]
fn detect_language_py_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("foo.py"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Python));
}
4005
// `.ts` resolves to TypeScript with no text hint and an empty override map.
#[test]
fn detect_language_ts_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("app.ts"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::TypeScript));
}
4011
// `.js` resolves to JavaScript with no text hint and an empty override map.
#[test]
fn detect_language_js_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("app.js"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::JavaScript));
}
4017
// `.go` resolves to Go with no text hint and an empty override map.
#[test]
fn detect_language_go_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("main.go"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Go));
}
4023
// `.c` resolves to C with no text hint and an empty override map.
#[test]
fn detect_language_c_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("main.c"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::C));
}
4029
// `.cpp` resolves to C++ with no text hint and an empty override map.
#[test]
fn detect_language_cpp_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("main.cpp"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Cpp));
}
4035
// `.java` resolves to Java with no text hint and an empty override map.
#[test]
fn detect_language_java_extension() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("Main.java"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Java));
}
4041
// The extension-less file name `Makefile` is recognised as a Makefile.
#[test]
fn detect_language_makefile_exact_name() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("Makefile"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Makefile));
}
4047
// The extension-less file name `Dockerfile` is recognised as a Dockerfile.
#[test]
fn detect_language_dockerfile_exact_name() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("Dockerfile"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Dockerfile));
}
4053
// The extension-less file name `Rakefile` is classified as Ruby.
#[test]
fn detect_language_rakefile() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("Rakefile"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Ruby));
}
4059
// The extension-less file name `Gemfile` is classified as Ruby.
#[test]
fn detect_language_gemfile() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("Gemfile"), None, &no_overrides, false);
    assert_eq!(detected, Some(Language::Ruby));
}
4065
// An unrecognised extension with no other hints yields `None`.
#[test]
fn detect_language_unknown_extension_returns_none() {
    let no_overrides = BTreeMap::new();
    let detected = detect_language(Path::new("foo.xyz123"), None, &no_overrides, false);
    assert_eq!(detected, None);
}
4071
// An override entry mapping the `h` extension to the name `cpp` makes
// `header.h` resolve to C++.
#[test]
fn detect_language_extension_override() {
    let mut ext_to_lang = BTreeMap::new();
    ext_to_lang.insert("h".into(), "cpp".into());

    let detected = detect_language(Path::new("header.h"), None, &ext_to_lang, false);
    assert_eq!(detected, Some(Language::Cpp));
}
4079
// With the final flag set to `true`, an extension-less file whose supplied
// text is a `python3` env shebang resolves to Python.
#[test]
fn detect_language_shebang_python() {
    let shebang = Some("#!/usr/bin/env python3");
    let detected = detect_language(Path::new("script"), shebang, &BTreeMap::new(), true);
    assert_eq!(detected, Some(Language::Python));
}
4090
// With the final flag set to `true`, a `/bin/bash` shebang on an
// extension-less file resolves to Shell.
#[test]
fn detect_language_shebang_bash() {
    let shebang = Some("#!/bin/bash");
    let detected = detect_language(Path::new("script"), shebang, &BTreeMap::new(), true);
    assert_eq!(detected, Some(Language::Shell));
}
4101
// With the final flag set to `true`, a `ruby` env shebang on an
// extension-less file resolves to Ruby.
#[test]
fn detect_language_shebang_ruby() {
    let shebang = Some("#!/usr/bin/env ruby");
    let detected = detect_language(Path::new("script"), shebang, &BTreeMap::new(), true);
    assert_eq!(detected, Some(Language::Ruby));
}
4112
// Same inputs as the Python shebang case, but the final flag is `false`:
// the shebang must then be ignored and nothing is detected.
#[test]
fn detect_language_shebang_disabled() {
    let shebang = Some("#!/usr/bin/env python3");
    let detected = detect_language(Path::new("script"), shebang, &BTreeMap::new(), false);
    assert_eq!(detected, None);
}
4124
// The name `rust` parses to `Language::Rust`.
#[test]
fn from_name_rust() {
    let parsed = Language::from_name("rust");
    assert_eq!(parsed, Some(Language::Rust));
}
4129
// The name `python` parses to `Language::Python`.
#[test]
fn from_name_python() {
    let parsed = Language::from_name("python");
    assert_eq!(parsed, Some(Language::Python));
}
4134
// A name that matches no supported language parses to `None`.
#[test]
fn from_name_unknown() {
    let parsed = Language::from_name("brainfuck");
    assert_eq!(parsed, None);
}
4139
// Round-trips each `Language` variant through `as_slug` and `from_name`.
//
// The list previously stopped at `Zig`, leaving the later variants
// (`Solidity` through `Awk`) unchecked despite the test's name. Rust has no
// built-in way to enumerate variants, so this list is maintained by hand:
// add new variants here when extending the enum.
#[test]
fn from_name_roundtrip_all() {
    for lang in [
        Language::C,
        Language::Cpp,
        Language::CSharp,
        Language::Go,
        Language::Java,
        Language::JavaScript,
        Language::Python,
        Language::Rust,
        Language::Shell,
        Language::PowerShell,
        Language::TypeScript,
        Language::Assembly,
        Language::Clojure,
        Language::Css,
        Language::Dart,
        Language::Dockerfile,
        Language::Elixir,
        Language::Erlang,
        Language::FSharp,
        Language::Groovy,
        Language::Haskell,
        Language::Html,
        Language::Julia,
        Language::Kotlin,
        Language::Lua,
        Language::Makefile,
        Language::Nim,
        Language::ObjectiveC,
        Language::Ocaml,
        Language::Perl,
        Language::Php,
        Language::R,
        Language::Ruby,
        Language::Scala,
        Language::Scss,
        Language::Sql,
        Language::Svelte,
        Language::Swift,
        Language::Vue,
        Language::Xml,
        Language::Zig,
        // Variants that were missing from the original list.
        Language::Solidity,
        Language::Protobuf,
        Language::Hcl,
        Language::GraphQl,
        Language::Ada,
        Language::Vhdl,
        Language::Verilog,
        Language::Tcl,
        Language::Pascal,
        Language::VisualBasic,
        Language::Lisp,
        Language::Fortran,
        Language::Nix,
        Language::Crystal,
        Language::D,
        Language::Glsl,
        Language::Cmake,
        Language::Elm,
        Language::Awk,
    ] {
        let slug = lang.as_slug();
        let roundtripped = Language::from_name(slug);
        assert_eq!(
            roundtripped,
            Some(lang),
            "from_name({slug:?}) should return {lang:?}"
        );
    }
}
4195
// With `blank_in_block_comment_as_comment` on, the empty line inside a C
// block comment is tallied as a multi-line-comment line, not as blank.
// NOTE(review): despite "defaults" in the name, the option is set explicitly
// here rather than taken from `Default` - confirm that is intended.
#[test]
fn blank_in_block_comment_defaults_to_comment() {
    // Three physical lines: opener, empty line, closer.
    let source = "/*\n\n*/";
    let options = AnalysisOptions {
        blank_in_block_comment_as_comment: true,
        ..AnalysisOptions::default()
    };

    let report = analyze_text(Language::C, source, options);
    let raw = &report.raw;
    assert_eq!(
        raw.multi_comment_only_lines, 3,
        "all 3 block-comment lines must count as multi-comment with CountAsComment policy"
    );
    assert_eq!(
        raw.blank_only_lines, 0,
        "no blank lines expected with CountAsComment policy"
    );
}
4216
// With `blank_in_block_comment_as_comment` off, the empty line inside a C
// block comment is tallied as blank; only the opener and closer are comments.
#[test]
fn blank_in_block_comment_counted_as_blank_when_policy_false() {
    // Three physical lines: opener, empty line, closer.
    let source = "/*\n\n*/";
    let options = AnalysisOptions {
        blank_in_block_comment_as_comment: false,
        ..AnalysisOptions::default()
    };

    let report = analyze_text(Language::C, source, options);
    let raw = &report.raw;
    assert_eq!(
        raw.multi_comment_only_lines, 2,
        "opener and closer must count as multi-comment with CountAsBlank policy"
    );
    assert_eq!(
        raw.blank_only_lines, 1,
        "the blank line inside the block comment must count as blank with CountAsBlank policy"
    );
}
4235
// With `collapse_continuation_lines` off, every physical line of a
// backslash-continued C macro is tallied as its own code line.
// NOTE(review): despite "default" in the name, the option is set explicitly
// here rather than taken from `Default` - confirm that is intended.
#[test]
fn continuation_lines_each_physical_default() {
    // One `#define` spread over three physical lines via trailing backslashes.
    let source = "#define FOO \\\n 1 \\\n + 2\n";
    let options = AnalysisOptions {
        collapse_continuation_lines: false,
        ..AnalysisOptions::default()
    };

    let report = analyze_text(Language::C, source, options);
    let raw = &report.raw;
    assert_eq!(
        raw.total_physical_lines, 3,
        "3 physical lines expected"
    );
    assert_eq!(
        raw.code_only_lines, 3,
        "each physical line must count as code with EachPhysicalLine policy"
    );
}
4256
// With `collapse_continuation_lines` on, a backslash-continued C macro is
// tallied as one code line while the physical line total is unchanged.
#[test]
fn continuation_lines_collapse_to_logical() {
    // One `#define` spread over three physical lines via trailing backslashes.
    let source = "#define FOO \\\n 1 \\\n + 2\n";
    let options = AnalysisOptions {
        collapse_continuation_lines: true,
        ..AnalysisOptions::default()
    };

    let report = analyze_text(Language::C, source, options);
    let raw = &report.raw;
    assert_eq!(
        raw.total_physical_lines, 3,
        "physical line count is always 3 regardless of policy"
    );
    assert_eq!(
        raw.code_only_lines, 1,
        "3 continuation lines must collapse to 1 logical code line"
    );
}
4275}