1pub mod style;
5pub use style::{IndentStyle, StyleAnalysis, StyleGuideScore, StyleSignal};
6
7use std::collections::{BTreeMap, BTreeSet, HashSet};
8use std::path::Path;
9
10use serde::{Deserialize, Serialize};
11
/// Languages known to this module.
///
/// Serialized through serde with `snake_case` variant names (see the
/// `rename_all` attribute). NOTE(review): serde's `snake_case` naming of
/// multi-word variants (e.g. `CSharp`) is not necessarily the same string as
/// `Language::as_slug` — confirm before treating the two as interchangeable.
///
/// `PartialOrd`/`Ord` are derived, so ordering follows declaration order;
/// reordering variants changes the iteration order of any
/// `BTreeSet<Language>` / `BTreeMap<Language, _>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
}
58
59impl Language {
60 #[must_use]
61 pub const fn display_name(&self) -> &'static str {
62 match self {
63 Self::C => "C",
64 Self::Cpp => "C++",
65 Self::CSharp => "C#",
66 Self::Go => "Go",
67 Self::Java => "Java",
68 Self::JavaScript => "JavaScript",
69 Self::Python => "Python",
70 Self::Rust => "Rust",
71 Self::Shell => "Shell",
72 Self::PowerShell => "PowerShell",
73 Self::TypeScript => "TypeScript",
74 Self::Assembly => "Assembly",
75 Self::Clojure => "Clojure",
76 Self::Css => "CSS",
77 Self::Dart => "Dart",
78 Self::Dockerfile => "Dockerfile",
79 Self::Elixir => "Elixir",
80 Self::Erlang => "Erlang",
81 Self::FSharp => "F#",
82 Self::Groovy => "Groovy",
83 Self::Haskell => "Haskell",
84 Self::Html => "HTML",
85 Self::Julia => "Julia",
86 Self::Kotlin => "Kotlin",
87 Self::Lua => "Lua",
88 Self::Makefile => "Makefile",
89 Self::Nim => "Nim",
90 Self::ObjectiveC => "Objective-C",
91 Self::Ocaml => "OCaml",
92 Self::Perl => "Perl",
93 Self::Php => "PHP",
94 Self::R => "R",
95 Self::Ruby => "Ruby",
96 Self::Scala => "Scala",
97 Self::Scss => "SCSS",
98 Self::Sql => "SQL",
99 Self::Svelte => "Svelte",
100 Self::Swift => "Swift",
101 Self::Vue => "Vue",
102 Self::Xml => "XML",
103 Self::Zig => "Zig",
104 }
105 }
106
107 #[must_use]
108 pub const fn as_slug(&self) -> &'static str {
109 match self {
110 Self::C => "c",
111 Self::Cpp => "cpp",
112 Self::CSharp => "csharp",
113 Self::Go => "go",
114 Self::Java => "java",
115 Self::JavaScript => "javascript",
116 Self::Python => "python",
117 Self::Rust => "rust",
118 Self::Shell => "shell",
119 Self::PowerShell => "powershell",
120 Self::TypeScript => "typescript",
121 Self::Assembly => "assembly",
122 Self::Clojure => "clojure",
123 Self::Css => "css",
124 Self::Dart => "dart",
125 Self::Dockerfile => "dockerfile",
126 Self::Elixir => "elixir",
127 Self::Erlang => "erlang",
128 Self::FSharp => "fsharp",
129 Self::Groovy => "groovy",
130 Self::Haskell => "haskell",
131 Self::Html => "html",
132 Self::Julia => "julia",
133 Self::Kotlin => "kotlin",
134 Self::Lua => "lua",
135 Self::Makefile => "makefile",
136 Self::Nim => "nim",
137 Self::ObjectiveC => "objectivec",
138 Self::Ocaml => "ocaml",
139 Self::Perl => "perl",
140 Self::Php => "php",
141 Self::R => "r",
142 Self::Ruby => "ruby",
143 Self::Scala => "scala",
144 Self::Scss => "scss",
145 Self::Sql => "sql",
146 Self::Svelte => "svelte",
147 Self::Swift => "swift",
148 Self::Vue => "vue",
149 Self::Xml => "xml",
150 Self::Zig => "zig",
151 }
152 }
153
154 #[must_use]
155 pub fn from_name(name: &str) -> Option<Self> {
156 match name.trim().to_ascii_lowercase().as_str() {
157 "c" => Some(Self::C),
158 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
159 "csharp" | "c#" | "cs" => Some(Self::CSharp),
160 "go" | "golang" => Some(Self::Go),
161 "java" => Some(Self::Java),
162 "javascript" | "js" => Some(Self::JavaScript),
163 "python" | "py" => Some(Self::Python),
164 "rust" | "rs" => Some(Self::Rust),
165 "shell" | "sh" | "bash" => Some(Self::Shell),
166 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
167 "typescript" | "ts" => Some(Self::TypeScript),
168 "assembly" | "asm" => Some(Self::Assembly),
169 "clojure" | "clj" => Some(Self::Clojure),
170 "css" => Some(Self::Css),
171 "dart" => Some(Self::Dart),
172 "dockerfile" | "docker" => Some(Self::Dockerfile),
173 "elixir" | "ex" => Some(Self::Elixir),
174 "erlang" | "erl" => Some(Self::Erlang),
175 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
176 "groovy" => Some(Self::Groovy),
177 "haskell" | "hs" => Some(Self::Haskell),
178 "html" | "htm" => Some(Self::Html),
179 "julia" | "jl" => Some(Self::Julia),
180 "kotlin" | "kt" => Some(Self::Kotlin),
181 "lua" => Some(Self::Lua),
182 "makefile" | "make" | "mk" => Some(Self::Makefile),
183 "nim" => Some(Self::Nim),
184 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
185 "ocaml" | "ml" => Some(Self::Ocaml),
186 "perl" | "pl" => Some(Self::Perl),
187 "php" => Some(Self::Php),
188 "r" => Some(Self::R),
189 "ruby" | "rb" => Some(Self::Ruby),
190 "scala" => Some(Self::Scala),
191 "scss" | "sass" => Some(Self::Scss),
192 "sql" => Some(Self::Sql),
193 "svelte" => Some(Self::Svelte),
194 "swift" => Some(Self::Swift),
195 "vue" => Some(Self::Vue),
196 "xml" => Some(Self::Xml),
197 "zig" => Some(Self::Zig),
198 _ => None,
199 }
200 }
201}
202
/// Per-file counters produced by an analysis pass.
///
/// `Default` yields all-zero counters. Fields carrying `#[serde(default)]`
/// deserialize to `0` when missing from the input. The exact rule that
/// increments each counter lives in the analyzers, which are not part of
/// this definition; the field names are the only description available here.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    // Line-classification counters (required when deserializing).
    pub total_physical_lines: u64,
    pub blank_only_lines: u64,
    pub code_only_lines: u64,
    pub single_comment_only_lines: u64,
    pub multi_comment_only_lines: u64,
    pub mixed_code_single_comment_lines: u64,
    pub mixed_code_multi_comment_lines: u64,
    pub docstring_comment_lines: u64,
    pub skipped_unknown_lines: u64,
    // Symbol and test counters (optional when deserializing; default to 0).
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub compiler_directive_lines: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
}
244
/// Identifies which analysis strategy produced a [`RawFileAnalysis`].
///
/// Serialized with `snake_case` variant names. The precise conditions under
/// which each mode is reported are decided by the analyzers, not here.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    Lexical,
    LexicalBestEffort,
    TreeSitter,
}
252
/// Result of analyzing the text of a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// Counters gathered for the file.
    pub raw: RawLineCounts,
    /// Strategy that produced `raw`.
    pub parse_mode: ParseMode,
    /// Human-readable warnings emitted during analysis.
    pub warnings: Vec<String>,
    /// Optional style analysis; omitted from serialized output when `None`
    /// and defaulted to `None` when absent on deserialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
262
/// Caller-selectable switches for [`analyze_text`].
///
/// Both flags are forwarded unchanged to the generic scanner; their exact
/// effect on counting is implemented there.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// Forwarded to the scanner flag of the same name. Defaults to `true`.
    pub blank_in_block_comment_as_comment: bool,
    /// Forwarded to the scanner flag of the same name. Defaults to `false`.
    pub collapse_continuation_lines: bool,
}

impl Default for AnalysisOptions {
    /// Default switches: blank-in-block-comment handling on, continuation
    /// collapsing off.
    fn default() -> Self {
        let blank_in_block_comment_as_comment = true;
        let collapse_continuation_lines = false;
        AnalysisOptions {
            blank_in_block_comment_as_comment,
            collapse_continuation_lines,
        }
    }
}
285
286#[must_use]
287pub fn supported_languages() -> BTreeSet<Language> {
288 [
289 Language::Assembly,
290 Language::C,
291 Language::Clojure,
292 Language::Cpp,
293 Language::CSharp,
294 Language::Css,
295 Language::Dart,
296 Language::Dockerfile,
297 Language::Elixir,
298 Language::Erlang,
299 Language::FSharp,
300 Language::Go,
301 Language::Groovy,
302 Language::Haskell,
303 Language::Html,
304 Language::Java,
305 Language::JavaScript,
306 Language::Julia,
307 Language::Kotlin,
308 Language::Lua,
309 Language::Makefile,
310 Language::Nim,
311 Language::ObjectiveC,
312 Language::Ocaml,
313 Language::Perl,
314 Language::Php,
315 Language::PowerShell,
316 Language::Python,
317 Language::R,
318 Language::Ruby,
319 Language::Rust,
320 Language::Scala,
321 Language::Scss,
322 Language::Shell,
323 Language::Sql,
324 Language::Svelte,
325 Language::Swift,
326 Language::TypeScript,
327 Language::Vue,
328 Language::Xml,
329 Language::Zig,
330 ]
331 .into_iter()
332 .collect()
333}
334
335fn detect_by_shebang(line: &str) -> Option<Language> {
337 let lower = line.to_ascii_lowercase();
338 if !lower.starts_with("#!") {
339 return None;
340 }
341 if lower.contains("python") {
342 return Some(Language::Python);
343 }
344 if lower.contains("pwsh") || lower.contains("powershell") {
345 return Some(Language::PowerShell);
346 }
347 if lower.contains("bash")
348 || lower.contains("/sh")
349 || lower.contains("zsh")
350 || lower.contains("ksh")
351 {
352 return Some(Language::Shell);
353 }
354 if lower.contains("ruby") {
355 return Some(Language::Ruby);
356 }
357 if lower.contains("perl") {
358 return Some(Language::Perl);
359 }
360 if lower.contains("php") {
361 return Some(Language::Php);
362 }
363 if lower.contains("node") || lower.contains("nodejs") {
364 return Some(Language::JavaScript);
365 }
366 None
367}
368
369fn detect_by_extension(ext: &str) -> Option<Language> {
371 static EXT_MAP: &[(&str, Language)] = &[
373 ("c", Language::C),
374 ("h", Language::C),
375 ("cc", Language::Cpp),
376 ("cp", Language::Cpp),
377 ("cpp", Language::Cpp),
378 ("cxx", Language::Cpp),
379 ("hh", Language::Cpp),
380 ("hpp", Language::Cpp),
381 ("hxx", Language::Cpp),
382 ("cs", Language::CSharp),
383 ("go", Language::Go),
384 ("java", Language::Java),
385 ("js", Language::JavaScript),
386 ("mjs", Language::JavaScript),
387 ("cjs", Language::JavaScript),
388 ("py", Language::Python),
389 ("rs", Language::Rust),
390 ("sh", Language::Shell),
391 ("bash", Language::Shell),
392 ("zsh", Language::Shell),
393 ("ksh", Language::Shell),
394 ("ps1", Language::PowerShell),
395 ("psm1", Language::PowerShell),
396 ("psd1", Language::PowerShell),
397 ("ts", Language::TypeScript),
398 ("mts", Language::TypeScript),
399 ("cts", Language::TypeScript),
400 ("asm", Language::Assembly),
401 ("s", Language::Assembly),
402 ("clj", Language::Clojure),
403 ("cljs", Language::Clojure),
404 ("cljc", Language::Clojure),
405 ("edn", Language::Clojure),
406 ("css", Language::Css),
407 ("dart", Language::Dart),
408 ("ex", Language::Elixir),
409 ("exs", Language::Elixir),
410 ("erl", Language::Erlang),
411 ("hrl", Language::Erlang),
412 ("fs", Language::FSharp),
413 ("fsi", Language::FSharp),
414 ("fsx", Language::FSharp),
415 ("groovy", Language::Groovy),
416 ("gradle", Language::Groovy),
417 ("hs", Language::Haskell),
418 ("lhs", Language::Haskell),
419 ("html", Language::Html),
420 ("htm", Language::Html),
421 ("xhtml", Language::Html),
422 ("jl", Language::Julia),
423 ("kt", Language::Kotlin),
424 ("kts", Language::Kotlin),
425 ("lua", Language::Lua),
426 ("mk", Language::Makefile),
427 ("nim", Language::Nim),
428 ("nims", Language::Nim),
429 ("m", Language::ObjectiveC),
430 ("mm", Language::ObjectiveC),
431 ("ml", Language::Ocaml),
432 ("mli", Language::Ocaml),
433 ("pl", Language::Perl),
434 ("pm", Language::Perl),
435 ("t", Language::Perl),
436 ("php", Language::Php),
437 ("php3", Language::Php),
438 ("php4", Language::Php),
439 ("php5", Language::Php),
440 ("php7", Language::Php),
441 ("phtml", Language::Php),
442 ("r", Language::R),
443 ("rb", Language::Ruby),
444 ("rake", Language::Ruby),
445 ("scala", Language::Scala),
446 ("sc", Language::Scala),
447 ("scss", Language::Scss),
448 ("sass", Language::Scss),
449 ("sql", Language::Sql),
450 ("svelte", Language::Svelte),
451 ("swift", Language::Swift),
452 ("vue", Language::Vue),
453 ("xml", Language::Xml),
454 ("xsd", Language::Xml),
455 ("xsl", Language::Xml),
456 ("xslt", Language::Xml),
457 ("svg", Language::Xml),
458 ("zig", Language::Zig),
459 ];
460 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
461}
462
463fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
465 if filename == "Dockerfile"
467 || filename.starts_with("Dockerfile.")
468 || filename_lower == "dockerfile"
469 {
470 return Some(Language::Dockerfile);
471 }
472 if matches!(
474 filename,
475 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
476 ) {
477 return Some(Language::Makefile);
478 }
479 if matches!(
481 filename,
482 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
483 ) {
484 return Some(Language::Ruby);
485 }
486 None
487}
488
489#[must_use]
490#[allow(clippy::too_many_lines)]
491pub fn detect_language(
492 path: &Path,
493 first_line: Option<&str>,
494 extension_overrides: &BTreeMap<String, String>,
495 shebang_detection: bool,
496) -> Option<Language> {
497 let extension = path
498 .extension()
499 .and_then(|ext| ext.to_str())
500 .map(str::to_ascii_lowercase);
501
502 if let Some(ext) = extension.as_ref() {
504 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
505 if let Some(lang) = Language::from_name(override_name) {
506 return Some(lang);
507 }
508 }
509 }
510
511 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
513 let filename_lower = filename.to_ascii_lowercase();
514
515 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
516 return Some(lang);
517 }
518
519 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
521 return Some(lang);
522 }
523
524 if shebang_detection {
526 if let Some(line) = first_line {
527 if let Some(lang) = detect_by_shebang(line) {
528 return Some(lang);
529 }
530 }
531 }
532
533 None
534}
535
536#[must_use]
537pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
538 #[cfg(feature = "tree-sitter")]
540 {
541 match language {
542 Language::C | Language::Cpp => {
543 if let Some(mut result) = ts::analyze_c(text) {
544 result.style_analysis = style::analyze_style(language, text);
545 return result;
546 }
547 }
548 Language::Python => {
549 if let Some(result) = ts::analyze_python(text) {
550 return result;
551 }
552 }
553 _ => {}
554 }
555 }
556
557 let (mut config, has_preprocessor) = language_scan_config(language);
558
559 if language == Language::Python {
561 config.skip_lines = detect_python_docstring_lines(text);
562 }
563
564 let flags = IeeeFlags {
567 has_preprocessor_directives: has_preprocessor,
568 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
569 collapse_continuation_lines: options.collapse_continuation_lines,
570 };
571 let mut result = analyze_generic(text, config, flags);
572 result.style_analysis = style::analyze_style(language, text);
573 result
574}
575
576fn language_scan_config(language: Language) -> (ScanConfig, bool) {
584 let cfg = LANG_SCAN_TABLE
585 .iter()
586 .find_map(|&(l, c)| (l == language).then_some(c))
587 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
588 (
589 ScanConfig {
590 line_comments: cfg.line_comments,
591 block_comment: cfg.block_comment,
592 allow_single_quote_strings: cfg.allow_single_quote_strings,
593 allow_double_quote_strings: cfg.allow_double_quote_strings,
594 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
595 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
596 skip_lines: HashSet::new(),
597 symbol_patterns: cfg.symbol_patterns,
598 },
599 cfg.has_preprocessor,
600 )
601}
602
/// Per-language lists of literal strings used to recognise symbols and
/// test constructs.
///
/// Each field is a static slice of patterns. How a pattern is compared
/// against a source line is decided by the scanner that consumes this
/// struct, not by the struct itself.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Patterns that mark a function definition.
    functions: &'static [&'static str],
    /// Function patterns kept in a separate list from `functions`.
    /// NOTE(review): the name suggests these count only when the line also
    /// contains `(` — confirm against the scanner.
    functions_prefix_paren: &'static [&'static str],
    /// Patterns that mark a class-like or type definition.
    classes: &'static [&'static str],
    /// Patterns that mark a variable declaration.
    variables: &'static [&'static str],
    /// Patterns that mark an import / include.
    imports: &'static [&'static str],
    /// Patterns that mark a test case.
    tests: &'static [&'static str],
    /// Patterns that mark a test assertion.
    assertions: &'static [&'static str],
    /// Patterns that mark a test suite / fixture.
    test_suites: &'static [&'static str],
}

impl SymbolPatterns {
    /// Returns a pattern set in which every list is empty.
    const fn none() -> Self {
        const EMPTY: &[&str] = &[];
        SymbolPatterns {
            functions: EMPTY,
            functions_prefix_paren: EMPTY,
            classes: EMPTY,
            variables: EMPTY,
            imports: EMPTY,
            tests: EMPTY,
            assertions: EMPTY,
            test_suites: EMPTY,
        }
    }
}
643
/// Pattern set with every list empty.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();

/// Symbol and test patterns for Rust source text.
///
/// How the scanner applies these strings (prefix vs. substring, trimmed or
/// not) is defined outside this constant.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    // `fn` items with the listed visibility / qualifier combinations.
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    functions_prefix_paren: &[],
    // Type-level items: structs, enums, traits, impl blocks and type aliases.
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
    // Test attributes.
    tests: &[
        "#[test]",
        "#[tokio::test]",
        "#[actix_web::test]",
        "#[rstest]",
        "#[test_case",
    ],
    // Assertion macro invocations.
    assertions: &[
        "assert_eq!(",
        "assert_ne!(",
        "assert!(",
        "assert_matches!(",
        "assert_err!(",
        "assert_ok!(",
    ],
    test_suites: &[],
};
701
/// Symbol and test patterns for Python source text.
///
/// No variable patterns are defined. How the scanner applies these strings
/// is defined outside this constant.
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["import ", "from "],
    // `test_`-prefixed functions and `Test`-prefixed classes.
    tests: &["def test_", "async def test_", "class Test"],
    // `self.assert*` method calls.
    assertions: &[
        "self.assertEqual(",
        "self.assertNotEqual(",
        "self.assertTrue(",
        "self.assertFalse(",
        "self.assertIsNone(",
        "self.assertIsNotNone(",
        "self.assertIn(",
        "self.assertNotIn(",
        "self.assertRaises(",
        "self.assertAlmostEqual(",
    ],
    test_suites: &[],
};
724
/// Symbol and test patterns for JavaScript source text.
///
/// How the scanner applies these strings is defined outside this constant.
const SP_JS: SymbolPatterns = SymbolPatterns {
    // `function` declarations, optionally async and/or exported.
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // `describe` / `it` / `test` calls and their `.each` forms.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
756
/// Symbol and test patterns for TypeScript source text.
///
/// Same function, variable, import, test and assertion lists as `SP_JS`;
/// the class list additionally covers `abstract`, `interface` and `declare`
/// forms. How the scanner applies these strings is defined outside this
/// constant.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // `describe` / `it` / `test` calls and their `.each` forms.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
798
/// Symbol and test patterns for Go source text.
///
/// No assertion or suite patterns are defined. How the scanner applies
/// these strings is defined outside this constant.
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var "],
    imports: &["import "],
    // Functions whose names start with `Test`, `Benchmark` or `Fuzz`.
    tests: &["func Test", "func Benchmark", "func Fuzz"],
    assertions: &[],
    test_suites: &[],
};
810
/// Symbol and test patterns for Java source text.
///
/// No function or variable patterns are defined. How the scanner applies
/// these strings is defined outside this constant.
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // Class, interface, enum, record and annotation-type declarations.
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
    // Test-method annotations.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "@TestFactory",
        "@TestTemplate",
    ],
    // Unqualified `assert*` method calls.
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "assertAll(",
        "assertArrayEquals(",
        "assertIterableEquals(",
        "assertLinesMatch(",
    ],
    test_suites: &[],
};
857
/// Symbol and test patterns for C# source text.
///
/// No function patterns are defined. How the scanner applies these strings
/// is defined outside this constant.
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    // Class, interface, enum, struct and record declarations.
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    variables: &["var "],
    imports: &["using "],
    // Test-method and test-data attributes.
    tests: &[
        "[TestMethod]",
        "[Test]",
        "[Fact]",
        "[Theory]",
        "[TestCase(",
        "[DataRow(",
        "[InlineData(",
        "[MemberData(",
    ],
    // `Assert.*` static method calls.
    assertions: &[
        "Assert.AreEqual(",
        "Assert.AreNotEqual(",
        "Assert.IsTrue(",
        "Assert.IsFalse(",
        "Assert.IsNull(",
        "Assert.IsNotNull(",
        "Assert.Equal(",
        "Assert.NotEqual(",
        "Assert.True(",
        "Assert.False(",
        "Assert.That(",
        "Assert.Contains(",
        "Assert.Throws(",
        "Assert.ThrowsAsync(",
        "Assert.IsInstanceOfType(",
    ],
    // Class-level fixture attributes.
    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
};
916
/// Test-declaration macro/function patterns shared by `SP_C` and `SP_CPP`.
///
/// The framework labels below are inferred from the macro names.
/// NOTE(review): several entries (`TEST_GROUP(`, `TEST_GROUP_BASE(`,
/// `BOOST_AUTO_TEST_SUITE(`, `CPPUNIT_TEST_SUITE(`) also appear in
/// `SUITE_PATTERNS_C_CPP` — confirm whether counting them in both lists is
/// intended.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest-style macros.
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2-style macros.
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test-style macros.
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit-style macros.
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity-style macros.
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check-style macros/functions.
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka-style functions.
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest-style macros.
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
957
/// Assertion macro/function patterns shared by `SP_C` and `SP_CPP`.
///
/// The framework labels below are inferred from the macro names.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest-style `ASSERT_*` macros.
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // GoogleTest-style `EXPECT_*` macros.
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2-style `REQUIRE*` / `CHECK*` macros.
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity-style `TEST_ASSERT_*` macros.
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka-style `assert_*` functions.
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1031
/// Test-suite / group declaration patterns shared by `SP_C` and `SP_CPP`.
///
/// NOTE(review): `CPPUNIT_TEST_SUITE_END(` is listed next to
/// `CPPUNIT_TEST_SUITE(`; if both are counted, a single suite may be tallied
/// twice — confirm against the scanner.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1040
/// Symbol and test patterns for C source text.
///
/// Functions are listed only under `functions_prefix_paren` (return-type or
/// storage-class keywords); `functions` is empty. Test, assertion and suite
/// lists are the shared C/C++ tables. How the scanner applies these strings
/// is defined outside this constant.
const SP_C: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[
        "void ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "static ",
        "inline ",
        "const ",
        "extern ",
    ],
    classes: &[
        "struct ",
        "typedef struct ",
        "union ",
        "typedef union ",
        "typedef enum ",
    ],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1071
/// Symbol and test patterns for C++ source text.
///
/// Extends the C lists with C++-specific keywords. Test, assertion and suite
/// lists are the shared C/C++ tables. How the scanner applies these strings
/// is defined outside this constant.
const SP_CPP: SymbolPatterns = SymbolPatterns {
    // Member-function markers: virtual/explicit declarations, destructors
    // (`~`) and operator overloads.
    functions: &[
        "virtual ",
        "explicit ",
        "~",
        "operator",
    ],
    functions_prefix_paren: &[
        "void ",
        "bool ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "auto ",
        "static ",
        "inline ",
        "constexpr ",
        "const ",
        "extern ",
    ],
    // Namespaces and templates are counted together with classes/structs.
    classes: &["class ", "struct ", "namespace ", "template ", "template<"],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1105
/// Symbol patterns for shell scripts.
///
/// Only the `function ` keyword form is listed for functions; no class or
/// test-related patterns are defined. How the scanner applies these strings
/// is defined outside this constant.
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    // `source file` and the POSIX `. file` form.
    imports: &["source ", ". "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1116
/// Symbol and test patterns for PowerShell scripts.
///
/// Both `function ` and `Function ` are listed, which suggests the scanner
/// compares case-sensitively — TODO confirm. No variable or assertion
/// patterns are defined.
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "Function "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
    // `Describe` / `It` / `Context` blocks.
    tests: &["Describe ", "It ", "Context "],
    assertions: &[],
    test_suites: &[],
};
1128
/// Symbol and test patterns for Kotlin source text.
///
/// How the scanner applies these strings is defined outside this constant.
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    // `fun` declarations with the listed modifier combinations.
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
    // Test annotations plus string-named specs beginning `"should ` / `"it `.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "\"should ",
        "\"it ",
    ],
    // `assert*` calls and `should*` matcher calls.
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "shouldBe(",
        "shouldNotBe(",
        "shouldThrow(",
    ],
    test_suites: &[],
};
1181
/// Symbol and test patterns for Swift source text.
///
/// How the scanner applies these strings is defined outside this constant.
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    // `func` declarations with the listed modifier combinations.
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    functions_prefix_paren: &[],
    // Nominal types, protocols, extensions and actors.
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
    // `test`-prefixed functions and the `@Test` attribute.
    tests: &["func test", "func Test", "@Test"],
    // `XCTAssert*` calls and the `#expect(` macro.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
        "#expect(",
    ],
    test_suites: &[],
};
1238
/// Symbol and test patterns for Ruby source text.
///
/// No variable or assertion patterns are defined. How the scanner applies
/// these strings is defined outside this constant.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    // Spec-style blocks (`it`, `describe`, `context`) and `test ` declarations.
    tests: &["it ", "it(", "describe ", "context ", "test "],
    assertions: &[],
    test_suites: &[],
};
1250
/// Symbol and test patterns for Scala source text.
///
/// No assertion patterns are defined. How the scanner applies these strings
/// is defined outside this constant.
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    // `test(` / `it(` / `describe(` calls.
    tests: &["test(", "it(", "describe("],
    assertions: &[],
    test_suites: &[],
};
1269
/// Symbol and test patterns for PHP source text.
///
/// No variable or assertion patterns are defined. How the scanner applies
/// these strings is defined outside this constant.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    // `function` declarations with the listed modifier combinations.
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    // `test`-prefixed methods and test attributes.
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    assertions: &[],
    test_suites: &[],
};
1310
/// Symbol and test patterns for Elixir source text.
///
/// No variable or assertion patterns are defined. How the scanner applies
/// these strings is defined outside this constant.
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    // Public/private function, macro and guard definitions.
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    functions_prefix_paren: &[],
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    // `test` / `describe` blocks.
    tests: &["test ", "describe "],
    assertions: &[],
    test_suites: &[],
};
1329
/// Symbol patterns for Erlang source text.
///
/// Only module and import/include attributes are listed; no function,
/// variable or test-related patterns are defined.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1340
/// Symbol and test patterns for F# source text.
///
/// NOTE(review): `"let "` (functions) is a prefix of `"let mutable "`
/// (variables); whether one binding can be counted in both categories
/// depends on the scanner's matching rules — confirm.
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    // Test attributes.
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    assertions: &[],
    test_suites: &[],
};
1358
/// Symbol and test patterns for Groovy source text.
///
/// No variable or assertion patterns are defined. How the scanner applies
/// these strings is defined outside this constant.
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    // String-named methods (`def "..."`), `@Test`, and spec block labels.
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    assertions: &[],
    test_suites: &[],
};
1370
/// Symbol patterns for Haskell source text.
///
/// Only type-level declarations and imports are listed; no function,
/// variable or test-related patterns are defined.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1381
/// Symbol and test patterns for Lua source text.
///
/// NOTE(review): `"local "` (variables) is a prefix of `"local function "`
/// (functions); whether a local function is also counted as a variable
/// depends on the scanner's matching rules — confirm. No import patterns
/// are defined.
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["local "],
    imports: &[],
    // `it(` / `describe(` / `pending(` calls.
    tests: &["it(", "describe(", "pending("],
    assertions: &[],
    test_suites: &[],
};
1393
/// Symbol and test patterns for Nim source text.
///
/// No assertion patterns are defined. How the scanner applies these strings
/// is defined outside this constant.
const SP_NIM: SymbolPatterns = SymbolPatterns {
    // Routine-defining keywords.
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
    // `test "name":` blocks.
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
};
1413
/// Line-prefix symbol patterns for Objective-C sources.
///
/// Each entry is compared against the start of a trimmed line (see
/// `count_symbols`).
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    // Instance (`- (`) and class (`+ (`) method declarations.
    functions: &["- (", "+ ("],
    functions_prefix_paren: &[],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // A line matching a test prefix is counted as a test, not a function.
    tests: &["- (void)test"],
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
};
1436
/// Line-prefix symbol patterns for OCaml sources.
///
/// Each entry is compared against the start of a trimmed line (see
/// `count_symbols`).
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    // Every top-of-line `let` binding is counted as a function; value
    // bindings are not distinguished.
    functions: &["let ", "let rec "],
    functions_prefix_paren: &[],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1447
/// Line-prefix symbol patterns for Perl sources.
///
/// Each entry is compared against the start of a trimmed line (see
/// `count_symbols`).
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    functions_prefix_paren: &[],
    // Packages are the only class-like construct recognised.
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1458
/// Line-prefix symbol patterns for Clojure sources.
///
/// Patterns include the opening parenthesis because forms are matched at the
/// start of a trimmed line (see `count_symbols`).
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    functions_prefix_paren: &[],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
};
1475
/// Line-prefix symbol patterns for Julia sources.
///
/// Each entry is compared against the start of a trimmed line (see
/// `count_symbols`).
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    functions_prefix_paren: &[],
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    // Both individual `@test` lines and `@testset` headers count as tests.
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
};
1492
/// Line-prefix symbol patterns for Dart sources.
///
/// No function prefixes are configured; only class-like declarations,
/// variables, imports and test calls are recognised.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
};
1504
/// Line-prefix symbol patterns for R sources.
///
/// Only imports and test calls are recognised.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // NOTE(review): "expect_" lines are counted as tests here, not as
    // assertions (`assertions` is empty) — confirm this is intended.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
1516
/// Line-prefix symbol patterns for SQL sources.
///
/// Prefix matching is case-sensitive, so each statement is listed in both
/// all-lowercase and all-uppercase form; mixed-case spellings do not match.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    functions_prefix_paren: &[],
    // Tables, views and schemas are reported in the "classes" bucket.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1543
/// Line-prefix symbol patterns for assembly sources.
///
/// Only lines that *start* with `proc `/`PROC ` are counted as functions,
/// because patterns are matched at the start of the trimmed line.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    functions: &["proc ", "PROC "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1554
/// Line-prefix symbol patterns for Zig sources.
///
/// Each entry is compared against the start of a trimmed line (see
/// `count_symbols`).
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    functions_prefix_paren: &[],
    classes: &[],
    // NOTE(review): only `var` declarations are listed; `const` bindings are
    // not counted — confirm this is intended.
    variables: &["var ", "pub var "],
    imports: &[],
    tests: &["test \"", "test{"],
    assertions: &[],
    test_suites: &[],
};
1572
/// Compile-time lexical description of one language, stored in
/// `LANG_SCAN_TABLE`.
///
/// It carries the same comment/string/symbol fields as `ScanConfig`, plus
/// `has_preprocessor`, and omits the per-file `skip_lines`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Prefixes that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, if the language has them.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether `"""` and `'''` open multi-line string literals.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a C#-style verbatim string (`""` escapes a quote).
    allow_csharp_verbatim_strings: bool,
    /// Line-prefix patterns used to count functions, classes, tests, etc.
    symbol_patterns: SymbolPatterns,
    /// Set for C, C++ and Objective-C in `LANG_SCAN_TABLE`.
    /// NOTE(review): presumably feeds `IeeeFlags::has_preprocessor_directives`
    /// — confirm against the code that builds `IeeeFlags`.
    has_preprocessor: bool,
}
1589
/// Per-file scanner configuration consumed by `scan_line` and
/// `process_physical_line`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Prefixes that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, if any.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether `"""` and `'''` open multi-line string literals.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a C#-style verbatim string.
    allow_csharp_verbatim_strings: bool,
    /// Zero-based line indices that are counted as docstring lines without
    /// being scanned (see `process_physical_line`).
    skip_lines: HashSet<usize>,
    /// Line-prefix patterns used to count symbols on code lines.
    symbol_patterns: SymbolPatterns,
}
1602
/// Base configuration for languages with `//` line comments and `/* */`
/// block comments; single- and double-quoted strings are enabled.
///
/// Entries in `LANG_SCAN_TABLE` override individual fields through
/// struct-update syntax (`..C_SLASH_BASE`).
const C_SLASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["//"],
    block_comment: Some(("/*", "*/")),
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
1622
/// Base configuration for languages with `#` line comments and no block
/// comments; single- and double-quoted strings are enabled.
///
/// Entries in `LANG_SCAN_TABLE` override individual fields through
/// struct-update syntax (`..HASH_BASE`).
const HASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["#"],
    block_comment: None,
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
1636
1637static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
1641 (
1643 Language::C,
1644 StaticLangConfig {
1645 symbol_patterns: SP_C,
1646 has_preprocessor: true,
1647 ..C_SLASH_BASE
1648 },
1649 ),
1650 (
1651 Language::Cpp,
1652 StaticLangConfig {
1653 symbol_patterns: SP_CPP,
1654 has_preprocessor: true,
1655 ..C_SLASH_BASE
1656 },
1657 ),
1658 (
1659 Language::ObjectiveC,
1660 StaticLangConfig {
1661 symbol_patterns: SP_OBJECTIVEC,
1662 has_preprocessor: true,
1663 ..C_SLASH_BASE
1664 },
1665 ),
1666 (
1668 Language::CSharp,
1669 StaticLangConfig {
1670 symbol_patterns: SP_CSHARP,
1671 allow_csharp_verbatim_strings: true,
1672 ..C_SLASH_BASE
1673 },
1674 ),
1675 (
1676 Language::Go,
1677 StaticLangConfig {
1678 symbol_patterns: SP_GO,
1679 ..C_SLASH_BASE
1680 },
1681 ),
1682 (
1683 Language::Java,
1684 StaticLangConfig {
1685 symbol_patterns: SP_JAVA,
1686 ..C_SLASH_BASE
1687 },
1688 ),
1689 (
1690 Language::JavaScript,
1691 StaticLangConfig {
1692 symbol_patterns: SP_JS,
1693 ..C_SLASH_BASE
1694 },
1695 ),
1696 (
1697 Language::TypeScript,
1698 StaticLangConfig {
1699 symbol_patterns: SP_TS,
1700 ..C_SLASH_BASE
1701 },
1702 ),
1703 (
1704 Language::Svelte,
1705 StaticLangConfig {
1706 symbol_patterns: SP_JS,
1707 ..C_SLASH_BASE
1708 },
1709 ),
1710 (
1711 Language::Vue,
1712 StaticLangConfig {
1713 symbol_patterns: SP_JS,
1714 ..C_SLASH_BASE
1715 },
1716 ),
1717 (
1718 Language::Dart,
1719 StaticLangConfig {
1720 symbol_patterns: SP_DART,
1721 ..C_SLASH_BASE
1722 },
1723 ),
1724 (
1725 Language::Groovy,
1726 StaticLangConfig {
1727 symbol_patterns: SP_GROOVY,
1728 ..C_SLASH_BASE
1729 },
1730 ),
1731 (
1732 Language::Kotlin,
1733 StaticLangConfig {
1734 symbol_patterns: SP_KOTLIN,
1735 ..C_SLASH_BASE
1736 },
1737 ),
1738 (
1739 Language::Scala,
1740 StaticLangConfig {
1741 symbol_patterns: SP_SCALA,
1742 ..C_SLASH_BASE
1743 },
1744 ),
1745 (
1746 Language::Scss,
1747 StaticLangConfig {
1748 symbol_patterns: SP_NONE,
1749 ..C_SLASH_BASE
1750 },
1751 ),
1752 (
1754 Language::Rust,
1755 StaticLangConfig {
1756 symbol_patterns: SP_RUST,
1757 allow_single_quote_strings: false,
1758 ..C_SLASH_BASE
1759 },
1760 ),
1761 (
1763 Language::Swift,
1764 StaticLangConfig {
1765 symbol_patterns: SP_SWIFT,
1766 allow_single_quote_strings: false,
1767 ..C_SLASH_BASE
1768 },
1769 ),
1770 (
1772 Language::Zig,
1773 StaticLangConfig {
1774 symbol_patterns: SP_ZIG,
1775 block_comment: None,
1776 ..C_SLASH_BASE
1777 },
1778 ),
1779 (
1781 Language::FSharp,
1782 StaticLangConfig {
1783 line_comments: &["//"],
1784 block_comment: Some(("(*", "*)")),
1785 allow_single_quote_strings: false,
1786 allow_double_quote_strings: true,
1787 symbol_patterns: SP_FSHARP,
1788 ..C_SLASH_BASE
1789 },
1790 ),
1791 (
1793 Language::Shell,
1794 StaticLangConfig {
1795 symbol_patterns: SP_SHELL,
1796 ..HASH_BASE
1797 },
1798 ),
1799 (
1800 Language::Elixir,
1801 StaticLangConfig {
1802 symbol_patterns: SP_ELIXIR,
1803 ..HASH_BASE
1804 },
1805 ),
1806 (
1807 Language::Perl,
1808 StaticLangConfig {
1809 symbol_patterns: SP_PERL,
1810 ..HASH_BASE
1811 },
1812 ),
1813 (
1814 Language::R,
1815 StaticLangConfig {
1816 symbol_patterns: SP_R,
1817 ..HASH_BASE
1818 },
1819 ),
1820 (
1821 Language::Ruby,
1822 StaticLangConfig {
1823 symbol_patterns: SP_RUBY,
1824 ..HASH_BASE
1825 },
1826 ),
1827 (
1829 Language::Python,
1830 StaticLangConfig {
1831 symbol_patterns: SP_PYTHON,
1832 allow_triple_quote_strings: true,
1833 ..HASH_BASE
1834 },
1835 ),
1836 (
1838 Language::PowerShell,
1839 StaticLangConfig {
1840 symbol_patterns: SP_POWERSHELL,
1841 block_comment: Some(("<#", "#>")),
1842 ..HASH_BASE
1843 },
1844 ),
1845 (
1847 Language::Nim,
1848 StaticLangConfig {
1849 symbol_patterns: SP_NIM,
1850 block_comment: Some(("#[", "]#")),
1851 ..HASH_BASE
1852 },
1853 ),
1854 (
1856 Language::Makefile,
1857 StaticLangConfig {
1858 symbol_patterns: SP_NONE,
1859 allow_single_quote_strings: false,
1860 allow_double_quote_strings: false,
1861 ..HASH_BASE
1862 },
1863 ),
1864 (
1865 Language::Dockerfile,
1866 StaticLangConfig {
1867 symbol_patterns: SP_NONE,
1868 allow_single_quote_strings: false,
1869 allow_double_quote_strings: false,
1870 ..HASH_BASE
1871 },
1872 ),
1873 (
1876 Language::Css,
1877 StaticLangConfig {
1878 line_comments: &[],
1879 block_comment: Some(("/*", "*/")),
1880 symbol_patterns: SP_NONE,
1881 ..C_SLASH_BASE
1882 },
1883 ),
1884 (
1886 Language::Html,
1887 StaticLangConfig {
1888 line_comments: &[],
1889 block_comment: Some(("<!--", "-->")),
1890 allow_single_quote_strings: false,
1891 allow_double_quote_strings: false,
1892 symbol_patterns: SP_NONE,
1893 ..C_SLASH_BASE
1894 },
1895 ),
1896 (
1897 Language::Xml,
1898 StaticLangConfig {
1899 line_comments: &[],
1900 block_comment: Some(("<!--", "-->")),
1901 allow_single_quote_strings: false,
1902 allow_double_quote_strings: false,
1903 symbol_patterns: SP_NONE,
1904 ..C_SLASH_BASE
1905 },
1906 ),
1907 (
1909 Language::Lua,
1910 StaticLangConfig {
1911 line_comments: &["--"],
1912 block_comment: Some(("--[[", "]]")),
1913 symbol_patterns: SP_LUA,
1914 ..C_SLASH_BASE
1915 },
1916 ),
1917 (
1919 Language::Haskell,
1920 StaticLangConfig {
1921 line_comments: &["--"],
1922 block_comment: Some(("{-", "-}")),
1923 symbol_patterns: SP_HASKELL,
1924 ..C_SLASH_BASE
1925 },
1926 ),
1927 (
1929 Language::Sql,
1930 StaticLangConfig {
1931 line_comments: &["--"],
1932 block_comment: Some(("/*", "*/")),
1933 allow_single_quote_strings: true,
1934 allow_double_quote_strings: false,
1935 symbol_patterns: SP_SQL,
1936 ..C_SLASH_BASE
1937 },
1938 ),
1939 (
1941 Language::Ocaml,
1942 StaticLangConfig {
1943 line_comments: &[],
1944 block_comment: Some(("(*", "*)")),
1945 allow_single_quote_strings: false,
1946 symbol_patterns: SP_OCAML,
1947 ..C_SLASH_BASE
1948 },
1949 ),
1950 (
1952 Language::Assembly,
1953 StaticLangConfig {
1954 line_comments: &[";"],
1955 block_comment: None,
1956 allow_single_quote_strings: false,
1957 allow_double_quote_strings: false,
1958 symbol_patterns: SP_ASSEMBLY,
1959 ..C_SLASH_BASE
1960 },
1961 ),
1962 (
1963 Language::Clojure,
1964 StaticLangConfig {
1965 line_comments: &[";"],
1966 block_comment: None,
1967 allow_single_quote_strings: false,
1968 symbol_patterns: SP_CLOJURE,
1969 ..C_SLASH_BASE
1970 },
1971 ),
1972 (
1974 Language::Erlang,
1975 StaticLangConfig {
1976 line_comments: &["%"],
1977 block_comment: None,
1978 allow_single_quote_strings: false,
1979 symbol_patterns: SP_ERLANG,
1980 ..C_SLASH_BASE
1981 },
1982 ),
1983 (
1985 Language::Php,
1986 StaticLangConfig {
1987 line_comments: &["//", "#"],
1988 block_comment: Some(("/*", "*/")),
1989 symbol_patterns: SP_PHP,
1990 ..C_SLASH_BASE
1991 },
1992 ),
1993 (
1995 Language::Julia,
1996 StaticLangConfig {
1997 line_comments: &["#"],
1998 block_comment: Some(("#=", "=#")),
1999 allow_single_quote_strings: false,
2000 allow_triple_quote_strings: true,
2001 symbol_patterns: SP_JULIA,
2002 ..C_SLASH_BASE
2003 },
2004 ),
2005];
2006
/// Switches that adjust how physical lines are counted.
/// NOTE(review): the name suggests an IEEE line-counting standard — confirm
/// which one against the code that builds these flags.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// Count pure-code lines starting with `#` as compiler directives
    /// (see `finalize_line_facts`).
    has_preprocessor_directives: bool,
    /// Treat blank lines inside an open block comment as comment lines
    /// rather than blank lines (see `process_physical_line`).
    blank_in_block_comment_as_comment: bool,
    /// Merge a line ending in `\` with the following line(s) into one
    /// classified line (see `finalize_line_facts`).
    collapse_continuation_lines: bool,
}
2018
/// The kind of string literal the scanner is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Ordinary string closed by the stored quote character; a backslash
    /// escapes the following character.
    Single(char),
    /// Triple-quoted string closed by the stored delimiter (`"""` or `'''`).
    Triple(&'static str),
    /// C#-style verbatim string: closed by `"`, with `""` as an escaped quote.
    VerbatimDouble,
}
2025
/// What the scanner observed on one line, or on a group of lines merged by
/// continuation handling.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// The line contains code (string-literal content counts as code).
    has_code: bool,
    /// The line contains a line comment.
    has_single_comment: bool,
    /// The line contains, or lies inside, a block comment.
    has_multi_comment: bool,
    /// The line is part of a docstring; `classify_line` checks this first.
    has_docstring: bool,
}
2034
2035fn process_string_char(
2039 state: StringState,
2040 chars: &[char],
2041 i: usize,
2042) -> (Option<StringState>, usize) {
2043 match state {
2044 StringState::Single(delim) => {
2045 if chars[i] == '\\' {
2046 return (Some(state), 2); }
2048 if chars[i] == delim {
2049 (None, 1)
2050 } else {
2051 (Some(state), 1)
2052 }
2053 }
2054 StringState::Triple(delim) => {
2055 if starts_with(chars, i, delim) {
2056 (None, delim.len())
2057 } else {
2058 (Some(state), 1)
2059 }
2060 }
2061 StringState::VerbatimDouble => {
2062 if starts_with(chars, i, "\"\"") {
2063 return (Some(state), 2); }
2065 if chars[i] == '"' {
2066 (None, 1)
2067 } else {
2068 (Some(state), 1)
2069 }
2070 }
2071 }
2072}
2073
2074fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2078 if starts_with(chars, i, close) {
2079 (false, close.len())
2080 } else {
2081 (true, 1)
2082 }
2083}
2084
2085fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2089 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2090 return Some((StringState::VerbatimDouble, 2));
2091 }
2092 if config.allow_triple_quote_strings {
2093 if starts_with(chars, i, "\"\"\"") {
2094 return Some((StringState::Triple("\"\"\""), 3));
2095 }
2096 if starts_with(chars, i, "'''") {
2097 return Some((StringState::Triple("'''"), 3));
2098 }
2099 }
2100 if config.allow_single_quote_strings && chars[i] == '\'' {
2101 return Some((StringState::Single('\''), 1));
2102 }
2103 if config.allow_double_quote_strings && chars[i] == '"' {
2104 return Some((StringState::Single('"'), 1));
2105 }
2106 None
2107}
2108
2109fn step_through_block_comment(
2115 chars: &[char],
2116 i: usize,
2117 block_comment: Option<(&'static str, &'static str)>,
2118 in_block_comment: &mut bool,
2119) -> usize {
2120 if let Some((_, close)) = block_comment {
2121 let (still_in, advance) = process_block_comment_char(chars, i, close);
2122 *in_block_comment = still_in;
2123 return advance;
2124 }
2125 0
2126}
2127
2128fn try_open_block_comment(
2131 chars: &[char],
2132 i: usize,
2133 block_comment: Option<(&'static str, &'static str)>,
2134) -> Option<usize> {
2135 let (open, _) = block_comment?;
2136 starts_with(chars, i, open).then_some(open.len())
2137}
2138
2139fn scan_line(
2143 chars: &[char],
2144 config: &ScanConfig,
2145 facts: &mut LineFacts,
2146 in_block_comment: &mut bool,
2147 string_state: &mut Option<StringState>,
2148) {
2149 let mut i = 0usize;
2150 while i < chars.len() {
2151 if let Some(state) = *string_state {
2153 facts.has_code = true;
2154 let (new_state, advance) = process_string_char(state, chars, i);
2155 *string_state = new_state;
2156 i += advance;
2157 continue;
2158 }
2159
2160 if *in_block_comment {
2162 facts.has_multi_comment = true;
2163 i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
2164 continue;
2165 }
2166
2167 if chars[i].is_whitespace() {
2169 i += 1;
2170 continue;
2171 }
2172
2173 if let Some((new_state, advance)) = try_open_string(chars, i, config) {
2175 facts.has_code = true;
2176 *string_state = Some(new_state);
2177 i += advance;
2178 continue;
2179 }
2180
2181 if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
2183 facts.has_multi_comment = true;
2184 *in_block_comment = true;
2185 i += advance;
2186 continue;
2187 }
2188
2189 if config
2191 .line_comments
2192 .iter()
2193 .any(|prefix| starts_with(chars, i, prefix))
2194 {
2195 facts.has_single_comment = true;
2196 break;
2197 }
2198
2199 facts.has_code = true;
2201 i += 1;
2202 }
2203}
2204
2205fn finalize_line_facts(
2210 facts: LineFacts,
2211 trimmed: &str,
2212 raw: &mut RawLineCounts,
2213 ieee: IeeeFlags,
2214 in_block_comment: bool,
2215 string_state: Option<StringState>,
2216 pending_continuation: &mut Option<LineFacts>,
2217) -> Option<LineFacts> {
2218 if ieee.has_preprocessor_directives
2222 && facts.has_code
2223 && !facts.has_single_comment
2224 && !facts.has_multi_comment
2225 && trimmed.starts_with('#')
2226 {
2227 raw.compiler_directive_lines += 1;
2228 }
2229
2230 let is_continuation = ieee.collapse_continuation_lines
2233 && !in_block_comment
2234 && string_state.is_none()
2235 && trimmed.ends_with('\\');
2236
2237 if is_continuation {
2238 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2239 pending.has_code |= facts.has_code;
2240 pending.has_single_comment |= facts.has_single_comment;
2241 pending.has_multi_comment |= facts.has_multi_comment;
2242 pending.has_docstring |= facts.has_docstring;
2243 return None; }
2245
2246 let emit = if let Some(pending) = pending_continuation.take() {
2248 LineFacts {
2249 has_code: pending.has_code | facts.has_code,
2250 has_single_comment: pending.has_single_comment | facts.has_single_comment,
2251 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2252 has_docstring: pending.has_docstring | facts.has_docstring,
2253 }
2254 } else {
2255 facts
2256 };
2257 Some(emit)
2258}
2259
2260#[allow(clippy::needless_pass_by_value)]
2265#[allow(clippy::too_many_arguments)]
2266#[allow(clippy::many_single_char_names)] fn process_physical_line(
2268 line: &str,
2269 line_idx: usize,
2270 config: &ScanConfig,
2271 raw: &mut RawLineCounts,
2272 in_block_comment: &mut bool,
2273 string_state: &mut Option<StringState>,
2274 pending_continuation: &mut Option<LineFacts>,
2275 ieee: IeeeFlags,
2276) {
2277 raw.total_physical_lines += 1;
2278
2279 if config.skip_lines.contains(&line_idx) {
2280 raw.docstring_comment_lines += 1;
2281 return;
2282 }
2283
2284 let trimmed = line.trim();
2285 let mut facts = LineFacts::default();
2286
2287 if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
2291 facts.has_multi_comment = true;
2292 }
2293
2294 let chars: Vec<char> = line.chars().collect();
2295 scan_line(&chars, config, &mut facts, in_block_comment, string_state);
2296
2297 let Some(emit) = finalize_line_facts(
2298 facts,
2299 trimmed,
2300 raw,
2301 ieee,
2302 *in_block_comment,
2303 *string_state,
2304 pending_continuation,
2305 ) else {
2306 return;
2307 };
2308
2309 classify_line(raw, &emit, trimmed);
2310
2311 if emit.has_code {
2312 let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
2313 raw.functions += f;
2314 raw.classes += c;
2315 raw.variables += v;
2316 raw.imports += i;
2317 raw.test_count += t;
2318 raw.test_assertion_count += a;
2319 raw.test_suite_count += s;
2320 }
2321}
2322
2323#[allow(clippy::needless_pass_by_value)]
2324fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
2325 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2326 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2327
2328 let mut raw = RawLineCounts::default();
2329 let mut warnings = Vec::new();
2330
2331 let mut in_block_comment = false;
2332 let mut string_state: Option<StringState> = None;
2333 let mut pending_continuation: Option<LineFacts> = None;
2335
2336 for (line_idx, line) in lines.iter().enumerate() {
2337 process_physical_line(
2338 line,
2339 line_idx,
2340 &config,
2341 &mut raw,
2342 &mut in_block_comment,
2343 &mut string_state,
2344 &mut pending_continuation,
2345 ieee,
2346 );
2347 }
2348
2349 if let Some(pending) = pending_continuation.take() {
2351 classify_line(&mut raw, &pending, "");
2352 }
2353
2354 if in_block_comment {
2355 warnings.push("unclosed block comment detected; result is best effort".into());
2356 }
2357 if string_state.is_some() {
2358 warnings.push("unclosed string literal detected; result is best effort".into());
2359 }
2360
2361 RawFileAnalysis {
2362 raw,
2363 parse_mode: if warnings.is_empty() {
2364 ParseMode::Lexical
2365 } else {
2366 ParseMode::LexicalBestEffort
2367 },
2368 warnings,
2369 style_analysis: None,
2370 }
2371}
2372
2373const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
2374 if facts.has_docstring {
2375 raw.docstring_comment_lines += 1;
2376 } else if !facts.has_code
2377 && !facts.has_single_comment
2378 && !facts.has_multi_comment
2379 && trimmed.is_empty()
2380 {
2381 raw.blank_only_lines += 1;
2382 } else if facts.has_code && facts.has_single_comment {
2383 raw.mixed_code_single_comment_lines += 1;
2384 } else if facts.has_code && facts.has_multi_comment {
2385 raw.mixed_code_multi_comment_lines += 1;
2386 } else if facts.has_code {
2387 raw.code_only_lines += 1;
2388 } else if facts.has_single_comment {
2389 raw.single_comment_only_lines += 1;
2390 } else if facts.has_multi_comment {
2391 raw.multi_comment_only_lines += 1;
2392 } else if trimmed.is_empty() {
2393 raw.blank_only_lines += 1;
2394 } else {
2395 raw.skipped_unknown_lines += 1;
2396 }
2397}
2398
2399fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
2400 let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
2401 let fn_pp = if patterns.functions_prefix_paren.is_empty() {
2404 0
2405 } else if let Some(paren_pos) = trimmed.find('(') {
2406 if trimmed[..paren_pos].contains('=') {
2407 0
2408 } else {
2409 hit(patterns.functions_prefix_paren)
2410 }
2411 } else {
2412 0
2413 };
2414 let test_hit = hit(patterns.tests);
2415 let fn_hit = if test_hit == 0 {
2422 hit(patterns.functions) | fn_pp
2423 } else {
2424 0
2425 };
2426 let class_hit = if test_hit == 0 {
2427 hit(patterns.classes)
2428 } else {
2429 0
2430 };
2431 (
2432 fn_hit,
2433 class_hit,
2434 hit(patterns.variables),
2435 hit(patterns.imports),
2436 test_hit,
2437 hit(patterns.assertions),
2438 hit(patterns.test_suites),
2439 )
2440}
2441
/// Reports whether `needle` occurs in `chars` starting at `index`.
///
/// Returns `false` when `index` is past the end of `chars` or the needle
/// would run off the end. An empty needle matches at any `index` up to and
/// including `chars.len()`.
///
/// The comparison walks both sequences without allocating, since the scanner
/// calls this for nearly every character.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
2446
/// One entry on the indentation stack used by
/// `detect_python_docstring_lines`.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation of the block body, as a count of leading whitespace
    /// characters (0 for the module-level entry).
    indent: usize,
    /// Whether the next significant line in this block may still be its
    /// docstring; cleared once the first such line has been examined.
    expect_docstring: bool,
}
2452
2453fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
2455 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
2456 contexts.pop();
2457 }
2458}
2459
2460fn py_handle_pending_indent(
2463 pending_block_indent: &mut Option<usize>,
2464 contexts: &mut Vec<PyContext>,
2465 indent: usize,
2466 trimmed: &str,
2467) {
2468 let Some(base_indent) = *pending_block_indent else {
2469 return;
2470 };
2471 if indent > base_indent {
2472 contexts.push(PyContext {
2473 indent,
2474 expect_docstring: true,
2475 });
2476 *pending_block_indent = None;
2477 } else if !trimmed.starts_with('@') {
2478 *pending_block_indent = None;
2479 }
2480}
2481
2482fn py_try_record_docstring(
2488 ctx: &mut PyContext,
2489 trimmed: &str,
2490 idx: usize,
2491 docstring_lines: &mut HashSet<usize>,
2492 active_docstring: &mut Option<(&'static str, usize)>,
2493) -> bool {
2494 if !ctx.expect_docstring {
2495 return false;
2496 }
2497 if let Some(delim) = docstring_delimiter(trimmed) {
2498 docstring_lines.insert(idx);
2499 ctx.expect_docstring = false;
2500 if !closes_triple_docstring(trimmed, delim, true) {
2501 *active_docstring = Some((delim, idx));
2502 }
2503 return true;
2504 }
2505 ctx.expect_docstring = false;
2506 false
2507}
2508
2509fn track_active_docstring(
2513 active_docstring: &mut Option<(&'static str, usize)>,
2514 docstring_lines: &mut HashSet<usize>,
2515 idx: usize,
2516 trimmed: &str,
2517) -> bool {
2518 let Some((delim, start_line)) = *active_docstring else {
2519 return false;
2520 };
2521 docstring_lines.insert(idx);
2522 if closes_triple_docstring(trimmed, delim, idx == start_line) {
2523 *active_docstring = None;
2524 }
2525 true
2526}
2527
2528fn try_record_docstring_if_context(
2531 contexts: &mut [PyContext],
2532 trimmed: &str,
2533 idx: usize,
2534 docstring_lines: &mut HashSet<usize>,
2535 active_docstring: &mut Option<(&'static str, usize)>,
2536) -> bool {
2537 let Some(ctx) = contexts.last_mut() else {
2538 return false;
2539 };
2540 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
2541}
2542
/// Marks every line from the start of an unterminated docstring through the
/// end of the file as docstring lines.
///
/// Does nothing when no docstring is open.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, start_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(start_line..num_lines);
}
2555
2556fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
2557 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2558 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2559
2560 let mut docstring_lines = HashSet::new();
2561 let mut contexts = vec![PyContext {
2562 indent: 0,
2563 expect_docstring: true,
2564 }];
2565 let mut pending_block_indent: Option<usize> = None;
2566 let mut active_docstring: Option<(&'static str, usize)> = None;
2567
2568 for (idx, line) in lines.iter().enumerate() {
2569 let trimmed = line.trim();
2570 let indent = leading_indent(line);
2571
2572 if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
2573 continue;
2574 }
2575
2576 if trimmed.is_empty() || trimmed.starts_with('#') {
2578 continue;
2579 }
2580
2581 py_pop_outdented_contexts(&mut contexts, indent);
2582 py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
2583
2584 if try_record_docstring_if_context(
2585 &mut contexts,
2586 trimmed,
2587 idx,
2588 &mut docstring_lines,
2589 &mut active_docstring,
2590 ) {
2591 continue;
2592 }
2593
2594 if is_python_block_header(trimmed) {
2595 pending_block_indent = Some(indent);
2596 }
2597 }
2598
2599 mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
2600
2601 docstring_lines
2602}
2603
/// Counts the whitespace characters at the start of `line`.
///
/// Each whitespace character counts as one, tabs included.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .position(|c| !c.is_whitespace())
        .unwrap_or_else(|| line.chars().count())
}
2607
/// Reports whether a trimmed line is a single-line `def`, `async def` or
/// `class` header, i.e. starts with one of those keywords and ends with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_PREFIXES: [&str; 3] = ["def ", "async def ", "class "];

    trimmed.ends_with(':')
        && HEADER_PREFIXES
            .iter()
            .any(|prefix| trimmed.starts_with(*prefix))
}
2614
/// Returns the triple-quote delimiter that opens `trimmed`, if any.
///
/// Any run of string-prefix letters (`r`, `u`, `b`, `f`, in either case) in
/// front of the quotes is skipped first.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    const DELIMS: [&str; 2] = ["\"\"\"", "'''"];

    let rest = trimmed
        .trim_start_matches(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F'));
    DELIMS
        .iter()
        .copied()
        .find(|delim| rest.starts_with(*delim))
}
2636
/// Reports whether a docstring is closed on this line.
///
/// On the line that opens the docstring the delimiter must appear at least
/// twice (open and close); on any later line once is enough. Occurrences are
/// counted without overlap.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    let required = if same_line_as_start { 2 } else { 1 };
    trimmed.matches(delim).count() >= required
}
2651
2652#[cfg(feature = "tree-sitter")]
2657pub mod ts {
2658 use tree_sitter::Node;
2659
2660 use super::{ParseMode, RawFileAnalysis, RawLineCounts};
2661
    /// Tree-sitter node kinds and name prefixes used to count symbols.
    ///
    /// An empty string disables the corresponding check.
    struct SymbolKinds {
        /// Node kind of a function definition.
        function_def: &'static str,
        /// Node kind of a class definition.
        class_def: &'static str,
        /// Functions whose name starts with this prefix are counted as tests
        /// instead of functions.
        test_fn_prefix: &'static str,
        /// Classes whose name starts with this prefix are counted as tests
        /// instead of classes.
        test_class_prefix: &'static str,
        /// `call` nodes on an `attribute` whose name starts with this prefix
        /// are counted as test assertions.
        assertion_attr_prefix: &'static str,
    }
2679
2680 impl SymbolKinds {
2681 const fn none() -> Self {
2682 Self {
2683 function_def: "",
2684 class_def: "",
2685 test_fn_prefix: "",
2686 test_class_prefix: "",
2687 assertion_attr_prefix: "",
2688 }
2689 }
2690 }
2691
2692 fn analyze_lines(
2698 text: &str,
2699 ts_language: &tree_sitter::Language,
2700 comment_node_kinds: &[&str],
2701 docstring_stmt_kind: Option<&str>,
2702 symbols: &SymbolKinds,
2703 ) -> Option<RawFileAnalysis> {
2704 let mut parser = tree_sitter::Parser::new();
2705 parser.set_language(ts_language).ok()?;
2706 let tree = parser.parse(text, None)?;
2707
2708 let lines: Vec<&str> = text.split_terminator('\n').collect();
2709 let n = lines.len();
2710
2711 let mut has_code = vec![false; n];
2712 let mut has_comment = vec![false; n];
2713 let mut comment_is_block = vec![false; n];
2714 let mut has_docstring = vec![false; n];
2715
2716 let mut ctx = VisitCtx {
2718 source: text.as_bytes(),
2719 comment_kinds: comment_node_kinds,
2720 docstring_stmt_kind,
2721 has_code: &mut has_code,
2722 has_comment: &mut has_comment,
2723 comment_is_block: &mut comment_is_block,
2724 has_docstring: &mut has_docstring,
2725 };
2726 visit(tree.root_node(), &mut ctx);
2727
2728 let mut raw = RawLineCounts::default();
2729 classify_ts_lines(
2730 &lines,
2731 &has_code,
2732 &has_comment,
2733 &comment_is_block,
2734 &has_docstring,
2735 &mut raw,
2736 );
2737
2738 if !symbols.function_def.is_empty() || !symbols.class_def.is_empty() {
2740 count_symbols(tree.root_node(), text.as_bytes(), symbols, &mut raw);
2741 }
2742
2743 Some(RawFileAnalysis {
2744 raw,
2745 parse_mode: ParseMode::TreeSitter,
2746 warnings: Vec::new(),
2747 })
2748 }
2749
2750 fn recurse_children(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
2752 for i in 0..node.child_count() {
2753 #[allow(clippy::cast_possible_truncation)]
2754 if let Some(child) = node.child(i as u32) {
2755 count_symbols(child, source, kinds, raw);
2756 }
2757 }
2758 }
2759
2760 fn try_count_function(
2762 node: Node,
2763 source: &[u8],
2764 kinds: &SymbolKinds,
2765 raw: &mut RawLineCounts,
2766 ) -> bool {
2767 if kinds.function_def.is_empty() || node.kind() != kinds.function_def {
2768 return false;
2769 }
2770 let name = node
2771 .child_by_field_name("name")
2772 .and_then(|n| n.utf8_text(source).ok())
2773 .unwrap_or("");
2774 if !kinds.test_fn_prefix.is_empty() && name.starts_with(kinds.test_fn_prefix) {
2775 raw.test_count += 1;
2776 } else {
2777 raw.functions += 1;
2778 }
2779 recurse_children(node, source, kinds, raw);
2780 true
2781 }
2782
2783 fn try_count_class(
2785 node: Node,
2786 source: &[u8],
2787 kinds: &SymbolKinds,
2788 raw: &mut RawLineCounts,
2789 ) -> bool {
2790 if kinds.class_def.is_empty() || node.kind() != kinds.class_def {
2791 return false;
2792 }
2793 let name = node
2794 .child_by_field_name("name")
2795 .and_then(|n| n.utf8_text(source).ok())
2796 .unwrap_or("");
2797 if !kinds.test_class_prefix.is_empty() && name.starts_with(kinds.test_class_prefix) {
2798 raw.test_count += 1;
2799 } else {
2800 raw.classes += 1;
2801 }
2802 recurse_children(node, source, kinds, raw);
2803 true
2804 }
2805
2806 fn try_count_assertion(
2809 node: Node,
2810 source: &[u8],
2811 kinds: &SymbolKinds,
2812 raw: &mut RawLineCounts,
2813 ) -> bool {
2814 if kinds.assertion_attr_prefix.is_empty() || node.kind() != "call" {
2815 return false;
2816 }
2817 let Some(func) = node.child_by_field_name("function") else {
2818 return false;
2819 };
2820 if func.kind() != "attribute" {
2821 return false;
2822 }
2823 let attr_text = func
2824 .child_by_field_name("attribute")
2825 .and_then(|n| n.utf8_text(source).ok())
2826 .unwrap_or("");
2827 if !attr_text.starts_with(kinds.assertion_attr_prefix) {
2828 return false;
2829 }
2830 raw.test_assertion_count += 1;
2831 true
2832 }
2833
2834 fn count_symbols(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
2837 if try_count_function(node, source, kinds, raw) {
2838 return;
2839 }
2840 if try_count_class(node, source, kinds, raw) {
2841 return;
2842 }
2843 if try_count_assertion(node, source, kinds, raw) {
2844 return;
2845 }
2846 recurse_children(node, source, kinds, raw);
2847 }
2848
/// Per-line facts consumed by `classify_ts_line` to pick the counter a line
/// belongs to.
// The four flags are independent and are read individually by
// `classify_ts_line`, hence the suppressed excessive-bools lint.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct TsLineFlags {
    /// The line contains code.
    has_code: bool,
    /// The line contains (part of) a comment.
    has_comment: bool,
    /// The comment on this line is a block comment rather than a line comment.
    comment_is_block: bool,
    /// The line contains (part of) a docstring.
    has_docstring: bool,
}
2859
2860 const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
2862 if trimmed.is_empty() {
2863 raw.blank_only_lines += 1;
2864 } else if flags.has_docstring && !flags.has_code {
2865 raw.docstring_comment_lines += 1;
2866 } else if flags.has_code && flags.has_comment {
2867 if flags.comment_is_block {
2869 raw.mixed_code_multi_comment_lines += 1;
2870 } else {
2871 raw.mixed_code_single_comment_lines += 1;
2872 }
2873 } else if flags.has_comment {
2874 if flags.comment_is_block {
2875 raw.multi_comment_only_lines += 1;
2876 } else {
2877 raw.single_comment_only_lines += 1;
2878 }
2879 } else {
2880 raw.code_only_lines += 1;
2881 }
2882 }
2883
2884 fn classify_ts_lines(
2886 lines: &[&str],
2887 has_code: &[bool],
2888 has_comment: &[bool],
2889 comment_is_block: &[bool],
2890 has_docstring: &[bool],
2891 raw: &mut RawLineCounts,
2892 ) {
2893 for i in 0..lines.len() {
2894 raw.total_physical_lines += 1;
2895 classify_ts_line(
2896 lines[i].trim(),
2897 TsLineFlags {
2898 has_code: has_code[i],
2899 has_comment: has_comment[i],
2900 comment_is_block: comment_is_block[i],
2901 has_docstring: has_docstring[i],
2902 },
2903 raw,
2904 );
2905 }
2906 }
2907
/// State threaded through `visit` and its helpers while walking a syntax tree.
///
/// The four `Vec<bool>` fields are indexed by a node's row; rows outside a
/// vector are ignored by the helpers that write to it.
struct VisitCtx<'a> {
    /// Source bytes, used to read node text.
    source: &'a [u8],
    /// Node kinds that `visit` treats as comments.
    comment_kinds: &'a [&'a str],
    /// Statement kind that may wrap a docstring; `None` turns docstring
    /// detection off.
    docstring_stmt_kind: Option<&'a str>,
    /// Set for every row covered by a non-extra leaf node.
    has_code: &'a mut Vec<bool>,
    /// Set for every row covered by a comment node.
    has_comment: &'a mut Vec<bool>,
    /// Set for every row covered by a comment whose text starts with `/*` or `<#`.
    comment_is_block: &'a mut Vec<bool>,
    /// Set for every row covered by a detected docstring.
    has_docstring: &'a mut Vec<bool>,
}
2917
2918 fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
2920 let start_row = node.start_position().row;
2921 let end_row = node.end_position().row;
2922 let first_two = node
2923 .utf8_text(ctx.source)
2924 .unwrap_or("")
2925 .get(..2)
2926 .unwrap_or("");
2927 let is_block = first_two == "/*" || first_two == "<#";
2928 for row in start_row..=end_row {
2929 if row < ctx.has_comment.len() {
2930 ctx.has_comment[row] = true;
2931 if is_block {
2932 ctx.comment_is_block[row] = true;
2933 }
2934 }
2935 }
2936 }
2937
2938 fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
2941 let Some(stmt_kind) = ctx.docstring_stmt_kind else {
2942 return false;
2943 };
2944 if kind != stmt_kind || node.named_child_count() != 1 {
2945 return false;
2946 }
2947 let Some(child) = node.named_child(0) else {
2948 return false;
2949 };
2950 if child.kind() != "string" {
2951 return false;
2952 }
2953 let child_start = child.start_position().row;
2954 let child_end = child.end_position().row;
2955 for row in child_start..=child_end {
2956 if row < ctx.has_docstring.len() {
2957 ctx.has_docstring[row] = true;
2958 }
2959 }
2960 true
2961 }
2962
2963 fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
2965 let start_row = node.start_position().row;
2966 let end_row = node.end_position().row;
2967 for row in start_row..=end_row {
2968 if row < ctx.has_code.len() {
2969 ctx.has_code[row] = true;
2970 }
2971 }
2972 }
2973
2974 #[allow(clippy::too_many_lines)]
2975 fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
2976 let kind = node.kind();
2977
2978 if ctx.comment_kinds.contains(&kind) {
2980 visit_comment_node(node, ctx);
2981 return;
2982 }
2983
2984 if visit_maybe_docstring(node, kind, ctx) {
2986 return;
2987 }
2988
2989 if node.child_count() == 0 && !node.is_extra() {
2991 visit_leaf_code(node, ctx);
2992 return;
2993 }
2994
2995 for i in 0..node.child_count() {
2996 #[allow(clippy::cast_possible_truncation)]
2997 if let Some(child) = node.child(i as u32) {
2999 visit(child, ctx);
3000 }
3001 }
3002 }
3003
/// Symbol configuration passed to `analyze_lines` for C sources.
// NOTE(review): `SymbolKinds::none()` presumably leaves every kind/prefix
// empty so no functions, classes or assertions are counted for C; confirm
// against its definition.
const C_SYMBOLS: SymbolKinds = SymbolKinds::none();
3005
/// Symbol configuration passed to `analyze_lines` for Python sources.
const PYTHON_SYMBOLS: SymbolKinds = SymbolKinds {
    // Node kind that `try_count_function` treats as a function definition.
    function_def: "function_definition",
    // Node kind that `try_count_class` treats as a class definition.
    class_def: "class_definition",
    // Functions whose name starts with this are counted as tests, not functions.
    test_fn_prefix: "test_",
    // Classes whose name starts with this are counted as tests, not classes.
    test_class_prefix: "Test",
    // Attribute calls whose attribute name starts with this count as assertions.
    assertion_attr_prefix: "assert",
};
3013
3014 #[must_use]
3016 pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
3017 let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
3018 analyze_lines(text, &lang, &["comment"], None, &C_SYMBOLS)
3019 }
3020
3021 #[must_use]
3023 pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
3024 let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
3025 analyze_lines(
3026 text,
3027 &lang,
3028 &["comment"],
3029 Some("expression_statement"),
3030 &PYTHON_SYMBOLS,
3031 )
3032 }
3033}
3034
#[cfg(test)]
mod tests {
    use super::*;

    /// Docstring-only lines must land in their own counter, separate from
    /// code lines and from lines mixing code with a trailing comment.
    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
 """function docs"""
 value = 1 # trailing comment
 return value
"#;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        assert_eq!(result.raw.docstring_comment_lines, 2);
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.code_only_lines, 2);
    }

    /// A `//` comment after code is a mixed single-comment line; a lone
    /// `/* ... */` is a block-comment-only line.
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let result = analyze_text(Language::C, input, AnalysisOptions::default());
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.multi_comment_only_lines, 1);
    }

    /// An extensionless file is identified from its shebang line.
    #[test]
    fn detect_language_by_shebang() {
        let language = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(language, Some(Language::Shell));
    }

    /// The symbol counters the tests below assert on.
    ///
    /// Named fields replace a positional tuple so a call site cannot silently
    /// read the wrong counter.
    struct SymbolCounts {
        functions: u64,
        classes: u64,
        tests: u64,
    }

    /// Analyzes `line` (with a trailing newline appended) as `lang` using the
    /// default options and returns the resulting symbol counters.
    fn sym(lang: Language, line: &str) -> SymbolCounts {
        let result = analyze_text(lang, &format!("{line}\n"), AnalysisOptions::default());
        SymbolCounts {
            functions: result.raw.functions,
            classes: result.raw.classes,
            tests: result.raw.test_count,
        }
    }

    #[test]
    fn python_test_fn_not_double_counted() {
        let counts = sym(Language::Python, "def test_foo():");
        assert_eq!(
            counts.functions, 0,
            "test fn must not also increment functions"
        );
        assert_eq!(counts.tests, 1, "must be counted as a test");
        assert_eq!(counts.classes, 0);
    }

    #[test]
    fn python_test_class_not_double_counted() {
        let counts = sym(Language::Python, "class TestFoo:");
        assert_eq!(
            counts.classes, 0,
            "test class must not also increment classes"
        );
        assert_eq!(counts.tests, 1, "must be counted as a test");
        assert_eq!(counts.functions, 0);
    }

    #[test]
    fn python_regular_fn_counts_as_function() {
        let counts = sym(Language::Python, "def regular():");
        assert_eq!(counts.functions, 1, "regular function must be counted");
        assert_eq!(counts.tests, 0);
        assert_eq!(counts.classes, 0);
    }

    #[test]
    fn python_regular_class_counts_as_class() {
        let counts = sym(Language::Python, "class Regular:");
        assert_eq!(counts.classes, 1, "regular class must be counted");
        assert_eq!(counts.tests, 0);
        assert_eq!(counts.functions, 0);
    }

    #[test]
    fn go_test_fn_not_double_counted() {
        let counts = sym(Language::Go, "func TestFoo(t *testing.T) {");
        assert_eq!(
            counts.functions, 0,
            "Go test func must not also increment functions"
        );
        assert_eq!(counts.tests, 1, "must be counted as a test");
    }

    #[test]
    fn go_benchmark_fn_not_double_counted() {
        let counts = sym(Language::Go, "func BenchmarkBar(b *testing.B) {");
        assert_eq!(
            counts.functions, 0,
            "Go benchmark func must not also increment functions"
        );
        assert_eq!(counts.tests, 1, "must be counted as a test");
    }

    #[test]
    fn go_regular_fn_counts_as_function() {
        let counts = sym(Language::Go, "func doSomething() {");
        assert_eq!(counts.functions, 1, "regular Go func must be counted");
        assert_eq!(counts.tests, 0);
    }

    #[test]
    fn rust_test_attr_counts_as_test_not_function() {
        let counts = sym(Language::Rust, "#[test]");
        assert_eq!(counts.tests, 1, "#[test] must be counted as a test");
        assert_eq!(
            counts.functions, 0,
            "#[test] attribute must not be counted as a function"
        );
    }

    #[test]
    fn rust_fn_line_counts_as_function_not_test() {
        let counts = sym(Language::Rust, "fn test_something() {");
        assert_eq!(counts.functions, 1, "fn declaration must count as a function");
        assert_eq!(
            counts.tests, 0,
            "fn declaration line must not be double-counted as a test"
        );
    }

    #[test]
    fn js_describe_counts_as_test_not_function() {
        let counts = sym(Language::JavaScript, "describe('suite', () => {");
        assert_eq!(counts.tests, 1, "describe must be counted as a test");
        assert_eq!(
            counts.functions, 0,
            "describe must not be counted as a function"
        );
    }

    #[test]
    fn js_regular_fn_counts_as_function() {
        let counts = sym(Language::JavaScript, "function doWork() {");
        assert_eq!(
            counts.functions, 1,
            "JS function declaration must be counted"
        );
        assert_eq!(counts.tests, 0);
    }
}