1use std::collections::{BTreeMap, BTreeSet, HashSet};
5use std::path::Path;
6
7use serde::{Deserialize, Serialize};
8
9#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
10#[serde(rename_all = "snake_case")]
11pub enum Language {
12 C,
13 Cpp,
14 CSharp,
15 Go,
16 Java,
17 JavaScript,
18 Python,
19 Rust,
20 Shell,
21 PowerShell,
22 TypeScript,
23 Assembly,
25 Clojure,
26 Css,
27 Dart,
28 Dockerfile,
29 Elixir,
30 Erlang,
31 FSharp,
32 Groovy,
33 Haskell,
34 Html,
35 Julia,
36 Kotlin,
37 Lua,
38 Makefile,
39 Nim,
40 ObjectiveC,
41 Ocaml,
42 Perl,
43 Php,
44 R,
45 Ruby,
46 Scala,
47 Scss,
48 Sql,
49 Svelte,
50 Swift,
51 Vue,
52 Xml,
53 Zig,
54}
55
56impl Language {
57 #[must_use]
58 pub const fn display_name(&self) -> &'static str {
59 match self {
60 Self::C => "C",
61 Self::Cpp => "C++",
62 Self::CSharp => "C#",
63 Self::Go => "Go",
64 Self::Java => "Java",
65 Self::JavaScript => "JavaScript",
66 Self::Python => "Python",
67 Self::Rust => "Rust",
68 Self::Shell => "Shell",
69 Self::PowerShell => "PowerShell",
70 Self::TypeScript => "TypeScript",
71 Self::Assembly => "Assembly",
72 Self::Clojure => "Clojure",
73 Self::Css => "CSS",
74 Self::Dart => "Dart",
75 Self::Dockerfile => "Dockerfile",
76 Self::Elixir => "Elixir",
77 Self::Erlang => "Erlang",
78 Self::FSharp => "F#",
79 Self::Groovy => "Groovy",
80 Self::Haskell => "Haskell",
81 Self::Html => "HTML",
82 Self::Julia => "Julia",
83 Self::Kotlin => "Kotlin",
84 Self::Lua => "Lua",
85 Self::Makefile => "Makefile",
86 Self::Nim => "Nim",
87 Self::ObjectiveC => "Objective-C",
88 Self::Ocaml => "OCaml",
89 Self::Perl => "Perl",
90 Self::Php => "PHP",
91 Self::R => "R",
92 Self::Ruby => "Ruby",
93 Self::Scala => "Scala",
94 Self::Scss => "SCSS",
95 Self::Sql => "SQL",
96 Self::Svelte => "Svelte",
97 Self::Swift => "Swift",
98 Self::Vue => "Vue",
99 Self::Xml => "XML",
100 Self::Zig => "Zig",
101 }
102 }
103
104 #[must_use]
105 pub const fn as_slug(&self) -> &'static str {
106 match self {
107 Self::C => "c",
108 Self::Cpp => "cpp",
109 Self::CSharp => "csharp",
110 Self::Go => "go",
111 Self::Java => "java",
112 Self::JavaScript => "javascript",
113 Self::Python => "python",
114 Self::Rust => "rust",
115 Self::Shell => "shell",
116 Self::PowerShell => "powershell",
117 Self::TypeScript => "typescript",
118 Self::Assembly => "assembly",
119 Self::Clojure => "clojure",
120 Self::Css => "css",
121 Self::Dart => "dart",
122 Self::Dockerfile => "dockerfile",
123 Self::Elixir => "elixir",
124 Self::Erlang => "erlang",
125 Self::FSharp => "fsharp",
126 Self::Groovy => "groovy",
127 Self::Haskell => "haskell",
128 Self::Html => "html",
129 Self::Julia => "julia",
130 Self::Kotlin => "kotlin",
131 Self::Lua => "lua",
132 Self::Makefile => "makefile",
133 Self::Nim => "nim",
134 Self::ObjectiveC => "objectivec",
135 Self::Ocaml => "ocaml",
136 Self::Perl => "perl",
137 Self::Php => "php",
138 Self::R => "r",
139 Self::Ruby => "ruby",
140 Self::Scala => "scala",
141 Self::Scss => "scss",
142 Self::Sql => "sql",
143 Self::Svelte => "svelte",
144 Self::Swift => "swift",
145 Self::Vue => "vue",
146 Self::Xml => "xml",
147 Self::Zig => "zig",
148 }
149 }
150
151 #[must_use]
152 pub fn from_name(name: &str) -> Option<Self> {
153 match name.trim().to_ascii_lowercase().as_str() {
154 "c" => Some(Self::C),
155 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
156 "csharp" | "c#" | "cs" => Some(Self::CSharp),
157 "go" | "golang" => Some(Self::Go),
158 "java" => Some(Self::Java),
159 "javascript" | "js" => Some(Self::JavaScript),
160 "python" | "py" => Some(Self::Python),
161 "rust" | "rs" => Some(Self::Rust),
162 "shell" | "sh" | "bash" => Some(Self::Shell),
163 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
164 "typescript" | "ts" => Some(Self::TypeScript),
165 "assembly" | "asm" => Some(Self::Assembly),
166 "clojure" | "clj" => Some(Self::Clojure),
167 "css" => Some(Self::Css),
168 "dart" => Some(Self::Dart),
169 "dockerfile" | "docker" => Some(Self::Dockerfile),
170 "elixir" | "ex" => Some(Self::Elixir),
171 "erlang" | "erl" => Some(Self::Erlang),
172 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
173 "groovy" => Some(Self::Groovy),
174 "haskell" | "hs" => Some(Self::Haskell),
175 "html" | "htm" => Some(Self::Html),
176 "julia" | "jl" => Some(Self::Julia),
177 "kotlin" | "kt" => Some(Self::Kotlin),
178 "lua" => Some(Self::Lua),
179 "makefile" | "make" | "mk" => Some(Self::Makefile),
180 "nim" => Some(Self::Nim),
181 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
182 "ocaml" | "ml" => Some(Self::Ocaml),
183 "perl" | "pl" => Some(Self::Perl),
184 "php" => Some(Self::Php),
185 "r" => Some(Self::R),
186 "ruby" | "rb" => Some(Self::Ruby),
187 "scala" => Some(Self::Scala),
188 "scss" | "sass" => Some(Self::Scss),
189 "sql" => Some(Self::Sql),
190 "svelte" => Some(Self::Svelte),
191 "swift" => Some(Self::Swift),
192 "vue" => Some(Self::Vue),
193 "xml" => Some(Self::Xml),
194 "zig" => Some(Self::Zig),
195 _ => None,
196 }
197 }
198}
199
200#[derive(Debug, Clone, Serialize, Deserialize, Default)]
201pub struct RawLineCounts {
202 pub total_physical_lines: u64,
203 pub blank_only_lines: u64,
204 pub code_only_lines: u64,
205 pub single_comment_only_lines: u64,
206 pub multi_comment_only_lines: u64,
207 pub mixed_code_single_comment_lines: u64,
208 pub mixed_code_multi_comment_lines: u64,
209 pub docstring_comment_lines: u64,
210 pub skipped_unknown_lines: u64,
211 #[serde(default)]
213 pub functions: u64,
214 #[serde(default)]
216 pub classes: u64,
217 #[serde(default)]
219 pub variables: u64,
220 #[serde(default)]
222 pub imports: u64,
223 #[serde(default)]
227 pub compiler_directive_lines: u64,
228 #[serde(default)]
231 pub test_count: u64,
232 #[serde(default)]
235 pub test_assertion_count: u64,
236 #[serde(default)]
239 pub test_suite_count: u64,
240}
241
242#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
243#[serde(rename_all = "snake_case")]
244pub enum ParseMode {
245 Lexical,
246 LexicalBestEffort,
247 TreeSitter,
248}
249
250#[derive(Debug, Clone, Serialize, Deserialize)]
251pub struct RawFileAnalysis {
252 pub raw: RawLineCounts,
253 pub parse_mode: ParseMode,
254 pub warnings: Vec<String>,
255}
256
/// Caller-selected switches forwarded to the generic analyzer by
/// `analyze_text`.
///
/// Derives `PartialEq`/`Eq` so option sets can be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct AnalysisOptions {
    /// NOTE(review): presumably makes blank lines inside a block comment count
    /// as comment lines; confirm against the analyzer.
    pub blank_in_block_comment_as_comment: bool,
    /// NOTE(review): presumably merges continuation lines into one logical
    /// line; confirm against the analyzer.
    pub collapse_continuation_lines: bool,
}
270
271impl Default for AnalysisOptions {
272 fn default() -> Self {
273 Self {
274 blank_in_block_comment_as_comment: true,
275 collapse_continuation_lines: false,
276 }
277 }
278}
279
280#[must_use]
281pub fn supported_languages() -> BTreeSet<Language> {
282 [
283 Language::Assembly,
284 Language::C,
285 Language::Clojure,
286 Language::Cpp,
287 Language::CSharp,
288 Language::Css,
289 Language::Dart,
290 Language::Dockerfile,
291 Language::Elixir,
292 Language::Erlang,
293 Language::FSharp,
294 Language::Go,
295 Language::Groovy,
296 Language::Haskell,
297 Language::Html,
298 Language::Java,
299 Language::JavaScript,
300 Language::Julia,
301 Language::Kotlin,
302 Language::Lua,
303 Language::Makefile,
304 Language::Nim,
305 Language::ObjectiveC,
306 Language::Ocaml,
307 Language::Perl,
308 Language::Php,
309 Language::PowerShell,
310 Language::Python,
311 Language::R,
312 Language::Ruby,
313 Language::Rust,
314 Language::Scala,
315 Language::Scss,
316 Language::Shell,
317 Language::Sql,
318 Language::Svelte,
319 Language::Swift,
320 Language::TypeScript,
321 Language::Vue,
322 Language::Xml,
323 Language::Zig,
324 ]
325 .into_iter()
326 .collect()
327}
328
329fn detect_by_shebang(line: &str) -> Option<Language> {
331 let lower = line.to_ascii_lowercase();
332 if !lower.starts_with("#!") {
333 return None;
334 }
335 if lower.contains("python") {
336 return Some(Language::Python);
337 }
338 if lower.contains("pwsh") || lower.contains("powershell") {
339 return Some(Language::PowerShell);
340 }
341 if lower.contains("bash")
342 || lower.contains("/sh")
343 || lower.contains("zsh")
344 || lower.contains("ksh")
345 {
346 return Some(Language::Shell);
347 }
348 if lower.contains("ruby") {
349 return Some(Language::Ruby);
350 }
351 if lower.contains("perl") {
352 return Some(Language::Perl);
353 }
354 if lower.contains("php") {
355 return Some(Language::Php);
356 }
357 if lower.contains("node") || lower.contains("nodejs") {
358 return Some(Language::JavaScript);
359 }
360 None
361}
362
363fn detect_by_extension(ext: &str) -> Option<Language> {
365 static EXT_MAP: &[(&str, Language)] = &[
367 ("c", Language::C),
368 ("h", Language::C),
369 ("cc", Language::Cpp),
370 ("cp", Language::Cpp),
371 ("cpp", Language::Cpp),
372 ("cxx", Language::Cpp),
373 ("hh", Language::Cpp),
374 ("hpp", Language::Cpp),
375 ("hxx", Language::Cpp),
376 ("cs", Language::CSharp),
377 ("go", Language::Go),
378 ("java", Language::Java),
379 ("js", Language::JavaScript),
380 ("mjs", Language::JavaScript),
381 ("cjs", Language::JavaScript),
382 ("py", Language::Python),
383 ("rs", Language::Rust),
384 ("sh", Language::Shell),
385 ("bash", Language::Shell),
386 ("zsh", Language::Shell),
387 ("ksh", Language::Shell),
388 ("ps1", Language::PowerShell),
389 ("psm1", Language::PowerShell),
390 ("psd1", Language::PowerShell),
391 ("ts", Language::TypeScript),
392 ("mts", Language::TypeScript),
393 ("cts", Language::TypeScript),
394 ("asm", Language::Assembly),
395 ("s", Language::Assembly),
396 ("clj", Language::Clojure),
397 ("cljs", Language::Clojure),
398 ("cljc", Language::Clojure),
399 ("edn", Language::Clojure),
400 ("css", Language::Css),
401 ("dart", Language::Dart),
402 ("ex", Language::Elixir),
403 ("exs", Language::Elixir),
404 ("erl", Language::Erlang),
405 ("hrl", Language::Erlang),
406 ("fs", Language::FSharp),
407 ("fsi", Language::FSharp),
408 ("fsx", Language::FSharp),
409 ("groovy", Language::Groovy),
410 ("gradle", Language::Groovy),
411 ("hs", Language::Haskell),
412 ("lhs", Language::Haskell),
413 ("html", Language::Html),
414 ("htm", Language::Html),
415 ("xhtml", Language::Html),
416 ("jl", Language::Julia),
417 ("kt", Language::Kotlin),
418 ("kts", Language::Kotlin),
419 ("lua", Language::Lua),
420 ("mk", Language::Makefile),
421 ("nim", Language::Nim),
422 ("nims", Language::Nim),
423 ("m", Language::ObjectiveC),
424 ("mm", Language::ObjectiveC),
425 ("ml", Language::Ocaml),
426 ("mli", Language::Ocaml),
427 ("pl", Language::Perl),
428 ("pm", Language::Perl),
429 ("t", Language::Perl),
430 ("php", Language::Php),
431 ("php3", Language::Php),
432 ("php4", Language::Php),
433 ("php5", Language::Php),
434 ("php7", Language::Php),
435 ("phtml", Language::Php),
436 ("r", Language::R),
437 ("rb", Language::Ruby),
438 ("rake", Language::Ruby),
439 ("scala", Language::Scala),
440 ("sc", Language::Scala),
441 ("scss", Language::Scss),
442 ("sass", Language::Scss),
443 ("sql", Language::Sql),
444 ("svelte", Language::Svelte),
445 ("swift", Language::Swift),
446 ("vue", Language::Vue),
447 ("xml", Language::Xml),
448 ("xsd", Language::Xml),
449 ("xsl", Language::Xml),
450 ("xslt", Language::Xml),
451 ("svg", Language::Xml),
452 ("zig", Language::Zig),
453 ];
454 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
455}
456
457fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
459 if filename == "Dockerfile"
461 || filename.starts_with("Dockerfile.")
462 || filename_lower == "dockerfile"
463 {
464 return Some(Language::Dockerfile);
465 }
466 if matches!(
468 filename,
469 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
470 ) {
471 return Some(Language::Makefile);
472 }
473 if matches!(
475 filename,
476 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
477 ) {
478 return Some(Language::Ruby);
479 }
480 None
481}
482
483#[must_use]
484#[allow(clippy::too_many_lines)]
485pub fn detect_language(
486 path: &Path,
487 first_line: Option<&str>,
488 extension_overrides: &BTreeMap<String, String>,
489 shebang_detection: bool,
490) -> Option<Language> {
491 let extension = path
492 .extension()
493 .and_then(|ext| ext.to_str())
494 .map(str::to_ascii_lowercase);
495
496 if let Some(ext) = extension.as_ref() {
498 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
499 if let Some(lang) = Language::from_name(override_name) {
500 return Some(lang);
501 }
502 }
503 }
504
505 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
507 let filename_lower = filename.to_ascii_lowercase();
508
509 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
510 return Some(lang);
511 }
512
513 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
515 return Some(lang);
516 }
517
518 if shebang_detection {
520 if let Some(line) = first_line {
521 if let Some(lang) = detect_by_shebang(line) {
522 return Some(lang);
523 }
524 }
525 }
526
527 None
528}
529
530#[must_use]
531pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
532 #[cfg(feature = "tree-sitter")]
534 {
535 match language {
536 Language::C | Language::Cpp => {
537 if let Some(result) = ts::analyze_c(text) {
538 return result;
539 }
540 }
541 Language::Python => {
542 if let Some(result) = ts::analyze_python(text) {
543 return result;
544 }
545 }
546 _ => {}
547 }
548 }
549
550 let (mut config, has_preprocessor) = language_scan_config(language);
551
552 if language == Language::Python {
554 config.skip_lines = detect_python_docstring_lines(text);
555 }
556
557 let flags = IeeeFlags {
560 has_preprocessor_directives: has_preprocessor,
561 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
562 collapse_continuation_lines: options.collapse_continuation_lines,
563 };
564 analyze_generic(text, config, flags)
565}
566
567fn language_scan_config(language: Language) -> (ScanConfig, bool) {
575 let cfg = LANG_SCAN_TABLE
576 .iter()
577 .find_map(|&(l, c)| (l == language).then_some(c))
578 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
579 (
580 ScanConfig {
581 line_comments: cfg.line_comments,
582 block_comment: cfg.block_comment,
583 allow_single_quote_strings: cfg.allow_single_quote_strings,
584 allow_double_quote_strings: cfg.allow_double_quote_strings,
585 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
586 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
587 skip_lines: HashSet::new(),
588 symbol_patterns: cfg.symbol_patterns,
589 },
590 cfg.has_preprocessor,
591 )
592}
593
/// Per-language lists of literal text patterns, one list per symbol category.
///
/// NOTE(review): the code that matches these patterns against source lines is
/// not visible from here; the entries look like line prefixes, but confirm
/// against the scanner before relying on that.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Patterns for function definitions.
    functions: &'static [&'static str],
    /// Additional function patterns. NOTE(review): presumably these only count
    /// when the line also contains a parenthesis; confirm against the scanner.
    functions_prefix_paren: &'static [&'static str],
    /// Patterns for class-like type definitions.
    classes: &'static [&'static str],
    /// Patterns for variable declarations.
    variables: &'static [&'static str],
    /// Patterns for import/include statements.
    imports: &'static [&'static str],
    /// Patterns for test definitions.
    tests: &'static [&'static str],
    /// Patterns for test assertions.
    assertions: &'static [&'static str],
    /// Patterns for test-suite declarations.
    test_suites: &'static [&'static str],
}
619
620impl SymbolPatterns {
621 const fn none() -> Self {
622 Self {
623 functions: &[],
624 functions_prefix_paren: &[],
625 classes: &[],
626 variables: &[],
627 imports: &[],
628 tests: &[],
629 assertions: &[],
630 test_suites: &[],
631 }
632 }
633}
634
635const SP_NONE: SymbolPatterns = SymbolPatterns::none(); const SP_RUST: SymbolPatterns = SymbolPatterns {
638 functions: &[
639 "fn ",
640 "pub fn ",
641 "pub(crate) fn ",
642 "pub(super) fn ",
643 "async fn ",
644 "pub async fn ",
645 "pub(crate) async fn ",
646 "unsafe fn ",
647 "pub unsafe fn ",
648 "pub(crate) unsafe fn ",
649 "const fn ",
650 "pub const fn ",
651 "pub(crate) const fn ",
652 "extern fn ",
653 "pub extern fn ",
654 ],
655 functions_prefix_paren: &[],
656 classes: &[
657 "struct ",
658 "pub struct ",
659 "pub(crate) struct ",
660 "enum ",
661 "pub enum ",
662 "pub(crate) enum ",
663 "trait ",
664 "pub trait ",
665 "pub(crate) trait ",
666 "impl ",
667 "impl<",
668 "type ",
669 "pub type ",
670 "pub(crate) type ",
671 ],
672 variables: &["let ", "let mut "],
673 imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
674 tests: &[
676 "#[test]",
677 "#[tokio::test]",
678 "#[actix_web::test]",
679 "#[rstest]",
680 "#[test_case",
681 ],
682 assertions: &[
683 "assert_eq!(",
684 "assert_ne!(",
685 "assert!(",
686 "assert_matches!(",
687 "assert_err!(",
688 "assert_ok!(",
689 ],
690 test_suites: &[],
691};
692
693const SP_PYTHON: SymbolPatterns = SymbolPatterns {
694 functions: &["def ", "async def "],
695 functions_prefix_paren: &[],
696 classes: &["class "],
697 variables: &[],
698 imports: &["import ", "from "],
699 tests: &["def test_", "async def test_", "class Test"],
701 assertions: &[
702 "self.assertEqual(",
703 "self.assertNotEqual(",
704 "self.assertTrue(",
705 "self.assertFalse(",
706 "self.assertIsNone(",
707 "self.assertIsNotNone(",
708 "self.assertIn(",
709 "self.assertNotIn(",
710 "self.assertRaises(",
711 "self.assertAlmostEqual(",
712 ],
713 test_suites: &[],
714};
715
716const SP_JS: SymbolPatterns = SymbolPatterns {
717 functions: &[
718 "function ",
719 "async function ",
720 "export function ",
721 "export async function ",
722 "export default function ",
723 ],
724 functions_prefix_paren: &[],
725 classes: &["class ", "export class ", "export default class "],
726 variables: &[
727 "var ",
728 "let ",
729 "const ",
730 "export var ",
731 "export let ",
732 "export const ",
733 ],
734 imports: &["import "],
735 tests: &[
737 "describe(",
738 "it(",
739 "test(",
740 "it.each(",
741 "test.each(",
742 "describe.each(",
743 ],
744 assertions: &["expect("],
745 test_suites: &[],
746};
747
748const SP_TS: SymbolPatterns = SymbolPatterns {
749 functions: &[
750 "function ",
751 "async function ",
752 "export function ",
753 "export async function ",
754 "export default function ",
755 ],
756 functions_prefix_paren: &[],
757 classes: &[
758 "class ",
759 "export class ",
760 "export default class ",
761 "abstract class ",
762 "export abstract class ",
763 "interface ",
764 "export interface ",
765 "declare class ",
766 "declare interface ",
767 ],
768 variables: &[
769 "var ",
770 "let ",
771 "const ",
772 "export var ",
773 "export let ",
774 "export const ",
775 ],
776 imports: &["import "],
777 tests: &[
779 "describe(",
780 "it(",
781 "test(",
782 "it.each(",
783 "test.each(",
784 "describe.each(",
785 ],
786 assertions: &["expect("],
787 test_suites: &[],
788};
789
790const SP_GO: SymbolPatterns = SymbolPatterns {
791 functions: &["func "],
792 functions_prefix_paren: &[],
793 classes: &["type "],
794 variables: &["var "],
795 imports: &["import "],
796 tests: &["func Test", "func Benchmark", "func Fuzz"],
798 assertions: &[],
799 test_suites: &[],
800};
801
802const SP_JAVA: SymbolPatterns = SymbolPatterns {
803 functions: &[],
804 functions_prefix_paren: &[],
805 classes: &[
806 "class ",
807 "public class ",
808 "private class ",
809 "protected class ",
810 "abstract class ",
811 "final class ",
812 "public abstract class ",
813 "public final class ",
814 "interface ",
815 "public interface ",
816 "enum ",
817 "public enum ",
818 "record ",
819 "public record ",
820 "@interface ",
821 ],
822 variables: &[],
823 imports: &["import "],
824 tests: &[
826 "@Test",
827 "@ParameterizedTest",
828 "@RepeatedTest",
829 "@TestFactory",
830 "@TestTemplate",
831 ],
832 assertions: &[
833 "assertEquals(",
834 "assertNotEquals(",
835 "assertTrue(",
836 "assertFalse(",
837 "assertNull(",
838 "assertNotNull(",
839 "assertThat(",
840 "assertThrows(",
841 "assertAll(",
842 "assertArrayEquals(",
843 "assertIterableEquals(",
844 "assertLinesMatch(",
845 ],
846 test_suites: &[],
847};
848
849const SP_CSHARP: SymbolPatterns = SymbolPatterns {
850 functions: &[],
851 functions_prefix_paren: &[],
852 classes: &[
853 "class ",
854 "public class ",
855 "private class ",
856 "protected class ",
857 "internal class ",
858 "abstract class ",
859 "sealed class ",
860 "static class ",
861 "partial class ",
862 "public abstract class ",
863 "public sealed class ",
864 "public static class ",
865 "interface ",
866 "public interface ",
867 "internal interface ",
868 "enum ",
869 "public enum ",
870 "struct ",
871 "public struct ",
872 "record ",
873 "public record ",
874 ],
875 variables: &["var "],
876 imports: &["using "],
877 tests: &[
879 "[TestMethod]",
880 "[Test]",
881 "[Fact]",
882 "[Theory]",
883 "[TestCase(",
884 "[DataRow(",
885 "[InlineData(",
886 "[MemberData(",
887 ],
888 assertions: &[
889 "Assert.AreEqual(",
890 "Assert.AreNotEqual(",
891 "Assert.IsTrue(",
892 "Assert.IsFalse(",
893 "Assert.IsNull(",
894 "Assert.IsNotNull(",
895 "Assert.Equal(",
896 "Assert.NotEqual(",
897 "Assert.True(",
898 "Assert.False(",
899 "Assert.That(",
900 "Assert.Contains(",
901 "Assert.Throws(",
902 "Assert.ThrowsAsync(",
903 "Assert.IsInstanceOfType(",
904 ],
905 test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
906};
907
/// Test-declaration macros and calls shared by the C and C++ pattern sets,
/// grouped by the framework they come from.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // Google Test
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
948
/// Assertion macros and calls shared by the C and C++ pattern sets, grouped
/// by the framework they come from.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // Google Test: fatal assertions
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // Google Test: non-fatal expectations
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1022
/// Test-suite declaration macros shared by the C and C++ pattern sets.
///
/// NOTE(review): both `CPPUNIT_TEST_SUITE(` and `CPPUNIT_TEST_SUITE_END(` are
/// listed, which may count each CppUnit suite twice depending on how the
/// scanner applies these patterns; confirm against the matcher.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    // CppUTest
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    // Boost.Test
    "BOOST_AUTO_TEST_SUITE(",
    // CppUnit
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1031
1032const SP_C: SymbolPatterns = SymbolPatterns {
1033 functions: &[],
1035 functions_prefix_paren: &[
1036 "void ",
1037 "int ",
1038 "char ",
1039 "float ",
1040 "double ",
1041 "long ",
1042 "unsigned ",
1043 "size_t ",
1044 "static ",
1045 "inline ",
1046 "const ",
1047 "extern ",
1048 ],
1049 classes: &[
1050 "struct ",
1051 "typedef struct ",
1052 "union ",
1053 "typedef union ",
1054 "typedef enum ",
1055 ],
1056 variables: &[],
1057 imports: &["#include "],
1058 tests: TEST_PATTERNS_C_CPP,
1059 assertions: ASSERT_PATTERNS_C_CPP,
1060 test_suites: SUITE_PATTERNS_C_CPP,
1061};
1062
1063const SP_CPP: SymbolPatterns = SymbolPatterns {
1064 functions: &[
1066 "virtual ", "explicit ", "~", "operator", ],
1071 functions_prefix_paren: &[
1072 "void ",
1073 "bool ",
1074 "int ",
1075 "char ",
1076 "float ",
1077 "double ",
1078 "long ",
1079 "unsigned ",
1080 "size_t ",
1081 "auto ",
1082 "static ",
1083 "inline ",
1084 "constexpr ",
1085 "const ",
1086 "extern ",
1087 ],
1088 classes: &["class ", "struct ", "namespace ", "template ", "template<"],
1090 variables: &[],
1091 imports: &["#include "],
1092 tests: TEST_PATTERNS_C_CPP,
1093 assertions: ASSERT_PATTERNS_C_CPP,
1094 test_suites: SUITE_PATTERNS_C_CPP,
1095};
1096
1097const SP_SHELL: SymbolPatterns = SymbolPatterns {
1098 functions: &["function "],
1099 functions_prefix_paren: &[],
1100 classes: &[],
1101 variables: &["declare ", "local ", "export "],
1102 imports: &["source ", ". "],
1103 tests: &[],
1104 assertions: &[],
1105 test_suites: &[],
1106};
1107
1108const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
1109 functions: &["function ", "Function "],
1110 functions_prefix_paren: &[],
1111 classes: &["class "],
1112 variables: &[],
1113 imports: &["Import-Module ", "using "],
1114 tests: &["Describe ", "It ", "Context "],
1116 assertions: &[],
1117 test_suites: &[],
1118};
1119
1120const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
1121 functions: &[
1122 "fun ",
1123 "private fun ",
1124 "public fun ",
1125 "protected fun ",
1126 "internal fun ",
1127 "override fun ",
1128 "suspend fun ",
1129 "abstract fun ",
1130 "open fun ",
1131 "private suspend fun ",
1132 "public suspend fun ",
1133 ],
1134 functions_prefix_paren: &[],
1135 classes: &[
1136 "class ",
1137 "data class ",
1138 "sealed class ",
1139 "abstract class ",
1140 "open class ",
1141 "object ",
1142 "companion object",
1143 "interface ",
1144 "enum class ",
1145 "annotation class ",
1146 ],
1147 variables: &["val ", "var ", "private val ", "private var ", "const val "],
1148 imports: &["import "],
1149 tests: &[
1151 "@Test",
1152 "@ParameterizedTest",
1153 "@RepeatedTest",
1154 "\"should ",
1155 "\"it ",
1156 ],
1157 assertions: &[
1158 "assertEquals(",
1159 "assertNotEquals(",
1160 "assertTrue(",
1161 "assertFalse(",
1162 "assertNull(",
1163 "assertNotNull(",
1164 "assertThat(",
1165 "assertThrows(",
1166 "shouldBe(",
1167 "shouldNotBe(",
1168 "shouldThrow(",
1169 ],
1170 test_suites: &[],
1171};
1172
1173const SP_SWIFT: SymbolPatterns = SymbolPatterns {
1174 functions: &[
1175 "func ",
1176 "private func ",
1177 "public func ",
1178 "internal func ",
1179 "override func ",
1180 "open func ",
1181 "static func ",
1182 "class func ",
1183 "mutating func ",
1184 "private static func ",
1185 "public static func ",
1186 ],
1187 functions_prefix_paren: &[],
1188 classes: &[
1189 "class ",
1190 "struct ",
1191 "protocol ",
1192 "enum ",
1193 "extension ",
1194 "actor ",
1195 "public class ",
1196 "private class ",
1197 "open class ",
1198 "final class ",
1199 "public struct ",
1200 "private struct ",
1201 "public protocol ",
1202 ],
1203 variables: &[
1204 "var ",
1205 "let ",
1206 "private var ",
1207 "private let ",
1208 "static var ",
1209 "static let ",
1210 ],
1211 imports: &["import "],
1212 tests: &["func test", "func Test", "@Test"],
1214 assertions: &[
1215 "XCTAssertEqual(",
1216 "XCTAssertNotEqual(",
1217 "XCTAssertTrue(",
1218 "XCTAssertFalse(",
1219 "XCTAssertNil(",
1220 "XCTAssertNotNil(",
1221 "XCTAssertGreaterThan(",
1222 "XCTAssertLessThan(",
1223 "XCTAssertThrowsError(",
1224 "XCTAssertNoThrow(",
1225 "#expect(",
1226 ],
1227 test_suites: &[],
1228};
1229
1230const SP_RUBY: SymbolPatterns = SymbolPatterns {
1231 functions: &["def ", "private def ", "protected def "],
1232 functions_prefix_paren: &[],
1233 classes: &["class ", "module "],
1234 variables: &[],
1235 imports: &["require ", "require_relative "],
1236 tests: &["it ", "it(", "describe ", "context ", "test "],
1238 assertions: &[],
1239 test_suites: &[],
1240};
1241
1242const SP_SCALA: SymbolPatterns = SymbolPatterns {
1243 functions: &["def ", "private def ", "protected def ", "override def "],
1244 functions_prefix_paren: &[],
1245 classes: &[
1246 "class ",
1247 "case class ",
1248 "abstract class ",
1249 "sealed class ",
1250 "object ",
1251 "trait ",
1252 ],
1253 variables: &["val ", "var ", "lazy val "],
1254 imports: &["import "],
1255 tests: &["test(", "it(", "describe("],
1257 assertions: &[],
1258 test_suites: &[],
1259};
1260
1261const SP_PHP: SymbolPatterns = SymbolPatterns {
1262 functions: &[
1263 "function ",
1264 "public function ",
1265 "private function ",
1266 "protected function ",
1267 "static function ",
1268 "abstract function ",
1269 "final function ",
1270 "public static function ",
1271 "private static function ",
1272 "protected static function ",
1273 ],
1274 functions_prefix_paren: &[],
1275 classes: &[
1276 "class ",
1277 "abstract class ",
1278 "final class ",
1279 "interface ",
1280 "trait ",
1281 "enum ",
1282 ],
1283 variables: &[],
1284 imports: &[
1285 "use ",
1286 "require ",
1287 "require_once ",
1288 "include ",
1289 "include_once ",
1290 ],
1291 tests: &[
1293 "public function test",
1294 "function test",
1295 "#[Test]",
1296 "#[DataProvider(",
1297 ],
1298 assertions: &[],
1299 test_suites: &[],
1300};
1301
1302const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
1303 functions: &[
1304 "def ",
1305 "defp ",
1306 "defmacro ",
1307 "defmacrop ",
1308 "defguard ",
1309 "defguardp ",
1310 ],
1311 functions_prefix_paren: &[],
1312 classes: &["defmodule ", "defprotocol ", "defimpl "],
1313 variables: &[],
1314 imports: &["import ", "alias ", "use ", "require "],
1315 tests: &["test ", "describe "],
1317 assertions: &[],
1318 test_suites: &[],
1319};
1320
1321const SP_ERLANG: SymbolPatterns = SymbolPatterns {
1322 functions: &[],
1323 functions_prefix_paren: &[],
1324 classes: &["-module("],
1325 variables: &[],
1326 imports: &["-import(", "-include(", "-include_lib("],
1327 tests: &[],
1328 assertions: &[],
1329 test_suites: &[],
1330};
1331
1332const SP_FSHARP: SymbolPatterns = SymbolPatterns {
1333 functions: &[
1334 "let ",
1335 "let rec ",
1336 "member ",
1337 "override ",
1338 "abstract member ",
1339 ],
1340 functions_prefix_paren: &[],
1341 classes: &["type "],
1342 variables: &["let mutable "],
1343 imports: &["open "],
1344 tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
1346 assertions: &[],
1347 test_suites: &[],
1348};
1349
1350const SP_GROOVY: SymbolPatterns = SymbolPatterns {
1351 functions: &["def ", "private def ", "public def ", "protected def "],
1352 functions_prefix_paren: &[],
1353 classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
1354 variables: &[],
1355 imports: &["import "],
1356 tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
1358 assertions: &[],
1359 test_suites: &[],
1360};
1361
1362const SP_HASKELL: SymbolPatterns = SymbolPatterns {
1363 functions: &[],
1364 functions_prefix_paren: &[],
1365 classes: &["class ", "data ", "newtype ", "type "],
1366 variables: &[],
1367 imports: &["import "],
1368 tests: &[],
1369 assertions: &[],
1370 test_suites: &[],
1371};
1372
1373const SP_LUA: SymbolPatterns = SymbolPatterns {
1374 functions: &["function ", "local function "],
1375 functions_prefix_paren: &[],
1376 classes: &[],
1377 variables: &["local "],
1378 imports: &[],
1379 tests: &["it(", "describe(", "pending("],
1381 assertions: &[],
1382 test_suites: &[],
1383};
1384
1385const SP_NIM: SymbolPatterns = SymbolPatterns {
1386 functions: &[
1387 "proc ",
1388 "func ",
1389 "method ",
1390 "iterator ",
1391 "converter ",
1392 "template ",
1393 "macro ",
1394 ],
1395 functions_prefix_paren: &[],
1396 classes: &["type "],
1397 variables: &["var ", "let ", "const "],
1398 imports: &["import ", "from "],
1399 tests: &["test "],
1401 assertions: &[],
1402 test_suites: &[],
1403};
1404
/// Line-prefix patterns for Objective-C sources.
///
/// Method declarations are recognised by their `- (` / `+ (` lead-in.
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    functions: &["- (", "+ ("],
    functions_prefix_paren: &[],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // Also matches the generic `- (` function prefix, but `count_symbols`
    // zeroes the function count whenever a test prefix hits.
    tests: &["- (void)test"],
    // Only lines that *start* with one of these calls are counted.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
};
1427
/// Line-prefix patterns for OCaml sources.
///
/// Every `let` binding at the start of a trimmed line is counted as a
/// function; no separate variable prefixes are listed.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    // `let rec ` is already covered by `let ` under prefix matching; it is
    // listed for explicitness only.
    functions: &["let ", "let rec "],
    functions_prefix_paren: &[],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1438
/// Line-prefix patterns for Perl sources.
///
/// `package ` declarations are reported in the class category; no test or
/// assertion prefixes are defined.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    functions_prefix_paren: &[],
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1449
/// Line-prefix patterns for Clojure sources.
///
/// Each prefix includes the opening parenthesis, so only forms that begin the
/// trimmed line are counted by `count_symbols`.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    functions_prefix_paren: &[],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    // The trailing space keeps `(def ` from matching `(defn `, `(deftest `, etc.
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
};
1466
/// Line-prefix patterns for Julia sources.
///
/// Struct and type declarations are reported in the class category.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    functions_prefix_paren: &[],
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    // Both macros land in the test counter; `test_suites` stays empty.
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
};
1483
/// Line-prefix patterns for Dart sources.
///
/// No function prefixes are listed, so `count_symbols` never reports
/// functions for this table; classes, variables, imports and tests are
/// matched at the start of the trimmed line.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
};
1495
/// Line-prefix patterns for R sources.
///
/// Only imports and tests are listed; function, class and variable counts
/// are always zero for this table.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // NOTE(review): lines starting with `expect_` are counted as tests here
    // while `assertions` is empty — confirm this is intended.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
1507
/// Line-prefix patterns for SQL sources.
///
/// `count_symbols` uses case-sensitive prefix matching, so each statement is
/// listed in both all-lowercase and all-uppercase spelling; mixed-case
/// spellings such as `Create Table` are not matched.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    functions_prefix_paren: &[],
    // Tables, views and schemas are reported in the class category.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1534
/// Line-prefix patterns for assembly sources.
///
/// Only procedure and include directives are listed, each in lower- and
/// upper-case because prefix matching in `count_symbols` is case-sensitive.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    functions: &["proc ", "PROC "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1545
/// Line-prefix patterns for Zig sources.
///
/// Function prefixes cover the plain, `pub`, `export` and `inline` spellings;
/// no class or import prefixes are defined.
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["var ", "pub var "],
    imports: &[],
    // Matches `test "name"` blocks and the brace form written without a space.
    tests: &["test \"", "test{"],
    assertions: &[],
    test_suites: &[],
};
1563
/// Compile-time description of how one language is scanned lexically.
///
/// Values of this type populate `LANG_SCAN_TABLE`; `C_SLASH_BASE` and
/// `HASH_BASE` act as templates that individual entries refine through
/// struct-update syntax.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Prefixes that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, if the language has them.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether `"""` and `'''` open multi-line string literals.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a verbatim string in which `""` is an escaped quote.
    allow_csharp_verbatim_strings: bool,
    /// Line-prefix patterns used for symbol counting.
    symbol_patterns: SymbolPatterns,
    /// Set for C, C++ and Objective-C in `LANG_SCAN_TABLE`; presumably feeds
    /// the preprocessor-directive counting — confirm against the consumer.
    has_preprocessor: bool,
}
1580
/// Per-file scanner settings consumed by `scan_line` and
/// `process_physical_line`.
///
/// Mirrors the lexical fields of `StaticLangConfig` and adds the per-file
/// `skip_lines` set.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Prefixes that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, if any.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether `"""` and `'''` open multi-line string literals.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a verbatim string.
    allow_csharp_verbatim_strings: bool,
    /// Zero-based line indices that `process_physical_line` counts as
    /// docstring lines without scanning them.
    skip_lines: HashSet<usize>,
    /// Line-prefix patterns handed to `count_symbols`.
    symbol_patterns: SymbolPatterns,
}
1593
/// Template for languages with `//` line comments and `/* */` block comments.
///
/// Single- and double-quoted strings are enabled; triple-quoted and verbatim
/// strings are off, no symbol patterns are attached and the preprocessor flag
/// is cleared. Table entries override individual fields via `..C_SLASH_BASE`.
const C_SLASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["//"],
    block_comment: Some(("/*", "*/")),
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
1613
/// Template for languages whose line comments start with `#` and which have
/// no block comments.
///
/// Single- and double-quoted strings are enabled; everything else is off.
/// Table entries override individual fields via `..HASH_BASE`.
const HASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["#"],
    block_comment: None,
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
1627
1628static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
1632 (
1634 Language::C,
1635 StaticLangConfig {
1636 symbol_patterns: SP_C,
1637 has_preprocessor: true,
1638 ..C_SLASH_BASE
1639 },
1640 ),
1641 (
1642 Language::Cpp,
1643 StaticLangConfig {
1644 symbol_patterns: SP_CPP,
1645 has_preprocessor: true,
1646 ..C_SLASH_BASE
1647 },
1648 ),
1649 (
1650 Language::ObjectiveC,
1651 StaticLangConfig {
1652 symbol_patterns: SP_OBJECTIVEC,
1653 has_preprocessor: true,
1654 ..C_SLASH_BASE
1655 },
1656 ),
1657 (
1659 Language::CSharp,
1660 StaticLangConfig {
1661 symbol_patterns: SP_CSHARP,
1662 allow_csharp_verbatim_strings: true,
1663 ..C_SLASH_BASE
1664 },
1665 ),
1666 (
1667 Language::Go,
1668 StaticLangConfig {
1669 symbol_patterns: SP_GO,
1670 ..C_SLASH_BASE
1671 },
1672 ),
1673 (
1674 Language::Java,
1675 StaticLangConfig {
1676 symbol_patterns: SP_JAVA,
1677 ..C_SLASH_BASE
1678 },
1679 ),
1680 (
1681 Language::JavaScript,
1682 StaticLangConfig {
1683 symbol_patterns: SP_JS,
1684 ..C_SLASH_BASE
1685 },
1686 ),
1687 (
1688 Language::TypeScript,
1689 StaticLangConfig {
1690 symbol_patterns: SP_TS,
1691 ..C_SLASH_BASE
1692 },
1693 ),
1694 (
1695 Language::Svelte,
1696 StaticLangConfig {
1697 symbol_patterns: SP_JS,
1698 ..C_SLASH_BASE
1699 },
1700 ),
1701 (
1702 Language::Vue,
1703 StaticLangConfig {
1704 symbol_patterns: SP_JS,
1705 ..C_SLASH_BASE
1706 },
1707 ),
1708 (
1709 Language::Dart,
1710 StaticLangConfig {
1711 symbol_patterns: SP_DART,
1712 ..C_SLASH_BASE
1713 },
1714 ),
1715 (
1716 Language::Groovy,
1717 StaticLangConfig {
1718 symbol_patterns: SP_GROOVY,
1719 ..C_SLASH_BASE
1720 },
1721 ),
1722 (
1723 Language::Kotlin,
1724 StaticLangConfig {
1725 symbol_patterns: SP_KOTLIN,
1726 ..C_SLASH_BASE
1727 },
1728 ),
1729 (
1730 Language::Scala,
1731 StaticLangConfig {
1732 symbol_patterns: SP_SCALA,
1733 ..C_SLASH_BASE
1734 },
1735 ),
1736 (
1737 Language::Scss,
1738 StaticLangConfig {
1739 symbol_patterns: SP_NONE,
1740 ..C_SLASH_BASE
1741 },
1742 ),
1743 (
1745 Language::Rust,
1746 StaticLangConfig {
1747 symbol_patterns: SP_RUST,
1748 allow_single_quote_strings: false,
1749 ..C_SLASH_BASE
1750 },
1751 ),
1752 (
1754 Language::Swift,
1755 StaticLangConfig {
1756 symbol_patterns: SP_SWIFT,
1757 allow_single_quote_strings: false,
1758 ..C_SLASH_BASE
1759 },
1760 ),
1761 (
1763 Language::Zig,
1764 StaticLangConfig {
1765 symbol_patterns: SP_ZIG,
1766 block_comment: None,
1767 ..C_SLASH_BASE
1768 },
1769 ),
1770 (
1772 Language::FSharp,
1773 StaticLangConfig {
1774 line_comments: &["//"],
1775 block_comment: Some(("(*", "*)")),
1776 allow_single_quote_strings: false,
1777 allow_double_quote_strings: true,
1778 symbol_patterns: SP_FSHARP,
1779 ..C_SLASH_BASE
1780 },
1781 ),
1782 (
1784 Language::Shell,
1785 StaticLangConfig {
1786 symbol_patterns: SP_SHELL,
1787 ..HASH_BASE
1788 },
1789 ),
1790 (
1791 Language::Elixir,
1792 StaticLangConfig {
1793 symbol_patterns: SP_ELIXIR,
1794 ..HASH_BASE
1795 },
1796 ),
1797 (
1798 Language::Perl,
1799 StaticLangConfig {
1800 symbol_patterns: SP_PERL,
1801 ..HASH_BASE
1802 },
1803 ),
1804 (
1805 Language::R,
1806 StaticLangConfig {
1807 symbol_patterns: SP_R,
1808 ..HASH_BASE
1809 },
1810 ),
1811 (
1812 Language::Ruby,
1813 StaticLangConfig {
1814 symbol_patterns: SP_RUBY,
1815 ..HASH_BASE
1816 },
1817 ),
1818 (
1820 Language::Python,
1821 StaticLangConfig {
1822 symbol_patterns: SP_PYTHON,
1823 allow_triple_quote_strings: true,
1824 ..HASH_BASE
1825 },
1826 ),
1827 (
1829 Language::PowerShell,
1830 StaticLangConfig {
1831 symbol_patterns: SP_POWERSHELL,
1832 block_comment: Some(("<#", "#>")),
1833 ..HASH_BASE
1834 },
1835 ),
1836 (
1838 Language::Nim,
1839 StaticLangConfig {
1840 symbol_patterns: SP_NIM,
1841 block_comment: Some(("#[", "]#")),
1842 ..HASH_BASE
1843 },
1844 ),
1845 (
1847 Language::Makefile,
1848 StaticLangConfig {
1849 symbol_patterns: SP_NONE,
1850 allow_single_quote_strings: false,
1851 allow_double_quote_strings: false,
1852 ..HASH_BASE
1853 },
1854 ),
1855 (
1856 Language::Dockerfile,
1857 StaticLangConfig {
1858 symbol_patterns: SP_NONE,
1859 allow_single_quote_strings: false,
1860 allow_double_quote_strings: false,
1861 ..HASH_BASE
1862 },
1863 ),
1864 (
1867 Language::Css,
1868 StaticLangConfig {
1869 line_comments: &[],
1870 block_comment: Some(("/*", "*/")),
1871 symbol_patterns: SP_NONE,
1872 ..C_SLASH_BASE
1873 },
1874 ),
1875 (
1877 Language::Html,
1878 StaticLangConfig {
1879 line_comments: &[],
1880 block_comment: Some(("<!--", "-->")),
1881 allow_single_quote_strings: false,
1882 allow_double_quote_strings: false,
1883 symbol_patterns: SP_NONE,
1884 ..C_SLASH_BASE
1885 },
1886 ),
1887 (
1888 Language::Xml,
1889 StaticLangConfig {
1890 line_comments: &[],
1891 block_comment: Some(("<!--", "-->")),
1892 allow_single_quote_strings: false,
1893 allow_double_quote_strings: false,
1894 symbol_patterns: SP_NONE,
1895 ..C_SLASH_BASE
1896 },
1897 ),
1898 (
1900 Language::Lua,
1901 StaticLangConfig {
1902 line_comments: &["--"],
1903 block_comment: Some(("--[[", "]]")),
1904 symbol_patterns: SP_LUA,
1905 ..C_SLASH_BASE
1906 },
1907 ),
1908 (
1910 Language::Haskell,
1911 StaticLangConfig {
1912 line_comments: &["--"],
1913 block_comment: Some(("{-", "-}")),
1914 symbol_patterns: SP_HASKELL,
1915 ..C_SLASH_BASE
1916 },
1917 ),
1918 (
1920 Language::Sql,
1921 StaticLangConfig {
1922 line_comments: &["--"],
1923 block_comment: Some(("/*", "*/")),
1924 allow_single_quote_strings: true,
1925 allow_double_quote_strings: false,
1926 symbol_patterns: SP_SQL,
1927 ..C_SLASH_BASE
1928 },
1929 ),
1930 (
1932 Language::Ocaml,
1933 StaticLangConfig {
1934 line_comments: &[],
1935 block_comment: Some(("(*", "*)")),
1936 allow_single_quote_strings: false,
1937 symbol_patterns: SP_OCAML,
1938 ..C_SLASH_BASE
1939 },
1940 ),
1941 (
1943 Language::Assembly,
1944 StaticLangConfig {
1945 line_comments: &[";"],
1946 block_comment: None,
1947 allow_single_quote_strings: false,
1948 allow_double_quote_strings: false,
1949 symbol_patterns: SP_ASSEMBLY,
1950 ..C_SLASH_BASE
1951 },
1952 ),
1953 (
1954 Language::Clojure,
1955 StaticLangConfig {
1956 line_comments: &[";"],
1957 block_comment: None,
1958 allow_single_quote_strings: false,
1959 symbol_patterns: SP_CLOJURE,
1960 ..C_SLASH_BASE
1961 },
1962 ),
1963 (
1965 Language::Erlang,
1966 StaticLangConfig {
1967 line_comments: &["%"],
1968 block_comment: None,
1969 allow_single_quote_strings: false,
1970 symbol_patterns: SP_ERLANG,
1971 ..C_SLASH_BASE
1972 },
1973 ),
1974 (
1976 Language::Php,
1977 StaticLangConfig {
1978 line_comments: &["//", "#"],
1979 block_comment: Some(("/*", "*/")),
1980 symbol_patterns: SP_PHP,
1981 ..C_SLASH_BASE
1982 },
1983 ),
1984 (
1986 Language::Julia,
1987 StaticLangConfig {
1988 line_comments: &["#"],
1989 block_comment: Some(("#=", "=#")),
1990 allow_single_quote_strings: false,
1991 allow_triple_quote_strings: true,
1992 symbol_patterns: SP_JULIA,
1993 ..C_SLASH_BASE
1994 },
1995 ),
1996];
1997
/// Switches that adjust how physical lines are classified and counted.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// When set, `finalize_line_facts` counts code-only lines whose trimmed
    /// text starts with `#` as compiler directives.
    has_preprocessor_directives: bool,
    /// When set, `process_physical_line` marks blank lines inside an open
    /// block comment as comment lines.
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside comments and strings) is merged
    /// with the following line before classification.
    collapse_continuation_lines: bool,
}
2009
/// Kind of string literal the scanner is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Quoted string closed by the stored delimiter; a backslash skips the
    /// character that follows it.
    Single(char),
    /// Triple-quoted string closed by the stored three-character delimiter.
    Triple(&'static str),
    /// Verbatim string opened by `@"`: `""` is an escaped quote and a lone `"`
    /// closes the literal.
    VerbatimDouble,
}
2016
/// What the scanner observed on one logical line; consumed by `classify_line`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// The line holds code, including any part of a string literal.
    has_code: bool,
    /// A line-comment prefix was found outside strings and block comments.
    has_single_comment: bool,
    /// Part of a block comment lies on this line.
    has_multi_comment: bool,
    /// The line is a docstring line; takes precedence in `classify_line`.
    has_docstring: bool,
}
2025
2026fn process_string_char(
2030 state: StringState,
2031 chars: &[char],
2032 i: usize,
2033) -> (Option<StringState>, usize) {
2034 match state {
2035 StringState::Single(delim) => {
2036 if chars[i] == '\\' {
2037 return (Some(state), 2); }
2039 if chars[i] == delim {
2040 (None, 1)
2041 } else {
2042 (Some(state), 1)
2043 }
2044 }
2045 StringState::Triple(delim) => {
2046 if starts_with(chars, i, delim) {
2047 (None, delim.len())
2048 } else {
2049 (Some(state), 1)
2050 }
2051 }
2052 StringState::VerbatimDouble => {
2053 if starts_with(chars, i, "\"\"") {
2054 return (Some(state), 2); }
2056 if chars[i] == '"' {
2057 (None, 1)
2058 } else {
2059 (Some(state), 1)
2060 }
2061 }
2062 }
2063}
2064
2065fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2069 if starts_with(chars, i, close) {
2070 (false, close.len())
2071 } else {
2072 (true, 1)
2073 }
2074}
2075
2076fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2080 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2081 return Some((StringState::VerbatimDouble, 2));
2082 }
2083 if config.allow_triple_quote_strings {
2084 if starts_with(chars, i, "\"\"\"") {
2085 return Some((StringState::Triple("\"\"\""), 3));
2086 }
2087 if starts_with(chars, i, "'''") {
2088 return Some((StringState::Triple("'''"), 3));
2089 }
2090 }
2091 if config.allow_single_quote_strings && chars[i] == '\'' {
2092 return Some((StringState::Single('\''), 1));
2093 }
2094 if config.allow_double_quote_strings && chars[i] == '"' {
2095 return Some((StringState::Single('"'), 1));
2096 }
2097 None
2098}
2099
2100fn step_through_block_comment(
2106 chars: &[char],
2107 i: usize,
2108 block_comment: Option<(&'static str, &'static str)>,
2109 in_block_comment: &mut bool,
2110) -> usize {
2111 if let Some((_, close)) = block_comment {
2112 let (still_in, advance) = process_block_comment_char(chars, i, close);
2113 *in_block_comment = still_in;
2114 return advance;
2115 }
2116 0
2117}
2118
2119fn try_open_block_comment(
2122 chars: &[char],
2123 i: usize,
2124 block_comment: Option<(&'static str, &'static str)>,
2125) -> Option<usize> {
2126 let (open, _) = block_comment?;
2127 starts_with(chars, i, open).then_some(open.len())
2128}
2129
2130fn scan_line(
2134 chars: &[char],
2135 config: &ScanConfig,
2136 facts: &mut LineFacts,
2137 in_block_comment: &mut bool,
2138 string_state: &mut Option<StringState>,
2139) {
2140 let mut i = 0usize;
2141 while i < chars.len() {
2142 if let Some(state) = *string_state {
2144 facts.has_code = true;
2145 let (new_state, advance) = process_string_char(state, chars, i);
2146 *string_state = new_state;
2147 i += advance;
2148 continue;
2149 }
2150
2151 if *in_block_comment {
2153 facts.has_multi_comment = true;
2154 i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
2155 continue;
2156 }
2157
2158 if chars[i].is_whitespace() {
2160 i += 1;
2161 continue;
2162 }
2163
2164 if let Some((new_state, advance)) = try_open_string(chars, i, config) {
2166 facts.has_code = true;
2167 *string_state = Some(new_state);
2168 i += advance;
2169 continue;
2170 }
2171
2172 if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
2174 facts.has_multi_comment = true;
2175 *in_block_comment = true;
2176 i += advance;
2177 continue;
2178 }
2179
2180 if config
2182 .line_comments
2183 .iter()
2184 .any(|prefix| starts_with(chars, i, prefix))
2185 {
2186 facts.has_single_comment = true;
2187 break;
2188 }
2189
2190 facts.has_code = true;
2192 i += 1;
2193 }
2194}
2195
2196fn finalize_line_facts(
2201 facts: LineFacts,
2202 trimmed: &str,
2203 raw: &mut RawLineCounts,
2204 ieee: IeeeFlags,
2205 in_block_comment: bool,
2206 string_state: Option<StringState>,
2207 pending_continuation: &mut Option<LineFacts>,
2208) -> Option<LineFacts> {
2209 if ieee.has_preprocessor_directives
2213 && facts.has_code
2214 && !facts.has_single_comment
2215 && !facts.has_multi_comment
2216 && trimmed.starts_with('#')
2217 {
2218 raw.compiler_directive_lines += 1;
2219 }
2220
2221 let is_continuation = ieee.collapse_continuation_lines
2224 && !in_block_comment
2225 && string_state.is_none()
2226 && trimmed.ends_with('\\');
2227
2228 if is_continuation {
2229 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2230 pending.has_code |= facts.has_code;
2231 pending.has_single_comment |= facts.has_single_comment;
2232 pending.has_multi_comment |= facts.has_multi_comment;
2233 pending.has_docstring |= facts.has_docstring;
2234 return None; }
2236
2237 let emit = if let Some(pending) = pending_continuation.take() {
2239 LineFacts {
2240 has_code: pending.has_code | facts.has_code,
2241 has_single_comment: pending.has_single_comment | facts.has_single_comment,
2242 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2243 has_docstring: pending.has_docstring | facts.has_docstring,
2244 }
2245 } else {
2246 facts
2247 };
2248 Some(emit)
2249}
2250
2251#[allow(clippy::needless_pass_by_value)]
2256#[allow(clippy::too_many_arguments)]
2257#[allow(clippy::many_single_char_names)] fn process_physical_line(
2259 line: &str,
2260 line_idx: usize,
2261 config: &ScanConfig,
2262 raw: &mut RawLineCounts,
2263 in_block_comment: &mut bool,
2264 string_state: &mut Option<StringState>,
2265 pending_continuation: &mut Option<LineFacts>,
2266 ieee: IeeeFlags,
2267) {
2268 raw.total_physical_lines += 1;
2269
2270 if config.skip_lines.contains(&line_idx) {
2271 raw.docstring_comment_lines += 1;
2272 return;
2273 }
2274
2275 let trimmed = line.trim();
2276 let mut facts = LineFacts::default();
2277
2278 if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
2282 facts.has_multi_comment = true;
2283 }
2284
2285 let chars: Vec<char> = line.chars().collect();
2286 scan_line(&chars, config, &mut facts, in_block_comment, string_state);
2287
2288 let Some(emit) = finalize_line_facts(
2289 facts,
2290 trimmed,
2291 raw,
2292 ieee,
2293 *in_block_comment,
2294 *string_state,
2295 pending_continuation,
2296 ) else {
2297 return;
2298 };
2299
2300 classify_line(raw, &emit, trimmed);
2301
2302 if emit.has_code {
2303 let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
2304 raw.functions += f;
2305 raw.classes += c;
2306 raw.variables += v;
2307 raw.imports += i;
2308 raw.test_count += t;
2309 raw.test_assertion_count += a;
2310 raw.test_suite_count += s;
2311 }
2312}
2313
2314#[allow(clippy::needless_pass_by_value)]
2315fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
2316 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2317 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2318
2319 let mut raw = RawLineCounts::default();
2320 let mut warnings = Vec::new();
2321
2322 let mut in_block_comment = false;
2323 let mut string_state: Option<StringState> = None;
2324 let mut pending_continuation: Option<LineFacts> = None;
2326
2327 for (line_idx, line) in lines.iter().enumerate() {
2328 process_physical_line(
2329 line,
2330 line_idx,
2331 &config,
2332 &mut raw,
2333 &mut in_block_comment,
2334 &mut string_state,
2335 &mut pending_continuation,
2336 ieee,
2337 );
2338 }
2339
2340 if let Some(pending) = pending_continuation.take() {
2342 classify_line(&mut raw, &pending, "");
2343 }
2344
2345 if in_block_comment {
2346 warnings.push("unclosed block comment detected; result is best effort".into());
2347 }
2348 if string_state.is_some() {
2349 warnings.push("unclosed string literal detected; result is best effort".into());
2350 }
2351
2352 RawFileAnalysis {
2353 raw,
2354 parse_mode: if warnings.is_empty() {
2355 ParseMode::Lexical
2356 } else {
2357 ParseMode::LexicalBestEffort
2358 },
2359 warnings,
2360 }
2361}
2362
2363const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
2364 if facts.has_docstring {
2365 raw.docstring_comment_lines += 1;
2366 } else if !facts.has_code
2367 && !facts.has_single_comment
2368 && !facts.has_multi_comment
2369 && trimmed.is_empty()
2370 {
2371 raw.blank_only_lines += 1;
2372 } else if facts.has_code && facts.has_single_comment {
2373 raw.mixed_code_single_comment_lines += 1;
2374 } else if facts.has_code && facts.has_multi_comment {
2375 raw.mixed_code_multi_comment_lines += 1;
2376 } else if facts.has_code {
2377 raw.code_only_lines += 1;
2378 } else if facts.has_single_comment {
2379 raw.single_comment_only_lines += 1;
2380 } else if facts.has_multi_comment {
2381 raw.multi_comment_only_lines += 1;
2382 } else if trimmed.is_empty() {
2383 raw.blank_only_lines += 1;
2384 } else {
2385 raw.skipped_unknown_lines += 1;
2386 }
2387}
2388
2389fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
2390 let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
2391 let fn_pp = if patterns.functions_prefix_paren.is_empty() {
2394 0
2395 } else if let Some(paren_pos) = trimmed.find('(') {
2396 if trimmed[..paren_pos].contains('=') {
2397 0
2398 } else {
2399 hit(patterns.functions_prefix_paren)
2400 }
2401 } else {
2402 0
2403 };
2404 let test_hit = hit(patterns.tests);
2405 let fn_hit = if test_hit == 0 {
2412 hit(patterns.functions) | fn_pp
2413 } else {
2414 0
2415 };
2416 let class_hit = if test_hit == 0 {
2417 hit(patterns.classes)
2418 } else {
2419 0
2420 };
2421 (
2422 fn_hit,
2423 class_hit,
2424 hit(patterns.variables),
2425 hit(patterns.imports),
2426 test_hit,
2427 hit(patterns.assertions),
2428 hit(patterns.test_suites),
2429 )
2430}
2431
/// Reports whether the characters of `needle` appear in `chars` starting at
/// `index`.
///
/// Returns `false` when `index` lies beyond the end of `chars` or when fewer
/// characters remain than `needle` needs. An empty `needle` matches at every
/// position up to and including `chars.len()`.
///
/// This runs for every scanned character, so it compares in place without
/// allocating and never computes `index + len` (which could overflow).
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
2436
/// One entry of the indentation stack used while locating Python docstrings.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation (counted in leading whitespace characters) of the block.
    indent: usize,
    /// `true` until the first statement of the block has been inspected; only
    /// that statement may be recorded as the block's docstring.
    expect_docstring: bool,
}
2442
2443fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
2445 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
2446 contexts.pop();
2447 }
2448}
2449
2450fn py_handle_pending_indent(
2453 pending_block_indent: &mut Option<usize>,
2454 contexts: &mut Vec<PyContext>,
2455 indent: usize,
2456 trimmed: &str,
2457) {
2458 let Some(base_indent) = *pending_block_indent else {
2459 return;
2460 };
2461 if indent > base_indent {
2462 contexts.push(PyContext {
2463 indent,
2464 expect_docstring: true,
2465 });
2466 *pending_block_indent = None;
2467 } else if !trimmed.starts_with('@') {
2468 *pending_block_indent = None;
2469 }
2470}
2471
2472fn py_try_record_docstring(
2478 ctx: &mut PyContext,
2479 trimmed: &str,
2480 idx: usize,
2481 docstring_lines: &mut HashSet<usize>,
2482 active_docstring: &mut Option<(&'static str, usize)>,
2483) -> bool {
2484 if !ctx.expect_docstring {
2485 return false;
2486 }
2487 if let Some(delim) = docstring_delimiter(trimmed) {
2488 docstring_lines.insert(idx);
2489 ctx.expect_docstring = false;
2490 if !closes_triple_docstring(trimmed, delim, true) {
2491 *active_docstring = Some((delim, idx));
2492 }
2493 return true;
2494 }
2495 ctx.expect_docstring = false;
2496 false
2497}
2498
2499fn track_active_docstring(
2503 active_docstring: &mut Option<(&'static str, usize)>,
2504 docstring_lines: &mut HashSet<usize>,
2505 idx: usize,
2506 trimmed: &str,
2507) -> bool {
2508 let Some((delim, start_line)) = *active_docstring else {
2509 return false;
2510 };
2511 docstring_lines.insert(idx);
2512 if closes_triple_docstring(trimmed, delim, idx == start_line) {
2513 *active_docstring = None;
2514 }
2515 true
2516}
2517
2518fn try_record_docstring_if_context(
2521 contexts: &mut [PyContext],
2522 trimmed: &str,
2523 idx: usize,
2524 docstring_lines: &mut HashSet<usize>,
2525 active_docstring: &mut Option<(&'static str, usize)>,
2526) -> bool {
2527 let Some(ctx) = contexts.last_mut() else {
2528 return false;
2529 };
2530 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
2531}
2532
/// Marks every line from the start of a still-open docstring to the end of the
/// file as a docstring line.
///
/// Does nothing when `active_docstring` is `None`.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let first_line = match active_docstring {
        Some(&(_, start_line)) => start_line,
        None => return,
    };
    docstring_lines.extend(first_line..num_lines);
}
2545
2546fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
2547 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2548 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2549
2550 let mut docstring_lines = HashSet::new();
2551 let mut contexts = vec![PyContext {
2552 indent: 0,
2553 expect_docstring: true,
2554 }];
2555 let mut pending_block_indent: Option<usize> = None;
2556 let mut active_docstring: Option<(&'static str, usize)> = None;
2557
2558 for (idx, line) in lines.iter().enumerate() {
2559 let trimmed = line.trim();
2560 let indent = leading_indent(line);
2561
2562 if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
2563 continue;
2564 }
2565
2566 if trimmed.is_empty() || trimmed.starts_with('#') {
2568 continue;
2569 }
2570
2571 py_pop_outdented_contexts(&mut contexts, indent);
2572 py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
2573
2574 if try_record_docstring_if_context(
2575 &mut contexts,
2576 trimmed,
2577 idx,
2578 &mut docstring_lines,
2579 &mut active_docstring,
2580 ) {
2581 continue;
2582 }
2583
2584 if is_python_block_header(trimmed) {
2585 pending_block_indent = Some(indent);
2586 }
2587 }
2588
2589 mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
2590
2591 docstring_lines
2592}
2593
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, tabs included; a line made up
/// entirely of whitespace yields its full character count.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .position(|c| !c.is_whitespace())
        .unwrap_or_else(|| line.chars().count())
}
2597
/// Reports whether a trimmed line is a one-line `def`, `async def` or `class`
/// header, i.e. starts with one of those keywords and ends with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];

    trimmed.ends_with(':')
        && HEADER_KEYWORDS
            .iter()
            .any(|keyword| trimmed.starts_with(keyword))
}
2604
/// Returns the triple-quote delimiter that opens `trimmed`, if any.
///
/// Any run of string-prefix letters (`r`, `u`, `b`, `f` in either case) in
/// front of the quotes is skipped before looking for `"""` or `'''`.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    const FENCES: [&str; 2] = ["\"\"\"", "'''"];

    let after_prefix = trimmed
        .trim_start_matches(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F'));

    FENCES
        .iter()
        .copied()
        .find(|fence| after_prefix.starts_with(fence))
}
2626
/// Reports whether a docstring delimited by `delim` is closed on this line.
///
/// On the line that opens the docstring (`same_line_as_start`) the opener
/// itself is one occurrence, so a second non-overlapping occurrence is needed;
/// on any later line a single occurrence closes it.
///
/// An empty `delim` can never close anything and yields `false` (a manual
/// find-and-advance loop would never make progress on it).
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    if delim.is_empty() {
        return false;
    }
    let needed = if same_line_as_start { 2 } else { 1 };
    // `str::matches` yields non-overlapping occurrences; stop as soon as
    // enough have been seen.
    trimmed.matches(delim).take(needed).count() == needed
}
2641
2642#[cfg(feature = "tree-sitter")]
2647pub mod ts {
2648 use tree_sitter::Node;
2649
2650 use super::{ParseMode, RawFileAnalysis, RawLineCounts};
2651
    /// Grammar-specific names used to count symbols in a syntax tree.
    ///
    /// An empty string disables the corresponding check.
    struct SymbolKinds {
        /// Node kind that denotes a function definition.
        function_def: &'static str,
        /// Node kind that denotes a class definition.
        class_def: &'static str,
        /// Functions whose name starts with this prefix are counted as tests
        /// instead of functions.
        test_fn_prefix: &'static str,
        /// Classes whose name starts with this prefix are counted as tests
        /// instead of classes.
        test_class_prefix: &'static str,
        /// `call` nodes whose callee is an `attribute` node with an attribute
        /// name starting with this prefix are counted as assertions.
        assertion_attr_prefix: &'static str,
    }
2669
2670 impl SymbolKinds {
2671 const fn none() -> Self {
2672 Self {
2673 function_def: "",
2674 class_def: "",
2675 test_fn_prefix: "",
2676 test_class_prefix: "",
2677 assertion_attr_prefix: "",
2678 }
2679 }
2680 }
2681
2682 fn analyze_lines(
2688 text: &str,
2689 ts_language: &tree_sitter::Language,
2690 comment_node_kinds: &[&str],
2691 docstring_stmt_kind: Option<&str>,
2692 symbols: &SymbolKinds,
2693 ) -> Option<RawFileAnalysis> {
2694 let mut parser = tree_sitter::Parser::new();
2695 parser.set_language(ts_language).ok()?;
2696 let tree = parser.parse(text, None)?;
2697
2698 let lines: Vec<&str> = text.split_terminator('\n').collect();
2699 let n = lines.len();
2700
2701 let mut has_code = vec![false; n];
2702 let mut has_comment = vec![false; n];
2703 let mut comment_is_block = vec![false; n];
2704 let mut has_docstring = vec![false; n];
2705
2706 let mut ctx = VisitCtx {
2708 source: text.as_bytes(),
2709 comment_kinds: comment_node_kinds,
2710 docstring_stmt_kind,
2711 has_code: &mut has_code,
2712 has_comment: &mut has_comment,
2713 comment_is_block: &mut comment_is_block,
2714 has_docstring: &mut has_docstring,
2715 };
2716 visit(tree.root_node(), &mut ctx);
2717
2718 let mut raw = RawLineCounts::default();
2719 classify_ts_lines(
2720 &lines,
2721 &has_code,
2722 &has_comment,
2723 &comment_is_block,
2724 &has_docstring,
2725 &mut raw,
2726 );
2727
2728 if !symbols.function_def.is_empty() || !symbols.class_def.is_empty() {
2730 count_symbols(tree.root_node(), text.as_bytes(), symbols, &mut raw);
2731 }
2732
2733 Some(RawFileAnalysis {
2734 raw,
2735 parse_mode: ParseMode::TreeSitter,
2736 warnings: Vec::new(),
2737 })
2738 }
2739
2740 fn recurse_children(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
2742 for i in 0..node.child_count() {
2743 #[allow(clippy::cast_possible_truncation)]
2744 if let Some(child) = node.child(i as u32) {
2745 count_symbols(child, source, kinds, raw);
2746 }
2747 }
2748 }
2749
2750 fn try_count_function(
2752 node: Node,
2753 source: &[u8],
2754 kinds: &SymbolKinds,
2755 raw: &mut RawLineCounts,
2756 ) -> bool {
2757 if kinds.function_def.is_empty() || node.kind() != kinds.function_def {
2758 return false;
2759 }
2760 let name = node
2761 .child_by_field_name("name")
2762 .and_then(|n| n.utf8_text(source).ok())
2763 .unwrap_or("");
2764 if !kinds.test_fn_prefix.is_empty() && name.starts_with(kinds.test_fn_prefix) {
2765 raw.test_count += 1;
2766 } else {
2767 raw.functions += 1;
2768 }
2769 recurse_children(node, source, kinds, raw);
2770 true
2771 }
2772
2773 fn try_count_class(
2775 node: Node,
2776 source: &[u8],
2777 kinds: &SymbolKinds,
2778 raw: &mut RawLineCounts,
2779 ) -> bool {
2780 if kinds.class_def.is_empty() || node.kind() != kinds.class_def {
2781 return false;
2782 }
2783 let name = node
2784 .child_by_field_name("name")
2785 .and_then(|n| n.utf8_text(source).ok())
2786 .unwrap_or("");
2787 if !kinds.test_class_prefix.is_empty() && name.starts_with(kinds.test_class_prefix) {
2788 raw.test_count += 1;
2789 } else {
2790 raw.classes += 1;
2791 }
2792 recurse_children(node, source, kinds, raw);
2793 true
2794 }
2795
2796 fn try_count_assertion(
2799 node: Node,
2800 source: &[u8],
2801 kinds: &SymbolKinds,
2802 raw: &mut RawLineCounts,
2803 ) -> bool {
2804 if kinds.assertion_attr_prefix.is_empty() || node.kind() != "call" {
2805 return false;
2806 }
2807 let Some(func) = node.child_by_field_name("function") else {
2808 return false;
2809 };
2810 if func.kind() != "attribute" {
2811 return false;
2812 }
2813 let attr_text = func
2814 .child_by_field_name("attribute")
2815 .and_then(|n| n.utf8_text(source).ok())
2816 .unwrap_or("");
2817 if !attr_text.starts_with(kinds.assertion_attr_prefix) {
2818 return false;
2819 }
2820 raw.test_assertion_count += 1;
2821 true
2822 }
2823
2824 fn count_symbols(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
2827 if try_count_function(node, source, kinds, raw) {
2828 return;
2829 }
2830 if try_count_class(node, source, kinds, raw) {
2831 return;
2832 }
2833 if try_count_assertion(node, source, kinds, raw) {
2834 return;
2835 }
2836 recurse_children(node, source, kinds, raw);
2837 }
2838
/// Per-line facts gathered from the syntax tree, consumed by
/// `classify_ts_line` to decide which line counter to increment.
// The struct has more than three `bool` fields, which is what the
// `struct_excessive_bools` lint flags; the lint is silenced on purpose.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct TsLineFlags {
    /// The line is covered by at least one non-comment leaf node.
    has_code: bool,
    /// The line is covered by at least one comment node.
    has_comment: bool,
    /// A comment covering the line starts with a block-comment opener.
    comment_is_block: bool,
    /// The line is covered by a docstring string literal.
    has_docstring: bool,
}
2849
2850 const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
2852 if trimmed.is_empty() {
2853 raw.blank_only_lines += 1;
2854 } else if flags.has_docstring && !flags.has_code {
2855 raw.docstring_comment_lines += 1;
2856 } else if flags.has_code && flags.has_comment {
2857 if flags.comment_is_block {
2859 raw.mixed_code_multi_comment_lines += 1;
2860 } else {
2861 raw.mixed_code_single_comment_lines += 1;
2862 }
2863 } else if flags.has_comment {
2864 if flags.comment_is_block {
2865 raw.multi_comment_only_lines += 1;
2866 } else {
2867 raw.single_comment_only_lines += 1;
2868 }
2869 } else {
2870 raw.code_only_lines += 1;
2871 }
2872 }
2873
2874 fn classify_ts_lines(
2876 lines: &[&str],
2877 has_code: &[bool],
2878 has_comment: &[bool],
2879 comment_is_block: &[bool],
2880 has_docstring: &[bool],
2881 raw: &mut RawLineCounts,
2882 ) {
2883 for i in 0..lines.len() {
2884 raw.total_physical_lines += 1;
2885 classify_ts_line(
2886 lines[i].trim(),
2887 TsLineFlags {
2888 has_code: has_code[i],
2889 has_comment: has_comment[i],
2890 comment_is_block: comment_is_block[i],
2891 has_docstring: has_docstring[i],
2892 },
2893 raw,
2894 );
2895 }
2896 }
2897
/// Shared state threaded through the recursive `visit` walk.
///
/// The four `has_*` / `comment_is_block` vectors are indexed by the row
/// number reported by the syntax tree; the visitors ignore rows that fall
/// outside a vector's length.
struct VisitCtx<'a> {
    /// Bytes of the file being analyzed; used to read the text of comment nodes.
    source: &'a [u8],
    /// Node kinds that `visit` treats as comments.
    comment_kinds: &'a [&'a str],
    /// Kind of the statement node that may wrap a docstring, or `None` to
    /// disable docstring detection.
    docstring_stmt_kind: Option<&'a str>,
    /// Set to `true` for every row covered by a non-comment leaf node.
    has_code: &'a mut Vec<bool>,
    /// Set to `true` for every row covered by a comment node.
    has_comment: &'a mut Vec<bool>,
    /// Set to `true` for every row covered by a comment that starts with a
    /// block-comment opener.
    comment_is_block: &'a mut Vec<bool>,
    /// Set to `true` for every row covered by a detected docstring.
    has_docstring: &'a mut Vec<bool>,
}
2907
2908 fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
2910 let start_row = node.start_position().row;
2911 let end_row = node.end_position().row;
2912 let first_two = node
2913 .utf8_text(ctx.source)
2914 .unwrap_or("")
2915 .get(..2)
2916 .unwrap_or("");
2917 let is_block = first_two == "/*" || first_two == "<#";
2918 for row in start_row..=end_row {
2919 if row < ctx.has_comment.len() {
2920 ctx.has_comment[row] = true;
2921 if is_block {
2922 ctx.comment_is_block[row] = true;
2923 }
2924 }
2925 }
2926 }
2927
2928 fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
2931 let Some(stmt_kind) = ctx.docstring_stmt_kind else {
2932 return false;
2933 };
2934 if kind != stmt_kind || node.named_child_count() != 1 {
2935 return false;
2936 }
2937 let Some(child) = node.named_child(0) else {
2938 return false;
2939 };
2940 if child.kind() != "string" {
2941 return false;
2942 }
2943 let child_start = child.start_position().row;
2944 let child_end = child.end_position().row;
2945 for row in child_start..=child_end {
2946 if row < ctx.has_docstring.len() {
2947 ctx.has_docstring[row] = true;
2948 }
2949 }
2950 true
2951 }
2952
2953 fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
2955 let start_row = node.start_position().row;
2956 let end_row = node.end_position().row;
2957 for row in start_row..=end_row {
2958 if row < ctx.has_code.len() {
2959 ctx.has_code[row] = true;
2960 }
2961 }
2962 }
2963
2964 #[allow(clippy::too_many_lines)]
2965 fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
2966 let kind = node.kind();
2967
2968 if ctx.comment_kinds.contains(&kind) {
2970 visit_comment_node(node, ctx);
2971 return;
2972 }
2973
2974 if visit_maybe_docstring(node, kind, ctx) {
2976 return;
2977 }
2978
2979 if node.child_count() == 0 && !node.is_extra() {
2981 visit_leaf_code(node, ctx);
2982 return;
2983 }
2984
2985 for i in 0..node.child_count() {
2986 #[allow(clippy::cast_possible_truncation)]
2987 if let Some(child) = node.child(i as u32) {
2989 visit(child, ctx);
2990 }
2991 }
2992 }
2993
/// Symbol configuration passed to `analyze_lines` by `analyze_c`.
// NOTE(review): `SymbolKinds::none()` is defined outside this view; presumably
// it leaves every node kind and prefix empty, which makes the `try_count_*`
// helpers return `false` for every node. Confirm against its definition.
const C_SYMBOLS: SymbolKinds = SymbolKinds::none();
2995
/// Symbol configuration passed to `analyze_lines` by `analyze_python`.
const PYTHON_SYMBOLS: SymbolKinds = SymbolKinds {
    // Node kind that `try_count_function` counts as a function.
    function_def: "function_definition",
    // Node kind that `try_count_class` counts as a class.
    class_def: "class_definition",
    // Functions whose name starts with this are counted as tests instead.
    test_fn_prefix: "test_",
    // Classes whose name starts with this are counted as tests instead.
    test_class_prefix: "Test",
    // Attribute calls (`obj.<name>(...)`) whose `<name>` starts with this are
    // counted as test assertions by `try_count_assertion`.
    assertion_attr_prefix: "assert",
};
3003
/// Analyzes C source `text` using the tree-sitter C grammar.
///
/// Delegates to `analyze_lines` with `comment` as the only comment node kind,
/// docstring detection disabled (`None`), and `C_SYMBOLS` as the symbol
/// configuration. The result, including when `None` is returned, is
/// whatever `analyze_lines` produces.
#[must_use]
pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
    let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
    analyze_lines(text, &lang, &["comment"], None, &C_SYMBOLS)
}
3010
/// Analyzes Python source `text` using the tree-sitter Python grammar.
///
/// Delegates to `analyze_lines` with `comment` as the only comment node kind,
/// `expression_statement` as the statement kind that may wrap a docstring,
/// and `PYTHON_SYMBOLS` as the symbol configuration. The result, including
/// when `None` is returned, is whatever `analyze_lines` produces.
#[must_use]
pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
    let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
    analyze_lines(
        text,
        &lang,
        &["comment"],
        // A statement consisting solely of a string literal is a docstring.
        Some("expression_statement"),
        &PYTHON_SYMBOLS,
    )
}
3023}
3024
// Unit tests for line classification, language detection and symbol counting.
#[cfg(test)]
mod tests {
    use super::*;

    // Module and function docstrings land in the docstring counter, the line
    // with a trailing `#` comment is "mixed", and the remaining two statements
    // (`def` and `return`) are code-only.
    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
 """function docs"""
 value = 1 # trailing comment
 return value
"#;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        assert_eq!(result.raw.docstring_comment_lines, 2);
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.code_only_lines, 2);
    }

    // Code followed by `//` is a mixed single-comment line; a line holding
    // only `/* ... */` is a multi-comment-only line.
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let result = analyze_text(Language::C, input, AnalysisOptions::default());
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.multi_comment_only_lines, 1);
    }

    // A file without an extension is identified through its shebang line.
    // NOTE(review): the meaning of the empty map and the trailing `true` is
    // defined by `detect_language`, which is outside this view.
    #[test]
    fn detect_language_by_shebang() {
        let language = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(language, Some(Language::Shell));
    }

    // Analyzes a single `line` (a trailing newline is appended) with default
    // options and returns the symbol counters as
    // `(functions, classes, variables, imports, test_count,
    //   test_assertion_count, test_suite_count)`.
    fn sym(lang: Language, line: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
        let result = analyze_text(lang, &format!("{line}\n"), AnalysisOptions::default());
        let r = &result.raw;
        (
            r.functions,
            r.classes,
            r.variables,
            r.imports,
            r.test_count,
            r.test_assertion_count,
            r.test_suite_count,
        )
    }

    // A `test_`-prefixed Python function is a test only, never also a function.
    #[test]
    fn python_test_fn_not_double_counted() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def test_foo():");
        assert_eq!(f, 0, "test fn must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(c, 0);
    }

    // A `Test`-prefixed Python class is a test only, never also a class.
    #[test]
    fn python_test_class_not_double_counted() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class TestFoo:");
        assert_eq!(c, 0, "test class must not also increment classes");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(f, 0);
    }

    // A Python function without the test prefix counts as a plain function.
    #[test]
    fn python_regular_fn_counts_as_function() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def regular():");
        assert_eq!(f, 1, "regular function must be counted");
        assert_eq!(t, 0);
        assert_eq!(c, 0);
    }

    // A Python class without the test prefix counts as a plain class.
    #[test]
    fn python_regular_class_counts_as_class() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class Regular:");
        assert_eq!(c, 1, "regular class must be counted");
        assert_eq!(t, 0);
        assert_eq!(f, 0);
    }

    // Go `TestXxx` functions are tests, not functions.
    #[test]
    fn go_test_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func TestFoo(t *testing.T) {");
        assert_eq!(f, 0, "Go test func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    // Go `BenchmarkXxx` functions are counted as tests as well.
    #[test]
    fn go_benchmark_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func BenchmarkBar(b *testing.B) {");
        assert_eq!(f, 0, "Go benchmark func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    // Any other Go function counts as a plain function.
    #[test]
    fn go_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func doSomething() {");
        assert_eq!(f, 1, "regular Go func must be counted");
        assert_eq!(t, 0);
    }

    // In Rust the `#[test]` attribute line is what marks a test.
    #[test]
    fn rust_test_attr_counts_as_test_not_function() {
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "#[test]");
        assert_eq!(t, 1, "#[test] must be counted as a test");
        assert_eq!(f, 0, "#[test] attribute must not be counted as a function");
    }

    // The `fn` line itself is a function even when its name starts with
    // `test_`; only the attribute contributes to the test count.
    #[test]
    fn rust_fn_line_counts_as_function_not_test() {
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "fn test_something() {");
        assert_eq!(f, 1, "fn declaration must count as a function");
        assert_eq!(
            t, 0,
            "fn declaration line must not be double-counted as a test"
        );
    }

    // A JavaScript `describe(...)` call is a test; its arrow-function
    // argument must not be counted as a function.
    #[test]
    fn js_describe_counts_as_test_not_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "describe('suite', () => {");
        assert_eq!(t, 1, "describe must be counted as a test");
        assert_eq!(f, 0, "describe must not be counted as a function");
    }

    // A JavaScript `function` declaration counts as a plain function.
    #[test]
    fn js_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "function doWork() {");
        assert_eq!(f, 1, "JS function declaration must be counted");
        assert_eq!(t, 0);
    }
}