1use std::collections::{BTreeMap, BTreeSet, HashSet};
5use std::path::Path;
6
7use serde::{Deserialize, Serialize};
8
/// A source language the analyzer can identify.
///
/// Serialized through serde with `snake_case` variant names, so multi-word
/// variants gain underscores (`CSharp` -> `c_sharp`, `JavaScript` ->
/// `java_script`). Note that this differs from the strings returned by
/// `Language::as_slug` (`csharp`, `javascript`).
///
/// `PartialOrd`/`Ord` are derived, so values sort in the declaration order
/// below (not alphabetically). Reordering variants therefore changes how
/// values are ordered inside sorted containers such as `BTreeSet`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
}
55
56impl Language {
57 #[must_use]
58 pub const fn display_name(&self) -> &'static str {
59 match self {
60 Self::C => "C",
61 Self::Cpp => "C++",
62 Self::CSharp => "C#",
63 Self::Go => "Go",
64 Self::Java => "Java",
65 Self::JavaScript => "JavaScript",
66 Self::Python => "Python",
67 Self::Rust => "Rust",
68 Self::Shell => "Shell",
69 Self::PowerShell => "PowerShell",
70 Self::TypeScript => "TypeScript",
71 Self::Assembly => "Assembly",
72 Self::Clojure => "Clojure",
73 Self::Css => "CSS",
74 Self::Dart => "Dart",
75 Self::Dockerfile => "Dockerfile",
76 Self::Elixir => "Elixir",
77 Self::Erlang => "Erlang",
78 Self::FSharp => "F#",
79 Self::Groovy => "Groovy",
80 Self::Haskell => "Haskell",
81 Self::Html => "HTML",
82 Self::Julia => "Julia",
83 Self::Kotlin => "Kotlin",
84 Self::Lua => "Lua",
85 Self::Makefile => "Makefile",
86 Self::Nim => "Nim",
87 Self::ObjectiveC => "Objective-C",
88 Self::Ocaml => "OCaml",
89 Self::Perl => "Perl",
90 Self::Php => "PHP",
91 Self::R => "R",
92 Self::Ruby => "Ruby",
93 Self::Scala => "Scala",
94 Self::Scss => "SCSS",
95 Self::Sql => "SQL",
96 Self::Svelte => "Svelte",
97 Self::Swift => "Swift",
98 Self::Vue => "Vue",
99 Self::Xml => "XML",
100 Self::Zig => "Zig",
101 }
102 }
103
104 #[must_use]
105 pub const fn as_slug(&self) -> &'static str {
106 match self {
107 Self::C => "c",
108 Self::Cpp => "cpp",
109 Self::CSharp => "csharp",
110 Self::Go => "go",
111 Self::Java => "java",
112 Self::JavaScript => "javascript",
113 Self::Python => "python",
114 Self::Rust => "rust",
115 Self::Shell => "shell",
116 Self::PowerShell => "powershell",
117 Self::TypeScript => "typescript",
118 Self::Assembly => "assembly",
119 Self::Clojure => "clojure",
120 Self::Css => "css",
121 Self::Dart => "dart",
122 Self::Dockerfile => "dockerfile",
123 Self::Elixir => "elixir",
124 Self::Erlang => "erlang",
125 Self::FSharp => "fsharp",
126 Self::Groovy => "groovy",
127 Self::Haskell => "haskell",
128 Self::Html => "html",
129 Self::Julia => "julia",
130 Self::Kotlin => "kotlin",
131 Self::Lua => "lua",
132 Self::Makefile => "makefile",
133 Self::Nim => "nim",
134 Self::ObjectiveC => "objectivec",
135 Self::Ocaml => "ocaml",
136 Self::Perl => "perl",
137 Self::Php => "php",
138 Self::R => "r",
139 Self::Ruby => "ruby",
140 Self::Scala => "scala",
141 Self::Scss => "scss",
142 Self::Sql => "sql",
143 Self::Svelte => "svelte",
144 Self::Swift => "swift",
145 Self::Vue => "vue",
146 Self::Xml => "xml",
147 Self::Zig => "zig",
148 }
149 }
150
151 #[must_use]
152 pub fn from_name(name: &str) -> Option<Self> {
153 match name.trim().to_ascii_lowercase().as_str() {
154 "c" => Some(Self::C),
155 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
156 "csharp" | "c#" | "cs" => Some(Self::CSharp),
157 "go" | "golang" => Some(Self::Go),
158 "java" => Some(Self::Java),
159 "javascript" | "js" => Some(Self::JavaScript),
160 "python" | "py" => Some(Self::Python),
161 "rust" | "rs" => Some(Self::Rust),
162 "shell" | "sh" | "bash" => Some(Self::Shell),
163 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
164 "typescript" | "ts" => Some(Self::TypeScript),
165 "assembly" | "asm" => Some(Self::Assembly),
166 "clojure" | "clj" => Some(Self::Clojure),
167 "css" => Some(Self::Css),
168 "dart" => Some(Self::Dart),
169 "dockerfile" | "docker" => Some(Self::Dockerfile),
170 "elixir" | "ex" => Some(Self::Elixir),
171 "erlang" | "erl" => Some(Self::Erlang),
172 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
173 "groovy" => Some(Self::Groovy),
174 "haskell" | "hs" => Some(Self::Haskell),
175 "html" | "htm" => Some(Self::Html),
176 "julia" | "jl" => Some(Self::Julia),
177 "kotlin" | "kt" => Some(Self::Kotlin),
178 "lua" => Some(Self::Lua),
179 "makefile" | "make" | "mk" => Some(Self::Makefile),
180 "nim" => Some(Self::Nim),
181 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
182 "ocaml" | "ml" => Some(Self::Ocaml),
183 "perl" | "pl" => Some(Self::Perl),
184 "php" => Some(Self::Php),
185 "r" => Some(Self::R),
186 "ruby" | "rb" => Some(Self::Ruby),
187 "scala" => Some(Self::Scala),
188 "scss" | "sass" => Some(Self::Scss),
189 "sql" => Some(Self::Sql),
190 "svelte" => Some(Self::Svelte),
191 "swift" => Some(Self::Swift),
192 "vue" => Some(Self::Vue),
193 "xml" => Some(Self::Xml),
194 "zig" => Some(Self::Zig),
195 _ => None,
196 }
197 }
198}
199
/// Per-file tallies produced by an analysis pass.
///
/// All counters start at zero via the derived `Default`. The fields marked
/// `#[serde(default)]` may be absent from serialized input, in which case they
/// deserialize to `0`; the remaining fields are required when deserializing.
///
/// NOTE(review): the per-field descriptions below are read off the field
/// names; the exact counting rules live in the scanner, which is not part of
/// this definition.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    /// Presumably every physical line in the file, regardless of category.
    pub total_physical_lines: u64,
    /// Presumably lines containing only whitespace.
    pub blank_only_lines: u64,
    /// Presumably lines containing code and no comment.
    pub code_only_lines: u64,
    /// Presumably lines consisting solely of a single-line comment.
    pub single_comment_only_lines: u64,
    /// Presumably lines lying entirely inside a multi-line (block) comment.
    pub multi_comment_only_lines: u64,
    /// Presumably lines with code followed by a single-line comment.
    pub mixed_code_single_comment_lines: u64,
    /// Presumably lines with both code and part of a block comment.
    pub mixed_code_multi_comment_lines: u64,
    /// Presumably lines that belong to docstrings.
    pub docstring_comment_lines: u64,
    /// Presumably lines the scanner could not classify.
    pub skipped_unknown_lines: u64,
    /// Count of function-like symbols; optional on input (defaults to 0).
    #[serde(default)]
    pub functions: u64,
    /// Count of class-like symbols; optional on input (defaults to 0).
    #[serde(default)]
    pub classes: u64,
    /// Count of variable declarations; optional on input (defaults to 0).
    #[serde(default)]
    pub variables: u64,
    /// Count of import statements; optional on input (defaults to 0).
    #[serde(default)]
    pub imports: u64,
    /// Count of compiler/preprocessor directive lines; optional on input
    /// (defaults to 0).
    #[serde(default)]
    pub compiler_directive_lines: u64,
    /// Count of detected tests; optional on input (defaults to 0).
    #[serde(default)]
    pub test_count: u64,
    /// Count of detected test assertions; optional on input (defaults to 0).
    #[serde(default)]
    pub test_assertion_count: u64,
    /// Count of detected test suites; optional on input (defaults to 0).
    #[serde(default)]
    pub test_suite_count: u64,
}
241
/// Identifies which analysis strategy produced a result.
///
/// Serialized with `snake_case` names: `lexical`, `lexical_best_effort`,
/// `tree_sitter`.
///
/// NOTE(review): variant meanings are inferred from their names; which code
/// path emits which variant is not visible in this definition.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    /// Presumably the generic lexical scanner.
    Lexical,
    /// Presumably the lexical scanner running with reduced accuracy.
    LexicalBestEffort,
    /// Presumably a tree-sitter based parse.
    TreeSitter,
}
249
/// Result of analyzing a single file's text: the counters, the strategy that
/// produced them, and any warnings raised along the way.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// Line and symbol tallies for the file.
    pub raw: RawLineCounts,
    /// Which analysis strategy produced `raw`.
    pub parse_mode: ParseMode,
    /// Free-form warning messages; empty when nothing noteworthy happened
    /// (presumably — the producers are not shown here).
    pub warnings: Vec<String>,
}
256
/// Caller-tunable switches that are forwarded to the generic scanner.
///
/// NOTE(review): the precise effect of each switch is implemented by the
/// scanner, which is not part of this definition; the field notes below are
/// read off the names.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// Presumably: count blank lines inside a block comment as comment lines
    /// rather than as blank lines. Defaults to `true`.
    pub blank_in_block_comment_as_comment: bool,
    /// Presumably: treat lines joined by a continuation marker as a single
    /// logical line. Defaults to `false`.
    pub collapse_continuation_lines: bool,
}

impl Default for AnalysisOptions {
    /// Blank lines in block comments count as comments; continuation lines
    /// are not collapsed.
    fn default() -> Self {
        let blank_in_block_comment_as_comment = true;
        let collapse_continuation_lines = false;
        AnalysisOptions {
            collapse_continuation_lines,
            blank_in_block_comment_as_comment,
        }
    }
}
279
280#[must_use]
281pub fn supported_languages() -> BTreeSet<Language> {
282 [
283 Language::Assembly,
284 Language::C,
285 Language::Clojure,
286 Language::Cpp,
287 Language::CSharp,
288 Language::Css,
289 Language::Dart,
290 Language::Dockerfile,
291 Language::Elixir,
292 Language::Erlang,
293 Language::FSharp,
294 Language::Go,
295 Language::Groovy,
296 Language::Haskell,
297 Language::Html,
298 Language::Java,
299 Language::JavaScript,
300 Language::Julia,
301 Language::Kotlin,
302 Language::Lua,
303 Language::Makefile,
304 Language::Nim,
305 Language::ObjectiveC,
306 Language::Ocaml,
307 Language::Perl,
308 Language::Php,
309 Language::PowerShell,
310 Language::Python,
311 Language::R,
312 Language::Ruby,
313 Language::Rust,
314 Language::Scala,
315 Language::Scss,
316 Language::Shell,
317 Language::Sql,
318 Language::Svelte,
319 Language::Swift,
320 Language::TypeScript,
321 Language::Vue,
322 Language::Xml,
323 Language::Zig,
324 ]
325 .into_iter()
326 .collect()
327}
328
329fn detect_by_shebang(line: &str) -> Option<Language> {
331 let lower = line.to_ascii_lowercase();
332 if !lower.starts_with("#!") {
333 return None;
334 }
335 if lower.contains("python") {
336 return Some(Language::Python);
337 }
338 if lower.contains("pwsh") || lower.contains("powershell") {
339 return Some(Language::PowerShell);
340 }
341 if lower.contains("bash")
342 || lower.contains("/sh")
343 || lower.contains("zsh")
344 || lower.contains("ksh")
345 {
346 return Some(Language::Shell);
347 }
348 if lower.contains("ruby") {
349 return Some(Language::Ruby);
350 }
351 if lower.contains("perl") {
352 return Some(Language::Perl);
353 }
354 if lower.contains("php") {
355 return Some(Language::Php);
356 }
357 if lower.contains("node") || lower.contains("nodejs") {
358 return Some(Language::JavaScript);
359 }
360 None
361}
362
363fn detect_by_extension(ext: &str) -> Option<Language> {
365 static EXT_MAP: &[(&str, Language)] = &[
367 ("c", Language::C),
368 ("h", Language::C),
369 ("cc", Language::Cpp),
370 ("cp", Language::Cpp),
371 ("cpp", Language::Cpp),
372 ("cxx", Language::Cpp),
373 ("hh", Language::Cpp),
374 ("hpp", Language::Cpp),
375 ("hxx", Language::Cpp),
376 ("cs", Language::CSharp),
377 ("go", Language::Go),
378 ("java", Language::Java),
379 ("js", Language::JavaScript),
380 ("mjs", Language::JavaScript),
381 ("cjs", Language::JavaScript),
382 ("py", Language::Python),
383 ("rs", Language::Rust),
384 ("sh", Language::Shell),
385 ("bash", Language::Shell),
386 ("zsh", Language::Shell),
387 ("ksh", Language::Shell),
388 ("ps1", Language::PowerShell),
389 ("psm1", Language::PowerShell),
390 ("psd1", Language::PowerShell),
391 ("ts", Language::TypeScript),
392 ("mts", Language::TypeScript),
393 ("cts", Language::TypeScript),
394 ("asm", Language::Assembly),
395 ("s", Language::Assembly),
396 ("clj", Language::Clojure),
397 ("cljs", Language::Clojure),
398 ("cljc", Language::Clojure),
399 ("edn", Language::Clojure),
400 ("css", Language::Css),
401 ("dart", Language::Dart),
402 ("ex", Language::Elixir),
403 ("exs", Language::Elixir),
404 ("erl", Language::Erlang),
405 ("hrl", Language::Erlang),
406 ("fs", Language::FSharp),
407 ("fsi", Language::FSharp),
408 ("fsx", Language::FSharp),
409 ("groovy", Language::Groovy),
410 ("gradle", Language::Groovy),
411 ("hs", Language::Haskell),
412 ("lhs", Language::Haskell),
413 ("html", Language::Html),
414 ("htm", Language::Html),
415 ("xhtml", Language::Html),
416 ("jl", Language::Julia),
417 ("kt", Language::Kotlin),
418 ("kts", Language::Kotlin),
419 ("lua", Language::Lua),
420 ("mk", Language::Makefile),
421 ("nim", Language::Nim),
422 ("nims", Language::Nim),
423 ("m", Language::ObjectiveC),
424 ("mm", Language::ObjectiveC),
425 ("ml", Language::Ocaml),
426 ("mli", Language::Ocaml),
427 ("pl", Language::Perl),
428 ("pm", Language::Perl),
429 ("t", Language::Perl),
430 ("php", Language::Php),
431 ("php3", Language::Php),
432 ("php4", Language::Php),
433 ("php5", Language::Php),
434 ("php7", Language::Php),
435 ("phtml", Language::Php),
436 ("r", Language::R),
437 ("rb", Language::Ruby),
438 ("rake", Language::Ruby),
439 ("scala", Language::Scala),
440 ("sc", Language::Scala),
441 ("scss", Language::Scss),
442 ("sass", Language::Scss),
443 ("sql", Language::Sql),
444 ("svelte", Language::Svelte),
445 ("swift", Language::Swift),
446 ("vue", Language::Vue),
447 ("xml", Language::Xml),
448 ("xsd", Language::Xml),
449 ("xsl", Language::Xml),
450 ("xslt", Language::Xml),
451 ("svg", Language::Xml),
452 ("zig", Language::Zig),
453 ];
454 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
455}
456
457fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
459 if filename == "Dockerfile"
461 || filename.starts_with("Dockerfile.")
462 || filename_lower == "dockerfile"
463 {
464 return Some(Language::Dockerfile);
465 }
466 if matches!(
468 filename,
469 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
470 ) {
471 return Some(Language::Makefile);
472 }
473 if matches!(
475 filename,
476 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
477 ) {
478 return Some(Language::Ruby);
479 }
480 None
481}
482
483#[must_use]
484#[allow(clippy::too_many_lines)]
485pub fn detect_language(
486 path: &Path,
487 first_line: Option<&str>,
488 extension_overrides: &BTreeMap<String, String>,
489 shebang_detection: bool,
490) -> Option<Language> {
491 let extension = path
492 .extension()
493 .and_then(|ext| ext.to_str())
494 .map(str::to_ascii_lowercase);
495
496 if let Some(ext) = extension.as_ref() {
498 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
499 if let Some(lang) = Language::from_name(override_name) {
500 return Some(lang);
501 }
502 }
503 }
504
505 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
507 let filename_lower = filename.to_ascii_lowercase();
508
509 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
510 return Some(lang);
511 }
512
513 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
515 return Some(lang);
516 }
517
518 if shebang_detection {
520 if let Some(line) = first_line {
521 if let Some(lang) = detect_by_shebang(line) {
522 return Some(lang);
523 }
524 }
525 }
526
527 None
528}
529
530#[must_use]
531pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
532 #[cfg(feature = "tree-sitter")]
534 {
535 match language {
536 Language::C | Language::Cpp => {
537 if let Some(result) = ts::analyze_c(text) {
538 return result;
539 }
540 }
541 Language::Python => {
542 if let Some(result) = ts::analyze_python(text) {
543 return result;
544 }
545 }
546 _ => {}
547 }
548 }
549
550 let (mut config, has_preprocessor) = language_scan_config(language);
551
552 if language == Language::Python {
554 config.skip_lines = detect_python_docstring_lines(text);
555 }
556
557 let flags = IeeeFlags {
560 has_preprocessor_directives: has_preprocessor,
561 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
562 collapse_continuation_lines: options.collapse_continuation_lines,
563 };
564 analyze_generic(text, config, flags)
565}
566
567fn language_scan_config(language: Language) -> (ScanConfig, bool) {
575 let cfg = LANG_SCAN_TABLE
576 .iter()
577 .find_map(|&(l, c)| (l == language).then_some(c))
578 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
579 (
580 ScanConfig {
581 line_comments: cfg.line_comments,
582 block_comment: cfg.block_comment,
583 allow_single_quote_strings: cfg.allow_single_quote_strings,
584 allow_double_quote_strings: cfg.allow_double_quote_strings,
585 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
586 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
587 skip_lines: HashSet::new(),
588 symbol_patterns: cfg.symbol_patterns,
589 },
590 cfg.has_preprocessor,
591 )
592}
593
/// Static per-language lists of textual patterns, one list per kind of symbol
/// the analyzer counts.
///
/// NOTE(review): how each list is matched against a line is decided by the
/// scanner, which is not part of this definition; the field notes are read off
/// the names.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Patterns that mark a function definition.
    functions: &'static [&'static str],
    /// Judging by the name, prefixes that only count as a function when a `(`
    /// is also present — TODO confirm against the scanner.
    functions_prefix_paren: &'static [&'static str],
    /// Patterns that mark a class-like definition.
    classes: &'static [&'static str],
    /// Patterns that mark a variable declaration.
    variables: &'static [&'static str],
    /// Patterns that mark an import statement.
    imports: &'static [&'static str],
    /// Patterns that mark a test case.
    tests: &'static [&'static str],
    /// Patterns that mark a test assertion.
    assertions: &'static [&'static str],
    /// Patterns that mark a test suite.
    test_suites: &'static [&'static str],
}

impl SymbolPatterns {
    /// A pattern set in which every list is empty.
    const fn none() -> Self {
        const EMPTY: &[&str] = &[];
        Self {
            functions: EMPTY,
            functions_prefix_paren: EMPTY,
            classes: EMPTY,
            variables: EMPTY,
            imports: EMPTY,
            tests: EMPTY,
            assertions: EMPTY,
            test_suites: EMPTY,
        }
    }
}
634
/// Empty pattern set: every list is empty.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();

/// Symbol patterns intended for Rust sources (the wiring to `Language` is not
/// shown here).
///
/// Visibility/qualifier combinations are enumerated explicitly rather than
/// parsed, so only the listed spellings are covered.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    functions_prefix_paren: &[],
    // `impl` blocks and type aliases are tallied under "classes" as well.
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
    // Test attributes; "#[test_case" has no closing bracket so that
    // parameterised forms are included.
    tests: &[
        "#[test]",
        "#[tokio::test]",
        "#[actix_web::test]",
        "#[rstest]",
        "#[test_case",
    ],
    assertions: &[
        "assert_eq!(",
        "assert_ne!(",
        "assert!(",
        "assert_matches!(",
        "assert_err!(",
        "assert_ok!(",
    ],
    test_suites: &[],
};
692
/// Symbol patterns intended for Python sources (the wiring to `Language` is
/// not shown here).
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["import ", "from "],
    // NOTE(review): "class Test" is listed as a test rather than a suite, and
    // each entry here also begins with a `functions`/`classes` pattern —
    // confirm how the scanner treats a line that fits several lists.
    tests: &["def test_", "async def test_", "class Test"],
    // unittest-style `self.assert*` calls only; bare `assert` statements are
    // not listed.
    assertions: &[
        "self.assertEqual(",
        "self.assertNotEqual(",
        "self.assertTrue(",
        "self.assertFalse(",
        "self.assertIsNone(",
        "self.assertIsNotNone(",
        "self.assertIn(",
        "self.assertNotIn(",
        "self.assertRaises(",
        "self.assertAlmostEqual(",
    ],
    test_suites: &[],
};
715
/// Symbol patterns intended for JavaScript sources (the wiring to `Language`
/// is not shown here).
///
/// Only `function` declarations are listed under `functions`; arrow functions
/// bound with `const`/`let` fall under `variables`.
const SP_JS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // `describe`/`it`/`test` call forms, including their `.each` variants.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
747
/// Symbol patterns intended for TypeScript sources (the wiring to `Language`
/// is not shown here).
///
/// Identical to the JavaScript lists except that `classes` additionally covers
/// `abstract`/`declare` classes and interfaces.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // `describe`/`it`/`test` call forms, including their `.each` variants.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
789
/// Symbol patterns intended for Go sources (the wiring to `Language` is not
/// shown here).
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    functions_prefix_paren: &[],
    // All `type` declarations are tallied under "classes".
    classes: &["type "],
    // Only `var` declarations; short `:=` declarations are not listed.
    variables: &["var "],
    imports: &["import "],
    // Name prefixes of test, benchmark and fuzz functions.
    tests: &["func Test", "func Benchmark", "func Fuzz"],
    assertions: &[],
    test_suites: &[],
};
801
/// Symbol patterns intended for Java sources (the wiring to `Language` is not
/// shown here).
///
/// No function or variable patterns are defined for Java; only type
/// declarations, imports and test annotations/assertions are listed.
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
    // Test-method annotations.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "@TestFactory",
        "@TestTemplate",
    ],
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "assertAll(",
        "assertArrayEquals(",
        "assertIterableEquals(",
        "assertLinesMatch(",
    ],
    test_suites: &[],
};
848
/// Symbol patterns intended for C# sources (the wiring to `Language` is not
/// shown here).
///
/// No function patterns are defined for C#.
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    variables: &["var "],
    imports: &["using "],
    // Test attributes from several frameworks (MSTest-, NUnit- and
    // xUnit-style spellings).
    tests: &[
        "[TestMethod]",
        "[Test]",
        "[Fact]",
        "[Theory]",
        "[TestCase(",
        "[DataRow(",
        "[InlineData(",
        "[MemberData(",
    ],
    assertions: &[
        "Assert.AreEqual(",
        "Assert.AreNotEqual(",
        "Assert.IsTrue(",
        "Assert.IsFalse(",
        "Assert.IsNull(",
        "Assert.IsNotNull(",
        "Assert.Equal(",
        "Assert.NotEqual(",
        "Assert.True(",
        "Assert.False(",
        "Assert.That(",
        "Assert.Contains(",
        "Assert.Throws(",
        "Assert.ThrowsAsync(",
        "Assert.IsInstanceOfType(",
    ],
    // Class-level fixture attributes.
    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
};
907
/// Test-declaration macros and calls shared by the C and C++ pattern sets.
///
/// Entries are grouped by the framework whose spelling they follow.
///
/// NOTE(review): `BOOST_AUTO_TEST_SUITE(`, `CPPUNIT_TEST_SUITE(`,
/// `TEST_GROUP(` and `TEST_GROUP_BASE(` also appear in
/// `SUITE_PATTERNS_C_CPP`; confirm whether such lines are meant to be counted
/// both as a test and as a suite.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest-style
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2-style
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test-style
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit-style
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity-style
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check-style
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka-style
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest-style
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
948
/// Assertion macros and calls shared by the C and C++ pattern sets.
///
/// Entries are grouped by the framework whose spelling they follow.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest-style fatal assertions
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // GoogleTest-style non-fatal expectations
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2-style
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity-style
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka-style
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1022
/// Test-suite/group declaration macros shared by the C and C++ pattern sets.
///
/// NOTE(review): every entry except `CPPUNIT_TEST_SUITE_END(` is also present
/// in `TEST_PATTERNS_C_CPP`; confirm that the overlap is intentional.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1031
/// Symbol patterns intended for C sources (the wiring to `Language` is not
/// shown here).
///
/// C has no definition keyword, so functions are described only through
/// `functions_prefix_paren`: common leading type names and storage/qualifier
/// keywords.
const SP_C: SymbolPatterns = SymbolPatterns {
    functions: &[],
    // NOTE(review): only these leading tokens are covered; functions whose
    // return type is a typedef or a `struct` are not listed.
    functions_prefix_paren: &[
        "void ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "static ",
        "inline ",
        "const ",
        "extern ",
    ],
    // Aggregate type declarations are tallied under "classes".
    classes: &[
        "struct ",
        "typedef struct ",
        "union ",
        "typedef union ",
        "typedef enum ",
    ],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1062
/// Symbol patterns intended for C++ sources (the wiring to `Language` is not
/// shown here).
///
/// Shares the test, assertion and suite lists with the C pattern set.
const SP_CPP: SymbolPatterns = SymbolPatterns {
    // Member-function markers: virtual/explicit declarations, destructors
    // ("~") and operator overloads.
    functions: &[
        "virtual ",
        "explicit ",
        "~",
        "operator",
    ],
    // Leading type names and specifiers, as in the C set plus `bool`, `auto`
    // and `constexpr`.
    functions_prefix_paren: &[
        "void ",
        "bool ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "auto ",
        "static ",
        "inline ",
        "constexpr ",
        "const ",
        "extern ",
    ],
    // Namespaces and templates are tallied under "classes" as well.
    classes: &["class ", "struct ", "namespace ", "template ", "template<"],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1096
/// Symbol patterns intended for shell scripts (the wiring to `Language` is
/// not shown here).
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    // Only the `function name` spelling; the `name() { ... }` form is not
    // listed.
    functions: &["function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    // `source file` and its POSIX spelling `. file`.
    imports: &["source ", ". "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1107
/// Symbol patterns intended for PowerShell sources (the wiring to `Language`
/// is not shown here).
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    // Two casings are listed explicitly, which suggests the matcher compares
    // case-sensitively — TODO confirm.
    functions: &["function ", "Function "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
    // Pester-style block keywords.
    tests: &["Describe ", "It ", "Context "],
    assertions: &[],
    test_suites: &[],
};
1119
/// Symbol patterns intended for Kotlin sources (the wiring to `Language` is
/// not shown here).
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        // No trailing space: a companion object may be anonymous.
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
    // JUnit-style annotations plus string-named specs that open with
    // `"should ` or `"it ` (presumably Kotest-style — TODO confirm).
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "\"should ",
        "\"it ",
    ],
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "shouldBe(",
        "shouldNotBe(",
        "shouldThrow(",
    ],
    test_suites: &[],
};
1172
/// Symbol patterns intended for Swift sources (the wiring to `Language` is
/// not shown here).
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    functions_prefix_paren: &[],
    // Protocols, extensions and actors are tallied under "classes" as well.
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
    // XCTest-style `test...` method names and the `@Test` attribute.
    tests: &["func test", "func Test", "@Test"],
    // XCTest assertions plus the `#expect(` macro.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
        "#expect(",
    ],
    test_suites: &[],
};
1229
/// Symbol patterns intended for Ruby sources (the wiring to `Language` is not
/// shown here).
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def "],
    functions_prefix_paren: &[],
    // Modules are tallied under "classes" as well.
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    // RSpec/Minitest-style block keywords, in both `it "..."` and `it(...)`
    // spellings.
    tests: &["it ", "it(", "describe ", "context ", "test "],
    assertions: &[],
    test_suites: &[],
};
1241
/// Symbol patterns intended for Scala sources (the wiring to `Language` is
/// not shown here).
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    functions_prefix_paren: &[],
    // Objects and traits are tallied under "classes" as well.
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    // Call-style test declarations.
    tests: &["test(", "it(", "describe("],
    assertions: &[],
    test_suites: &[],
};
1260
/// Symbol patterns intended for PHP sources (the wiring to `Language` is not
/// shown here).
const SP_PHP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    // `test...` method names plus PHPUnit-style attributes.
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    assertions: &[],
    test_suites: &[],
};
1301
/// Symbol patterns intended for Elixir sources (the wiring to `Language` is
/// not shown here).
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    // Public and private (`...p`) definitions of functions, macros and
    // guards.
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    functions_prefix_paren: &[],
    // Modules, protocols and protocol implementations.
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    // ExUnit-style block keywords.
    tests: &["test ", "describe "],
    assertions: &[],
    test_suites: &[],
};
1320
/// Symbol patterns intended for Erlang sources (the wiring to `Language` is
/// not shown here).
///
/// Only module attributes are covered; no function, variable or test patterns
/// are defined.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1331
/// Symbol patterns intended for F# sources (the wiring to `Language` is not
/// shown here).
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    // NOTE(review): every `let` binding is listed as a function, and
    // "let mutable " (under `variables`) also begins with "let " — confirm
    // how the scanner resolves a line that fits both lists.
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    // Test attributes in F# `[<...>]` syntax.
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    assertions: &[],
    test_suites: &[],
};
1349
/// Symbol patterns intended for Groovy sources (the wiring to `Language` is
/// not shown here).
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    // String-named methods (`def "..."`), `@Test`, and Spock-style block
    // labels.
    // NOTE(review): a single feature method with given/when/then labels fits
    // several entries here — confirm whether each label is meant to count.
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    assertions: &[],
    test_suites: &[],
};
1361
/// Symbol patterns intended for Haskell sources (the wiring to `Language` is
/// not shown here).
///
/// Only type-level declarations and imports are covered; no function patterns
/// are defined.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1372
/// Symbol patterns intended for Lua sources (the wiring to `Language` is not
/// shown here).
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    functions_prefix_paren: &[],
    classes: &[],
    // NOTE(review): "local function " (under `functions`) also begins with
    // "local " — confirm how the scanner resolves a line that fits both lists.
    variables: &["local "],
    // No import patterns are defined (`require` is not listed).
    imports: &[],
    // Busted-style call forms.
    tests: &["it(", "describe(", "pending("],
    assertions: &[],
    test_suites: &[],
};
1384
/// Symbol patterns intended for Nim sources (the wiring to `Language` is not
/// shown here).
const SP_NIM: SymbolPatterns = SymbolPatterns {
    // All routine kinds are tallied as functions.
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
};
1404
/// Symbol patterns intended for Objective-C sources (the wiring to `Language`
/// is not shown here).
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    // Instance (`-`) and class (`+`) method declarations, written with a
    // space before the return type.
    functions: &["- (", "+ ("],
    functions_prefix_paren: &[],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // XCTest-style instance methods whose name starts with `test`.
    tests: &["- (void)test"],
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
};
1427
/// Line-prefix patterns used to count symbols in OCaml sources.
///
/// Every entry is compared against the start of a trimmed source line.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    // Every `let` binding is counted as a function; `let rec ` is already
    // covered by the shorter `let ` prefix.
    functions: &["let ", "let rec "],
    functions_prefix_paren: &[],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1438
/// Line-prefix patterns used to count symbols in Perl sources.
///
/// Every entry is compared against the start of a trimmed source line.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    functions_prefix_paren: &[],
    // Packages are counted in the class category.
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1449
/// Line-prefix patterns used to count symbols in Clojure sources.
///
/// Every entry is compared against the start of a trimmed source line, so
/// only forms that open at the beginning of a line are recognised.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    functions_prefix_paren: &[],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
};
1466
/// Line-prefix patterns used to count symbols in Julia sources.
///
/// Every entry is compared against the start of a trimmed source line.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    functions_prefix_paren: &[],
    // Type declarations are counted in the class category.
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
};
1483
/// Line-prefix patterns used to count symbols in Dart sources.
///
/// Every entry is compared against the start of a trimmed source line.
/// No function patterns are defined, so functions are never counted.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
};
1495
/// Line-prefix patterns used to count symbols in R sources.
///
/// Every entry is compared against the start of a trimmed source line.
/// Functions, classes and variables have no patterns and are never counted.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // NOTE(review): `expect_` lines are tallied as tests here while
    // `assertions` stays empty — confirm this is the intended category.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
1507
/// Line-prefix patterns used to count symbols in SQL sources.
///
/// Every entry is compared against the start of a trimmed source line.
/// Matching is case-sensitive, so each statement is listed in all-lowercase
/// and all-uppercase form; mixed-case spellings are not recognised.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    functions_prefix_paren: &[],
    // Tables, views and schemas are counted in the class category.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1534
/// Line-prefix patterns used to count symbols in assembly sources.
///
/// Every entry is compared against the start of a trimmed source line, so a
/// `proc` directive is only seen when it is the first token on the line.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    functions: &["proc ", "PROC "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1545
/// Line-prefix patterns used to count symbols in Zig sources.
///
/// Every entry is compared against the start of a trimmed source line.
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["var ", "pub var "],
    imports: &[],
    // Named tests (`test "..."`) and anonymous tests written as `test{`.
    // NOTE(review): `test {` with a space is not matched — confirm intended.
    tests: &["test \"", "test{"],
    assertions: &[],
    test_suites: &[],
};
1563
/// Compile-time lexical description of one language, as stored in
/// `LANG_SCAN_TABLE`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Tokens that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// Opening and closing delimiters of block comments, if the language has any.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether `"""` and `'''` open triple-quoted literals.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a C#-style verbatim string.
    allow_csharp_verbatim_strings: bool,
    /// Line-prefix patterns used for symbol counting.
    symbol_patterns: SymbolPatterns,
    /// Set for C, C++ and Objective-C in the table.
    /// NOTE(review): presumably feeds `IeeeFlags::has_preprocessor_directives`
    /// — confirm against the code that builds the flags.
    has_preprocessor: bool,
}
1580
/// Per-file configuration handed to the generic line scanner.
///
/// Mirrors the lexical fields of `StaticLangConfig` and adds the set of
/// lines that must not be scanned.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Tokens that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// Opening and closing delimiters of block comments, if the language has any.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether `"""` and `'''` open triple-quoted literals.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a C#-style verbatim string.
    allow_csharp_verbatim_strings: bool,
    /// Zero-based line indices that are counted as docstring lines and
    /// skipped by the scanner.
    skip_lines: HashSet<usize>,
    /// Line-prefix patterns used for symbol counting.
    symbol_patterns: SymbolPatterns,
}
1593
1594static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
1598 (
1599 Language::C,
1600 StaticLangConfig {
1601 line_comments: &["//"],
1602 block_comment: Some(("/*", "*/")),
1603 allow_single_quote_strings: true,
1604 allow_double_quote_strings: true,
1605 allow_triple_quote_strings: false,
1606 allow_csharp_verbatim_strings: false,
1607 symbol_patterns: SP_C,
1608 has_preprocessor: true,
1609 },
1610 ),
1611 (
1612 Language::Cpp,
1613 StaticLangConfig {
1614 line_comments: &["//"],
1615 block_comment: Some(("/*", "*/")),
1616 allow_single_quote_strings: true,
1617 allow_double_quote_strings: true,
1618 allow_triple_quote_strings: false,
1619 allow_csharp_verbatim_strings: false,
1620 symbol_patterns: SP_CPP,
1621 has_preprocessor: true,
1622 },
1623 ),
1624 (
1625 Language::ObjectiveC,
1626 StaticLangConfig {
1627 line_comments: &["//"],
1628 block_comment: Some(("/*", "*/")),
1629 allow_single_quote_strings: true,
1630 allow_double_quote_strings: true,
1631 allow_triple_quote_strings: false,
1632 allow_csharp_verbatim_strings: false,
1633 symbol_patterns: SP_OBJECTIVEC,
1634 has_preprocessor: true,
1635 },
1636 ),
1637 (
1638 Language::CSharp,
1639 StaticLangConfig {
1640 line_comments: &["//"],
1641 block_comment: Some(("/*", "*/")),
1642 allow_single_quote_strings: true,
1643 allow_double_quote_strings: true,
1644 allow_triple_quote_strings: false,
1645 allow_csharp_verbatim_strings: true,
1646 symbol_patterns: SP_CSHARP,
1647 has_preprocessor: false,
1648 },
1649 ),
1650 (
1651 Language::Go,
1652 StaticLangConfig {
1653 line_comments: &["//"],
1654 block_comment: Some(("/*", "*/")),
1655 allow_single_quote_strings: true,
1656 allow_double_quote_strings: true,
1657 allow_triple_quote_strings: false,
1658 allow_csharp_verbatim_strings: false,
1659 symbol_patterns: SP_GO,
1660 has_preprocessor: false,
1661 },
1662 ),
1663 (
1664 Language::Java,
1665 StaticLangConfig {
1666 line_comments: &["//"],
1667 block_comment: Some(("/*", "*/")),
1668 allow_single_quote_strings: true,
1669 allow_double_quote_strings: true,
1670 allow_triple_quote_strings: false,
1671 allow_csharp_verbatim_strings: false,
1672 symbol_patterns: SP_JAVA,
1673 has_preprocessor: false,
1674 },
1675 ),
1676 (
1677 Language::JavaScript,
1678 StaticLangConfig {
1679 line_comments: &["//"],
1680 block_comment: Some(("/*", "*/")),
1681 allow_single_quote_strings: true,
1682 allow_double_quote_strings: true,
1683 allow_triple_quote_strings: false,
1684 allow_csharp_verbatim_strings: false,
1685 symbol_patterns: SP_JS,
1686 has_preprocessor: false,
1687 },
1688 ),
1689 (
1690 Language::Svelte,
1691 StaticLangConfig {
1692 line_comments: &["//"],
1693 block_comment: Some(("/*", "*/")),
1694 allow_single_quote_strings: true,
1695 allow_double_quote_strings: true,
1696 allow_triple_quote_strings: false,
1697 allow_csharp_verbatim_strings: false,
1698 symbol_patterns: SP_JS,
1699 has_preprocessor: false,
1700 },
1701 ),
1702 (
1703 Language::Vue,
1704 StaticLangConfig {
1705 line_comments: &["//"],
1706 block_comment: Some(("/*", "*/")),
1707 allow_single_quote_strings: true,
1708 allow_double_quote_strings: true,
1709 allow_triple_quote_strings: false,
1710 allow_csharp_verbatim_strings: false,
1711 symbol_patterns: SP_JS,
1712 has_preprocessor: false,
1713 },
1714 ),
1715 (
1716 Language::Rust,
1717 StaticLangConfig {
1718 line_comments: &["//"],
1719 block_comment: Some(("/*", "*/")),
1720 allow_single_quote_strings: false,
1721 allow_double_quote_strings: true,
1722 allow_triple_quote_strings: false,
1723 allow_csharp_verbatim_strings: false,
1724 symbol_patterns: SP_RUST,
1725 has_preprocessor: false,
1726 },
1727 ),
1728 (
1729 Language::Shell,
1730 StaticLangConfig {
1731 line_comments: &["#"],
1732 block_comment: None,
1733 allow_single_quote_strings: true,
1734 allow_double_quote_strings: true,
1735 allow_triple_quote_strings: false,
1736 allow_csharp_verbatim_strings: false,
1737 symbol_patterns: SP_SHELL,
1738 has_preprocessor: false,
1739 },
1740 ),
1741 (
1742 Language::PowerShell,
1743 StaticLangConfig {
1744 line_comments: &["#"],
1745 block_comment: Some(("<#", "#>")),
1746 allow_single_quote_strings: true,
1747 allow_double_quote_strings: true,
1748 allow_triple_quote_strings: false,
1749 allow_csharp_verbatim_strings: false,
1750 symbol_patterns: SP_POWERSHELL,
1751 has_preprocessor: false,
1752 },
1753 ),
1754 (
1755 Language::TypeScript,
1756 StaticLangConfig {
1757 line_comments: &["//"],
1758 block_comment: Some(("/*", "*/")),
1759 allow_single_quote_strings: true,
1760 allow_double_quote_strings: true,
1761 allow_triple_quote_strings: false,
1762 allow_csharp_verbatim_strings: false,
1763 symbol_patterns: SP_TS,
1764 has_preprocessor: false,
1765 },
1766 ),
1767 (
1768 Language::Python,
1769 StaticLangConfig {
1770 line_comments: &["#"],
1771 block_comment: None,
1772 allow_single_quote_strings: true,
1773 allow_double_quote_strings: true,
1774 allow_triple_quote_strings: true,
1775 allow_csharp_verbatim_strings: false,
1776 symbol_patterns: SP_PYTHON,
1777 has_preprocessor: false,
1778 },
1779 ),
1780 (
1781 Language::Assembly,
1782 StaticLangConfig {
1783 line_comments: &[";"],
1784 block_comment: None,
1785 allow_single_quote_strings: false,
1786 allow_double_quote_strings: false,
1787 allow_triple_quote_strings: false,
1788 allow_csharp_verbatim_strings: false,
1789 symbol_patterns: SP_ASSEMBLY,
1790 has_preprocessor: false,
1791 },
1792 ),
1793 (
1794 Language::Clojure,
1795 StaticLangConfig {
1796 line_comments: &[";"],
1797 block_comment: None,
1798 allow_single_quote_strings: false,
1799 allow_double_quote_strings: true,
1800 allow_triple_quote_strings: false,
1801 allow_csharp_verbatim_strings: false,
1802 symbol_patterns: SP_CLOJURE,
1803 has_preprocessor: false,
1804 },
1805 ),
1806 (
1807 Language::Css,
1808 StaticLangConfig {
1809 line_comments: &[],
1810 block_comment: Some(("/*", "*/")),
1811 allow_single_quote_strings: true,
1812 allow_double_quote_strings: true,
1813 allow_triple_quote_strings: false,
1814 allow_csharp_verbatim_strings: false,
1815 symbol_patterns: SP_NONE,
1816 has_preprocessor: false,
1817 },
1818 ),
1819 (
1820 Language::Dart,
1821 StaticLangConfig {
1822 line_comments: &["//"],
1823 block_comment: Some(("/*", "*/")),
1824 allow_single_quote_strings: true,
1825 allow_double_quote_strings: true,
1826 allow_triple_quote_strings: false,
1827 allow_csharp_verbatim_strings: false,
1828 symbol_patterns: SP_DART,
1829 has_preprocessor: false,
1830 },
1831 ),
1832 (
1833 Language::Dockerfile,
1834 StaticLangConfig {
1835 line_comments: &["#"],
1836 block_comment: None,
1837 allow_single_quote_strings: false,
1838 allow_double_quote_strings: false,
1839 allow_triple_quote_strings: false,
1840 allow_csharp_verbatim_strings: false,
1841 symbol_patterns: SP_NONE,
1842 has_preprocessor: false,
1843 },
1844 ),
1845 (
1846 Language::Elixir,
1847 StaticLangConfig {
1848 line_comments: &["#"],
1849 block_comment: None,
1850 allow_single_quote_strings: true,
1851 allow_double_quote_strings: true,
1852 allow_triple_quote_strings: false,
1853 allow_csharp_verbatim_strings: false,
1854 symbol_patterns: SP_ELIXIR,
1855 has_preprocessor: false,
1856 },
1857 ),
1858 (
1859 Language::Erlang,
1860 StaticLangConfig {
1861 line_comments: &["%"],
1862 block_comment: None,
1863 allow_single_quote_strings: false,
1864 allow_double_quote_strings: true,
1865 allow_triple_quote_strings: false,
1866 allow_csharp_verbatim_strings: false,
1867 symbol_patterns: SP_ERLANG,
1868 has_preprocessor: false,
1869 },
1870 ),
1871 (
1872 Language::FSharp,
1873 StaticLangConfig {
1874 line_comments: &["//"],
1875 block_comment: Some(("(*", "*)")),
1876 allow_single_quote_strings: false,
1877 allow_double_quote_strings: true,
1878 allow_triple_quote_strings: false,
1879 allow_csharp_verbatim_strings: false,
1880 symbol_patterns: SP_FSHARP,
1881 has_preprocessor: false,
1882 },
1883 ),
1884 (
1885 Language::Groovy,
1886 StaticLangConfig {
1887 line_comments: &["//"],
1888 block_comment: Some(("/*", "*/")),
1889 allow_single_quote_strings: true,
1890 allow_double_quote_strings: true,
1891 allow_triple_quote_strings: false,
1892 allow_csharp_verbatim_strings: false,
1893 symbol_patterns: SP_GROOVY,
1894 has_preprocessor: false,
1895 },
1896 ),
1897 (
1898 Language::Haskell,
1899 StaticLangConfig {
1900 line_comments: &["--"],
1901 block_comment: Some(("{-", "-}")),
1902 allow_single_quote_strings: true,
1903 allow_double_quote_strings: true,
1904 allow_triple_quote_strings: false,
1905 allow_csharp_verbatim_strings: false,
1906 symbol_patterns: SP_HASKELL,
1907 has_preprocessor: false,
1908 },
1909 ),
1910 (
1911 Language::Html,
1912 StaticLangConfig {
1913 line_comments: &[],
1914 block_comment: Some(("<!--", "-->")),
1915 allow_single_quote_strings: false,
1916 allow_double_quote_strings: false,
1917 allow_triple_quote_strings: false,
1918 allow_csharp_verbatim_strings: false,
1919 symbol_patterns: SP_NONE,
1920 has_preprocessor: false,
1921 },
1922 ),
1923 (
1924 Language::Julia,
1925 StaticLangConfig {
1926 line_comments: &["#"],
1927 block_comment: Some(("#=", "=#")),
1928 allow_single_quote_strings: false,
1929 allow_double_quote_strings: true,
1930 allow_triple_quote_strings: true,
1931 allow_csharp_verbatim_strings: false,
1932 symbol_patterns: SP_JULIA,
1933 has_preprocessor: false,
1934 },
1935 ),
1936 (
1937 Language::Kotlin,
1938 StaticLangConfig {
1939 line_comments: &["//"],
1940 block_comment: Some(("/*", "*/")),
1941 allow_single_quote_strings: true,
1942 allow_double_quote_strings: true,
1943 allow_triple_quote_strings: false,
1944 allow_csharp_verbatim_strings: false,
1945 symbol_patterns: SP_KOTLIN,
1946 has_preprocessor: false,
1947 },
1948 ),
1949 (
1950 Language::Lua,
1951 StaticLangConfig {
1952 line_comments: &["--"],
1953 block_comment: Some(("--[[", "]]")),
1954 allow_single_quote_strings: true,
1955 allow_double_quote_strings: true,
1956 allow_triple_quote_strings: false,
1957 allow_csharp_verbatim_strings: false,
1958 symbol_patterns: SP_LUA,
1959 has_preprocessor: false,
1960 },
1961 ),
1962 (
1963 Language::Makefile,
1964 StaticLangConfig {
1965 line_comments: &["#"],
1966 block_comment: None,
1967 allow_single_quote_strings: false,
1968 allow_double_quote_strings: false,
1969 allow_triple_quote_strings: false,
1970 allow_csharp_verbatim_strings: false,
1971 symbol_patterns: SP_NONE,
1972 has_preprocessor: false,
1973 },
1974 ),
1975 (
1976 Language::Nim,
1977 StaticLangConfig {
1978 line_comments: &["#"],
1979 block_comment: Some(("#[", "]#")),
1980 allow_single_quote_strings: true,
1981 allow_double_quote_strings: true,
1982 allow_triple_quote_strings: false,
1983 allow_csharp_verbatim_strings: false,
1984 symbol_patterns: SP_NIM,
1985 has_preprocessor: false,
1986 },
1987 ),
1988 (
1989 Language::Ocaml,
1990 StaticLangConfig {
1991 line_comments: &[],
1992 block_comment: Some(("(*", "*)")),
1993 allow_single_quote_strings: false,
1994 allow_double_quote_strings: true,
1995 allow_triple_quote_strings: false,
1996 allow_csharp_verbatim_strings: false,
1997 symbol_patterns: SP_OCAML,
1998 has_preprocessor: false,
1999 },
2000 ),
2001 (
2002 Language::Perl,
2003 StaticLangConfig {
2004 line_comments: &["#"],
2005 block_comment: None,
2006 allow_single_quote_strings: true,
2007 allow_double_quote_strings: true,
2008 allow_triple_quote_strings: false,
2009 allow_csharp_verbatim_strings: false,
2010 symbol_patterns: SP_PERL,
2011 has_preprocessor: false,
2012 },
2013 ),
2014 (
2015 Language::Php,
2016 StaticLangConfig {
2017 line_comments: &["//", "#"],
2018 block_comment: Some(("/*", "*/")),
2019 allow_single_quote_strings: true,
2020 allow_double_quote_strings: true,
2021 allow_triple_quote_strings: false,
2022 allow_csharp_verbatim_strings: false,
2023 symbol_patterns: SP_PHP,
2024 has_preprocessor: false,
2025 },
2026 ),
2027 (
2028 Language::R,
2029 StaticLangConfig {
2030 line_comments: &["#"],
2031 block_comment: None,
2032 allow_single_quote_strings: true,
2033 allow_double_quote_strings: true,
2034 allow_triple_quote_strings: false,
2035 allow_csharp_verbatim_strings: false,
2036 symbol_patterns: SP_R,
2037 has_preprocessor: false,
2038 },
2039 ),
2040 (
2041 Language::Ruby,
2042 StaticLangConfig {
2043 line_comments: &["#"],
2044 block_comment: None,
2045 allow_single_quote_strings: true,
2046 allow_double_quote_strings: true,
2047 allow_triple_quote_strings: false,
2048 allow_csharp_verbatim_strings: false,
2049 symbol_patterns: SP_RUBY,
2050 has_preprocessor: false,
2051 },
2052 ),
2053 (
2054 Language::Scala,
2055 StaticLangConfig {
2056 line_comments: &["//"],
2057 block_comment: Some(("/*", "*/")),
2058 allow_single_quote_strings: true,
2059 allow_double_quote_strings: true,
2060 allow_triple_quote_strings: false,
2061 allow_csharp_verbatim_strings: false,
2062 symbol_patterns: SP_SCALA,
2063 has_preprocessor: false,
2064 },
2065 ),
2066 (
2067 Language::Scss,
2068 StaticLangConfig {
2069 line_comments: &["//"],
2070 block_comment: Some(("/*", "*/")),
2071 allow_single_quote_strings: true,
2072 allow_double_quote_strings: true,
2073 allow_triple_quote_strings: false,
2074 allow_csharp_verbatim_strings: false,
2075 symbol_patterns: SP_NONE,
2076 has_preprocessor: false,
2077 },
2078 ),
2079 (
2080 Language::Sql,
2081 StaticLangConfig {
2082 line_comments: &["--"],
2083 block_comment: Some(("/*", "*/")),
2084 allow_single_quote_strings: true,
2085 allow_double_quote_strings: false,
2086 allow_triple_quote_strings: false,
2087 allow_csharp_verbatim_strings: false,
2088 symbol_patterns: SP_SQL,
2089 has_preprocessor: false,
2090 },
2091 ),
2092 (
2093 Language::Swift,
2094 StaticLangConfig {
2095 line_comments: &["//"],
2096 block_comment: Some(("/*", "*/")),
2097 allow_single_quote_strings: false,
2098 allow_double_quote_strings: true,
2099 allow_triple_quote_strings: false,
2100 allow_csharp_verbatim_strings: false,
2101 symbol_patterns: SP_SWIFT,
2102 has_preprocessor: false,
2103 },
2104 ),
2105 (
2106 Language::Xml,
2107 StaticLangConfig {
2108 line_comments: &[],
2109 block_comment: Some(("<!--", "-->")),
2110 allow_single_quote_strings: false,
2111 allow_double_quote_strings: false,
2112 allow_triple_quote_strings: false,
2113 allow_csharp_verbatim_strings: false,
2114 symbol_patterns: SP_NONE,
2115 has_preprocessor: false,
2116 },
2117 ),
2118 (
2119 Language::Zig,
2120 StaticLangConfig {
2121 line_comments: &["//"],
2122 block_comment: None,
2123 allow_single_quote_strings: true,
2124 allow_double_quote_strings: true,
2125 allow_triple_quote_strings: false,
2126 allow_csharp_verbatim_strings: false,
2127 symbol_patterns: SP_ZIG,
2128 has_preprocessor: false,
2129 },
2130 ),
2131];
2132
/// Switches that adjust how physical lines are classified.
///
/// NOTE(review): the name suggests an IEEE line-counting convention —
/// confirm which standard these flags implement.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// When set, a comment-free code line whose trimmed text starts with `#`
    /// is additionally tallied as a compiler directive line.
    has_preprocessor_directives: bool,
    /// When set, a line that begins inside an open block comment is flagged
    /// as multi-line comment even if it is blank.
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside block comments and strings)
    /// is merged with the following line before classification.
    collapse_continuation_lines: bool,
}
2144
/// Kind of string literal the scanner is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Literal closed by the stored quote character; a backslash escapes the
    /// character that follows it.
    Single(char),
    /// Literal closed by the stored multi-character delimiter
    /// (`"""` or `'''`).
    Triple(&'static str),
    /// Literal opened with `@"`; closed by `"`, with `""` standing for an
    /// embedded quote.
    VerbatimDouble,
}
2151
/// What the scanner found on one logical line; input to `classify_line`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// The line contains code (including any part of a string literal).
    has_code: bool,
    /// The line contains a line comment.
    has_single_comment: bool,
    /// The line contains part of a block comment.
    has_multi_comment: bool,
    /// The line belongs to a docstring. Nothing in the generic scanner sets
    /// this; it is only merged and read there.
    has_docstring: bool,
}
2160
2161fn process_string_char(
2165 state: StringState,
2166 chars: &[char],
2167 i: usize,
2168) -> (Option<StringState>, usize) {
2169 match state {
2170 StringState::Single(delim) => {
2171 if chars[i] == '\\' {
2172 return (Some(state), 2); }
2174 if chars[i] == delim {
2175 (None, 1)
2176 } else {
2177 (Some(state), 1)
2178 }
2179 }
2180 StringState::Triple(delim) => {
2181 if starts_with(chars, i, delim) {
2182 (None, delim.len())
2183 } else {
2184 (Some(state), 1)
2185 }
2186 }
2187 StringState::VerbatimDouble => {
2188 if starts_with(chars, i, "\"\"") {
2189 return (Some(state), 2); }
2191 if chars[i] == '"' {
2192 (None, 1)
2193 } else {
2194 (Some(state), 1)
2195 }
2196 }
2197 }
2198}
2199
2200fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2204 if starts_with(chars, i, close) {
2205 (false, close.len())
2206 } else {
2207 (true, 1)
2208 }
2209}
2210
2211fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2215 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2216 return Some((StringState::VerbatimDouble, 2));
2217 }
2218 if config.allow_triple_quote_strings {
2219 if starts_with(chars, i, "\"\"\"") {
2220 return Some((StringState::Triple("\"\"\""), 3));
2221 }
2222 if starts_with(chars, i, "'''") {
2223 return Some((StringState::Triple("'''"), 3));
2224 }
2225 }
2226 if config.allow_single_quote_strings && chars[i] == '\'' {
2227 return Some((StringState::Single('\''), 1));
2228 }
2229 if config.allow_double_quote_strings && chars[i] == '"' {
2230 return Some((StringState::Single('"'), 1));
2231 }
2232 None
2233}
2234
2235fn step_through_block_comment(
2241 chars: &[char],
2242 i: usize,
2243 block_comment: Option<(&'static str, &'static str)>,
2244 in_block_comment: &mut bool,
2245) -> usize {
2246 if let Some((_, close)) = block_comment {
2247 let (still_in, advance) = process_block_comment_char(chars, i, close);
2248 *in_block_comment = still_in;
2249 return advance;
2250 }
2251 0
2252}
2253
2254fn try_open_block_comment(
2257 chars: &[char],
2258 i: usize,
2259 block_comment: Option<(&'static str, &'static str)>,
2260) -> Option<usize> {
2261 let (open, _) = block_comment?;
2262 starts_with(chars, i, open).then_some(open.len())
2263}
2264
2265fn scan_line(
2269 chars: &[char],
2270 config: &ScanConfig,
2271 facts: &mut LineFacts,
2272 in_block_comment: &mut bool,
2273 string_state: &mut Option<StringState>,
2274) {
2275 let mut i = 0usize;
2276 while i < chars.len() {
2277 if let Some(state) = *string_state {
2279 facts.has_code = true;
2280 let (new_state, advance) = process_string_char(state, chars, i);
2281 *string_state = new_state;
2282 i += advance;
2283 continue;
2284 }
2285
2286 if *in_block_comment {
2288 facts.has_multi_comment = true;
2289 i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
2290 continue;
2291 }
2292
2293 if chars[i].is_whitespace() {
2295 i += 1;
2296 continue;
2297 }
2298
2299 if let Some((new_state, advance)) = try_open_string(chars, i, config) {
2301 facts.has_code = true;
2302 *string_state = Some(new_state);
2303 i += advance;
2304 continue;
2305 }
2306
2307 if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
2309 facts.has_multi_comment = true;
2310 *in_block_comment = true;
2311 i += advance;
2312 continue;
2313 }
2314
2315 if config
2317 .line_comments
2318 .iter()
2319 .any(|prefix| starts_with(chars, i, prefix))
2320 {
2321 facts.has_single_comment = true;
2322 break;
2323 }
2324
2325 facts.has_code = true;
2327 i += 1;
2328 }
2329}
2330
2331fn finalize_line_facts(
2336 facts: LineFacts,
2337 trimmed: &str,
2338 raw: &mut RawLineCounts,
2339 ieee: IeeeFlags,
2340 in_block_comment: bool,
2341 string_state: Option<StringState>,
2342 pending_continuation: &mut Option<LineFacts>,
2343) -> Option<LineFacts> {
2344 if ieee.has_preprocessor_directives
2348 && facts.has_code
2349 && !facts.has_single_comment
2350 && !facts.has_multi_comment
2351 && trimmed.starts_with('#')
2352 {
2353 raw.compiler_directive_lines += 1;
2354 }
2355
2356 let is_continuation = ieee.collapse_continuation_lines
2359 && !in_block_comment
2360 && string_state.is_none()
2361 && trimmed.ends_with('\\');
2362
2363 if is_continuation {
2364 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2365 pending.has_code |= facts.has_code;
2366 pending.has_single_comment |= facts.has_single_comment;
2367 pending.has_multi_comment |= facts.has_multi_comment;
2368 pending.has_docstring |= facts.has_docstring;
2369 return None; }
2371
2372 let emit = if let Some(pending) = pending_continuation.take() {
2374 LineFacts {
2375 has_code: pending.has_code | facts.has_code,
2376 has_single_comment: pending.has_single_comment | facts.has_single_comment,
2377 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2378 has_docstring: pending.has_docstring | facts.has_docstring,
2379 }
2380 } else {
2381 facts
2382 };
2383 Some(emit)
2384}
2385
2386#[allow(clippy::needless_pass_by_value)]
2391#[allow(clippy::too_many_arguments)]
2392#[allow(clippy::many_single_char_names)] fn process_physical_line(
2394 line: &str,
2395 line_idx: usize,
2396 config: &ScanConfig,
2397 raw: &mut RawLineCounts,
2398 in_block_comment: &mut bool,
2399 string_state: &mut Option<StringState>,
2400 pending_continuation: &mut Option<LineFacts>,
2401 ieee: IeeeFlags,
2402) {
2403 raw.total_physical_lines += 1;
2404
2405 if config.skip_lines.contains(&line_idx) {
2406 raw.docstring_comment_lines += 1;
2407 return;
2408 }
2409
2410 let trimmed = line.trim();
2411 let mut facts = LineFacts::default();
2412
2413 if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
2417 facts.has_multi_comment = true;
2418 }
2419
2420 let chars: Vec<char> = line.chars().collect();
2421 scan_line(&chars, config, &mut facts, in_block_comment, string_state);
2422
2423 let Some(emit) = finalize_line_facts(
2424 facts,
2425 trimmed,
2426 raw,
2427 ieee,
2428 *in_block_comment,
2429 *string_state,
2430 pending_continuation,
2431 ) else {
2432 return;
2433 };
2434
2435 classify_line(raw, &emit, trimmed);
2436
2437 if emit.has_code {
2438 let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
2439 raw.functions += f;
2440 raw.classes += c;
2441 raw.variables += v;
2442 raw.imports += i;
2443 raw.test_count += t;
2444 raw.test_assertion_count += a;
2445 raw.test_suite_count += s;
2446 }
2447}
2448
2449#[allow(clippy::needless_pass_by_value)]
2450fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
2451 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2452 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2453
2454 let mut raw = RawLineCounts::default();
2455 let mut warnings = Vec::new();
2456
2457 let mut in_block_comment = false;
2458 let mut string_state: Option<StringState> = None;
2459 let mut pending_continuation: Option<LineFacts> = None;
2461
2462 for (line_idx, line) in lines.iter().enumerate() {
2463 process_physical_line(
2464 line,
2465 line_idx,
2466 &config,
2467 &mut raw,
2468 &mut in_block_comment,
2469 &mut string_state,
2470 &mut pending_continuation,
2471 ieee,
2472 );
2473 }
2474
2475 if let Some(pending) = pending_continuation.take() {
2477 classify_line(&mut raw, &pending, "");
2478 }
2479
2480 if in_block_comment {
2481 warnings.push("unclosed block comment detected; result is best effort".into());
2482 }
2483 if string_state.is_some() {
2484 warnings.push("unclosed string literal detected; result is best effort".into());
2485 }
2486
2487 RawFileAnalysis {
2488 raw,
2489 parse_mode: if warnings.is_empty() {
2490 ParseMode::Lexical
2491 } else {
2492 ParseMode::LexicalBestEffort
2493 },
2494 warnings,
2495 }
2496}
2497
2498const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
2499 if facts.has_docstring {
2500 raw.docstring_comment_lines += 1;
2501 } else if !facts.has_code
2502 && !facts.has_single_comment
2503 && !facts.has_multi_comment
2504 && trimmed.is_empty()
2505 {
2506 raw.blank_only_lines += 1;
2507 } else if facts.has_code && facts.has_single_comment {
2508 raw.mixed_code_single_comment_lines += 1;
2509 } else if facts.has_code && facts.has_multi_comment {
2510 raw.mixed_code_multi_comment_lines += 1;
2511 } else if facts.has_code {
2512 raw.code_only_lines += 1;
2513 } else if facts.has_single_comment {
2514 raw.single_comment_only_lines += 1;
2515 } else if facts.has_multi_comment {
2516 raw.multi_comment_only_lines += 1;
2517 } else if trimmed.is_empty() {
2518 raw.blank_only_lines += 1;
2519 } else {
2520 raw.skipped_unknown_lines += 1;
2521 }
2522}
2523
2524fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
2525 let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
2526 let fn_pp = if patterns.functions_prefix_paren.is_empty() {
2529 0
2530 } else if let Some(paren_pos) = trimmed.find('(') {
2531 if !trimmed[..paren_pos].contains('=') {
2532 hit(patterns.functions_prefix_paren)
2533 } else {
2534 0
2535 }
2536 } else {
2537 0
2538 };
2539 (
2540 hit(patterns.functions) | fn_pp,
2541 hit(patterns.classes),
2542 hit(patterns.variables),
2543 hit(patterns.imports),
2544 hit(patterns.tests),
2545 hit(patterns.assertions),
2546 hit(patterns.test_suites),
2547 )
2548}
2549
/// Returns `true` if the characters of `needle` appear in `chars` starting
/// at position `index`.
///
/// An `index` past the end of `chars` yields `false`; an empty `needle`
/// matches at every position up to and including `chars.len()`.
///
/// The comparison walks both sequences in lockstep, so no buffer is
/// allocated — this runs once per character and pattern in the scan loop.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
2554
/// One entry of the indentation stack used while looking for Python
/// docstrings.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation (count of leading whitespace characters) of the line that
    /// opened this context; `0` for the module-level context.
    indent: usize,
    /// `true` until the first significant line of the context has been
    /// examined; only that line may start the context's docstring.
    expect_docstring: bool,
}
2560
2561fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
2563 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
2564 contexts.pop();
2565 }
2566}
2567
2568fn py_handle_pending_indent(
2571 pending_block_indent: &mut Option<usize>,
2572 contexts: &mut Vec<PyContext>,
2573 indent: usize,
2574 trimmed: &str,
2575) {
2576 let Some(base_indent) = *pending_block_indent else {
2577 return;
2578 };
2579 if indent > base_indent {
2580 contexts.push(PyContext {
2581 indent,
2582 expect_docstring: true,
2583 });
2584 *pending_block_indent = None;
2585 } else if !trimmed.starts_with('@') {
2586 *pending_block_indent = None;
2587 }
2588}
2589
2590fn py_try_record_docstring(
2596 ctx: &mut PyContext,
2597 trimmed: &str,
2598 idx: usize,
2599 docstring_lines: &mut HashSet<usize>,
2600 active_docstring: &mut Option<(&'static str, usize)>,
2601) -> bool {
2602 if !ctx.expect_docstring {
2603 return false;
2604 }
2605 if let Some(delim) = docstring_delimiter(trimmed) {
2606 docstring_lines.insert(idx);
2607 ctx.expect_docstring = false;
2608 if !closes_triple_docstring(trimmed, delim, true) {
2609 *active_docstring = Some((delim, idx));
2610 }
2611 return true;
2612 }
2613 ctx.expect_docstring = false;
2614 false
2615}
2616
2617fn track_active_docstring(
2621 active_docstring: &mut Option<(&'static str, usize)>,
2622 docstring_lines: &mut HashSet<usize>,
2623 idx: usize,
2624 trimmed: &str,
2625) -> bool {
2626 let Some((delim, start_line)) = *active_docstring else {
2627 return false;
2628 };
2629 docstring_lines.insert(idx);
2630 if closes_triple_docstring(trimmed, delim, idx == start_line) {
2631 *active_docstring = None;
2632 }
2633 true
2634}
2635
2636fn try_record_docstring_if_context(
2639 contexts: &mut [PyContext],
2640 trimmed: &str,
2641 idx: usize,
2642 docstring_lines: &mut HashSet<usize>,
2643 active_docstring: &mut Option<(&'static str, usize)>,
2644) -> bool {
2645 let Some(ctx) = contexts.last_mut() else {
2646 return false;
2647 };
2648 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
2649}
2650
/// Marks every line from the start of a still-open docstring to the end of
/// the file as a docstring line.
///
/// Does nothing when no docstring is active.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some((_, first_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(*first_line..num_lines);
}
2663
2664fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
2665 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2666 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2667
2668 let mut docstring_lines = HashSet::new();
2669 let mut contexts = vec![PyContext {
2670 indent: 0,
2671 expect_docstring: true,
2672 }];
2673 let mut pending_block_indent: Option<usize> = None;
2674 let mut active_docstring: Option<(&'static str, usize)> = None;
2675
2676 for (idx, line) in lines.iter().enumerate() {
2677 let trimmed = line.trim();
2678 let indent = leading_indent(line);
2679
2680 if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
2681 continue;
2682 }
2683
2684 if trimmed.is_empty() || trimmed.starts_with('#') {
2686 continue;
2687 }
2688
2689 py_pop_outdented_contexts(&mut contexts, indent);
2690 py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
2691
2692 if try_record_docstring_if_context(
2693 &mut contexts,
2694 trimmed,
2695 idx,
2696 &mut docstring_lines,
2697 &mut active_docstring,
2698 ) {
2699 continue;
2700 }
2701
2702 if is_python_block_header(trimmed) {
2703 pending_block_indent = Some(indent);
2704 }
2705 }
2706
2707 mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
2708
2709 docstring_lines
2710}
2711
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, so a tab contributes 1. A line made
/// up entirely of whitespace yields its full character count.
fn leading_indent(line: &str) -> usize {
    let body = line.trim_start();
    let prefix_bytes = line.len() - body.len();
    // Count characters rather than bytes so multi-byte whitespace still counts once.
    line[..prefix_bytes].chars().count()
}
2715
/// Reports whether `trimmed` is a single-line `def`, `async def` or `class` header.
///
/// The line must start with one of those keywords and, once a trailing
/// `# comment` is set aside, end with `:`. This accepts `def f():  # note` and
/// rejects one-liners such as `def f(): return 1  # done:`, whose only
/// trailing colon sits inside the comment.
///
/// Signatures spread over several lines are not recognised: only the line that
/// starts with the keyword is inspected.
fn is_python_block_header(trimmed: &str) -> bool {
    // Returns `line` without a trailing `# comment`. A `#` inside a single- or
    // double-quoted string (e.g. a default argument) does not start a comment.
    fn without_trailing_comment(line: &str) -> &str {
        let mut open_quote: Option<char> = None;
        let mut escaped = false;
        for (pos, ch) in line.char_indices() {
            match open_quote {
                // The character after a backslash never closes the string.
                Some(_) if escaped => escaped = false,
                Some(_) if ch == '\\' => escaped = true,
                Some(quote) if ch == quote => open_quote = None,
                Some(_) => {}
                None if ch == '"' || ch == '\'' => open_quote = Some(ch),
                None if ch == '#' => return line[..pos].trim_end(),
                None => {}
            }
        }
        line
    }

    let opens_definition = ["def ", "async def ", "class "]
        .iter()
        .any(|keyword| trimmed.starts_with(*keyword));

    opens_definition && without_trailing_comment(trimmed).ends_with(':')
}
2722
/// Returns the triple-quote delimiter that `trimmed` starts with, if any.
///
/// Any run of the string-prefix letters `r`, `u`, `b`, `f` (either case) at the
/// start of the line is skipped before looking for `"""` or `'''`. Lines that
/// begin with anything else, including single-quoted strings, yield `None`.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    const TRIPLE_QUOTES: [&str; 2] = ["\"\"\"", "'''"];

    let after_prefix = trimmed
        .trim_start_matches(|c: char| matches!(c.to_ascii_lowercase(), 'r' | 'u' | 'b' | 'f'));

    TRIPLE_QUOTES
        .iter()
        .copied()
        .find(|quote| after_prefix.starts_with(*quote))
}
2744
/// Decides whether a triple-quoted string delimited by `delim` is closed on this line.
///
/// Non-overlapping occurrences of `delim` in `trimmed` are counted. On the line
/// that opened the string (`same_line_as_start`) the first occurrence is the
/// opener, so two are needed; on any later line one occurrence closes it.
///
/// An empty `delim` can never close anything and yields `false`. Without that
/// guard an empty pattern matches at every position, which previously made the
/// counting loop spin forever.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    if delim.is_empty() {
        return false;
    }

    let required = if same_line_as_start { 2 } else { 1 };
    trimmed.matches(delim).count() >= required
}
2759
/// Line classification driven by tree-sitter syntax trees.
///
/// Only compiled when the `tree-sitter` feature is enabled.
#[cfg(feature = "tree-sitter")]
pub mod ts {
    use tree_sitter::Node;

    use super::{ParseMode, RawFileAnalysis, RawLineCounts};

    /// Parses `text` with `ts_language` and tallies each line by what the tree puts on it.
    ///
    /// * `comment_node_kinds` - node kinds that are treated as comments.
    /// * `docstring_stmt_kind` - when set, a node of this kind whose single named
    ///   child is a `string` node is treated as a docstring.
    ///
    /// Returns `None` when the parser rejects the grammar or produces no tree.
    fn analyze_lines(
        text: &str,
        ts_language: &tree_sitter::Language,
        comment_node_kinds: &[&str],
        docstring_stmt_kind: Option<&str>,
    ) -> Option<RawFileAnalysis> {
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(ts_language).ok()?;
        let tree = parser.parse(text, None)?;

        let lines: Vec<&str> = text.split_terminator('\n').collect();
        let line_total = lines.len();

        // One flag per source line, filled in while walking the tree.
        let mut code_rows = vec![false; line_total];
        let mut comment_rows = vec![false; line_total];
        let mut block_comment_rows = vec![false; line_total];
        let mut docstring_rows = vec![false; line_total];

        visit(
            tree.root_node(),
            &mut VisitCtx {
                source: text.as_bytes(),
                comment_kinds: comment_node_kinds,
                docstring_stmt_kind,
                has_code: &mut code_rows,
                has_comment: &mut comment_rows,
                comment_is_block: &mut block_comment_rows,
                has_docstring: &mut docstring_rows,
            },
        );

        let mut raw = RawLineCounts::default();
        classify_ts_lines(
            &lines,
            &code_rows,
            &comment_rows,
            &block_comment_rows,
            &docstring_rows,
            &mut raw,
        );

        Some(RawFileAnalysis {
            raw,
            parse_mode: ParseMode::TreeSitter,
            warnings: Vec::new(),
        })
    }

    /// What the syntax tree found on a single source line.
    #[allow(clippy::struct_excessive_bools)]
    #[derive(Clone, Copy)]
    struct TsLineFlags {
        /// A non-comment leaf node touches the line.
        has_code: bool,
        /// A comment node touches the line.
        has_comment: bool,
        /// A comment on the line starts with `/*` or `<#`.
        comment_is_block: bool,
        /// A docstring touches the line.
        has_docstring: bool,
    }

    /// Adds one line to the matching counter of `raw`.
    ///
    /// Precedence: blank line, then docstring without code, then code with a
    /// comment (block or single), then comment only (block or single), and
    /// finally plain code.
    const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
        if trimmed.is_empty() {
            raw.blank_only_lines += 1;
            return;
        }
        if flags.has_docstring && !flags.has_code {
            raw.docstring_comment_lines += 1;
            return;
        }
        match (flags.has_code, flags.has_comment, flags.comment_is_block) {
            (true, true, true) => raw.mixed_code_multi_comment_lines += 1,
            (true, true, false) => raw.mixed_code_single_comment_lines += 1,
            (false, true, true) => raw.multi_comment_only_lines += 1,
            (false, true, false) => raw.single_comment_only_lines += 1,
            // No comment on the line: whatever remains is counted as code.
            (_, false, _) => raw.code_only_lines += 1,
        }
    }

    /// Classifies every entry of `lines` using the per-line flag slices.
    ///
    /// The flag slices are indexed by line position and must be at least as
    /// long as `lines`. Each line also bumps `raw.total_physical_lines`.
    fn classify_ts_lines(
        lines: &[&str],
        has_code: &[bool],
        has_comment: &[bool],
        comment_is_block: &[bool],
        has_docstring: &[bool],
        raw: &mut RawLineCounts,
    ) {
        for (row, line) in lines.iter().enumerate() {
            raw.total_physical_lines += 1;
            let flags = TsLineFlags {
                has_code: has_code[row],
                has_comment: has_comment[row],
                comment_is_block: comment_is_block[row],
                has_docstring: has_docstring[row],
            };
            classify_ts_line(line.trim(), flags, raw);
        }
    }

    /// Shared state for the tree walk: parser inputs plus the per-line flag vectors.
    struct VisitCtx<'a> {
        /// Source bytes, used to read the text of comment nodes.
        source: &'a [u8],
        /// Node kinds treated as comments.
        comment_kinds: &'a [&'a str],
        /// Statement kind that may wrap a docstring, if docstrings are tracked.
        docstring_stmt_kind: Option<&'a str>,
        has_code: &'a mut Vec<bool>,
        has_comment: &'a mut Vec<bool>,
        comment_is_block: &'a mut Vec<bool>,
        has_docstring: &'a mut Vec<bool>,
    }

    /// Flags every row covered by a comment node, noting block comments (`/*` or `<#`).
    fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
        // Text that is not valid UTF-8 is treated as empty, i.e. not a block comment.
        let comment_text = node.utf8_text(ctx.source).unwrap_or("");
        let is_block = comment_text.starts_with("/*") || comment_text.starts_with("<#");

        let tracked_rows = ctx.has_comment.len();
        let rows = node.start_position().row..=node.end_position().row;
        for row in rows.filter(|&row| row < tracked_rows) {
            ctx.has_comment[row] = true;
            if is_block {
                ctx.comment_is_block[row] = true;
            }
        }
    }

    /// Flags the rows of a docstring if `node` is a docstring statement.
    ///
    /// A docstring statement is a node of the configured statement kind with
    /// exactly one named child, and that child has kind `string`. Returns
    /// `true` when the node was handled, so the caller must not descend into it.
    fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
        if ctx.docstring_stmt_kind != Some(kind) {
            return false;
        }
        if node.named_child_count() != 1 {
            return false;
        }
        let literal = match node.named_child(0) {
            Some(child) if child.kind() == "string" => child,
            _ => return false,
        };

        let tracked_rows = ctx.has_docstring.len();
        let rows = literal.start_position().row..=literal.end_position().row;
        for row in rows.filter(|&row| row < tracked_rows) {
            ctx.has_docstring[row] = true;
        }
        true
    }

    /// Flags every row covered by a leaf node as containing code.
    fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
        let tracked_rows = ctx.has_code.len();
        let rows = node.start_position().row..=node.end_position().row;
        for row in rows.filter(|&row| row < tracked_rows) {
            ctx.has_code[row] = true;
        }
    }

    /// Walks the subtree at `node` depth-first and fills in the per-line flags.
    ///
    /// Comment nodes and docstring statements are handled as a whole and not
    /// descended into. Childless nodes that are not extras count as code.
    /// Every other node is expanded into its children.
    #[allow(clippy::too_many_lines)]
    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
        let kind = node.kind();

        if ctx.comment_kinds.iter().any(|&candidate| candidate == kind) {
            visit_comment_node(node, ctx);
            return;
        }

        if visit_maybe_docstring(node, kind, ctx) {
            return;
        }

        let child_total = node.child_count();
        if child_total == 0 && !node.is_extra() {
            visit_leaf_code(node, ctx);
            return;
        }

        for i in 0..child_total {
            #[allow(clippy::cast_possible_truncation)]
            let child_index = i as u32;
            if let Some(child) = node.child(child_index) {
                visit(child, ctx);
            }
        }
    }

    /// Classifies the lines of C source `text` with the tree-sitter C grammar.
    ///
    /// `comment` nodes are treated as comments; no docstring detection is done.
    /// Returns `None` when parsing fails.
    #[must_use]
    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
        let grammar: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
        analyze_lines(text, &grammar, &["comment"], None)
    }

    /// Classifies the lines of Python source `text` with the tree-sitter Python grammar.
    ///
    /// `comment` nodes are treated as comments, and an `expression_statement`
    /// wrapping a lone string is treated as a docstring. Returns `None` when
    /// parsing fails.
    #[must_use]
    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
        let grammar: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        analyze_lines(text, &grammar, &["comment"], Some("expression_statement"))
    }
}
2991
// Unit tests for line classification and language detection.
#[cfg(test)]
mod tests {
    use super::*;

    // A Python file with a module docstring and a function docstring: both
    // docstring lines must be counted separately from code, and the line with a
    // trailing `#` comment must be counted as mixed code + single-line comment.
    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
    """function docs"""
    value = 1 # trailing comment
    return value
"#;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        // One module docstring line plus one function docstring line.
        assert_eq!(result.raw.docstring_comment_lines, 2);
        // `value = 1 # trailing comment`
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        // Presumably `def fn_a():` and `return value`.
        assert_eq!(result.raw.code_only_lines, 2);
    }

    // C input with code followed by a `//` comment on one line and a
    // standalone `/* ... */` comment on the next.
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let result = analyze_text(Language::C, input, AnalysisOptions::default());
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.multi_comment_only_lines, 1);
    }

    // A path without an extension is identified from the shebang line passed
    // as the second argument.
    #[test]
    fn detect_language_by_shebang() {
        let language = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(language, Some(Language::Shell));
    }
}
3031}