1use std::collections::{BTreeMap, BTreeSet, HashSet};
5use std::path::Path;
6
7use serde::{Deserialize, Serialize};
8
/// Programming, markup, and build-file languages this module knows about.
///
/// Serialized through serde with `snake_case` variant names. The derived
/// `PartialOrd`/`Ord` follow declaration order, so reordering variants changes the
/// iteration order of sorted collections such as `BTreeSet<Language>`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
}
55
56impl Language {
57 #[must_use]
58 pub const fn display_name(&self) -> &'static str {
59 match self {
60 Self::C => "C",
61 Self::Cpp => "C++",
62 Self::CSharp => "C#",
63 Self::Go => "Go",
64 Self::Java => "Java",
65 Self::JavaScript => "JavaScript",
66 Self::Python => "Python",
67 Self::Rust => "Rust",
68 Self::Shell => "Shell",
69 Self::PowerShell => "PowerShell",
70 Self::TypeScript => "TypeScript",
71 Self::Assembly => "Assembly",
72 Self::Clojure => "Clojure",
73 Self::Css => "CSS",
74 Self::Dart => "Dart",
75 Self::Dockerfile => "Dockerfile",
76 Self::Elixir => "Elixir",
77 Self::Erlang => "Erlang",
78 Self::FSharp => "F#",
79 Self::Groovy => "Groovy",
80 Self::Haskell => "Haskell",
81 Self::Html => "HTML",
82 Self::Julia => "Julia",
83 Self::Kotlin => "Kotlin",
84 Self::Lua => "Lua",
85 Self::Makefile => "Makefile",
86 Self::Nim => "Nim",
87 Self::ObjectiveC => "Objective-C",
88 Self::Ocaml => "OCaml",
89 Self::Perl => "Perl",
90 Self::Php => "PHP",
91 Self::R => "R",
92 Self::Ruby => "Ruby",
93 Self::Scala => "Scala",
94 Self::Scss => "SCSS",
95 Self::Sql => "SQL",
96 Self::Svelte => "Svelte",
97 Self::Swift => "Swift",
98 Self::Vue => "Vue",
99 Self::Xml => "XML",
100 Self::Zig => "Zig",
101 }
102 }
103
104 #[must_use]
105 pub const fn as_slug(&self) -> &'static str {
106 match self {
107 Self::C => "c",
108 Self::Cpp => "cpp",
109 Self::CSharp => "csharp",
110 Self::Go => "go",
111 Self::Java => "java",
112 Self::JavaScript => "javascript",
113 Self::Python => "python",
114 Self::Rust => "rust",
115 Self::Shell => "shell",
116 Self::PowerShell => "powershell",
117 Self::TypeScript => "typescript",
118 Self::Assembly => "assembly",
119 Self::Clojure => "clojure",
120 Self::Css => "css",
121 Self::Dart => "dart",
122 Self::Dockerfile => "dockerfile",
123 Self::Elixir => "elixir",
124 Self::Erlang => "erlang",
125 Self::FSharp => "fsharp",
126 Self::Groovy => "groovy",
127 Self::Haskell => "haskell",
128 Self::Html => "html",
129 Self::Julia => "julia",
130 Self::Kotlin => "kotlin",
131 Self::Lua => "lua",
132 Self::Makefile => "makefile",
133 Self::Nim => "nim",
134 Self::ObjectiveC => "objectivec",
135 Self::Ocaml => "ocaml",
136 Self::Perl => "perl",
137 Self::Php => "php",
138 Self::R => "r",
139 Self::Ruby => "ruby",
140 Self::Scala => "scala",
141 Self::Scss => "scss",
142 Self::Sql => "sql",
143 Self::Svelte => "svelte",
144 Self::Swift => "swift",
145 Self::Vue => "vue",
146 Self::Xml => "xml",
147 Self::Zig => "zig",
148 }
149 }
150
151 #[must_use]
152 pub fn from_name(name: &str) -> Option<Self> {
153 match name.trim().to_ascii_lowercase().as_str() {
154 "c" => Some(Self::C),
155 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
156 "csharp" | "c#" | "cs" => Some(Self::CSharp),
157 "go" | "golang" => Some(Self::Go),
158 "java" => Some(Self::Java),
159 "javascript" | "js" => Some(Self::JavaScript),
160 "python" | "py" => Some(Self::Python),
161 "rust" | "rs" => Some(Self::Rust),
162 "shell" | "sh" | "bash" => Some(Self::Shell),
163 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
164 "typescript" | "ts" => Some(Self::TypeScript),
165 "assembly" | "asm" => Some(Self::Assembly),
166 "clojure" | "clj" => Some(Self::Clojure),
167 "css" => Some(Self::Css),
168 "dart" => Some(Self::Dart),
169 "dockerfile" | "docker" => Some(Self::Dockerfile),
170 "elixir" | "ex" => Some(Self::Elixir),
171 "erlang" | "erl" => Some(Self::Erlang),
172 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
173 "groovy" => Some(Self::Groovy),
174 "haskell" | "hs" => Some(Self::Haskell),
175 "html" | "htm" => Some(Self::Html),
176 "julia" | "jl" => Some(Self::Julia),
177 "kotlin" | "kt" => Some(Self::Kotlin),
178 "lua" => Some(Self::Lua),
179 "makefile" | "make" | "mk" => Some(Self::Makefile),
180 "nim" => Some(Self::Nim),
181 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
182 "ocaml" | "ml" => Some(Self::Ocaml),
183 "perl" | "pl" => Some(Self::Perl),
184 "php" => Some(Self::Php),
185 "r" => Some(Self::R),
186 "ruby" | "rb" => Some(Self::Ruby),
187 "scala" => Some(Self::Scala),
188 "scss" | "sass" => Some(Self::Scss),
189 "sql" => Some(Self::Sql),
190 "svelte" => Some(Self::Svelte),
191 "swift" => Some(Self::Swift),
192 "vue" => Some(Self::Vue),
193 "xml" => Some(Self::Xml),
194 "zig" => Some(Self::Zig),
195 _ => None,
196 }
197 }
198}
199
/// Per-file tallies of line categories and detected symbols.
///
/// Every counter is a `u64`; `Default` yields all zeros. Fields carrying
/// `#[serde(default)]` deserialize to `0` when absent from the input.
/// NOTE(review): the classification rules behind each counter live in the scanner,
/// which is not part of this definition — the field names are the only contract here.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    // Line-classification counters (required when deserializing).
    pub total_physical_lines: u64,
    pub blank_only_lines: u64,
    pub code_only_lines: u64,
    pub single_comment_only_lines: u64,
    pub multi_comment_only_lines: u64,
    pub mixed_code_single_comment_lines: u64,
    pub mixed_code_multi_comment_lines: u64,
    pub docstring_comment_lines: u64,
    pub skipped_unknown_lines: u64,
    // Symbol and test counters (optional when deserializing; default to 0).
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub compiler_directive_lines: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
}
241
/// Identifies the analysis strategy recorded in a [`RawFileAnalysis`].
///
/// Serialized through serde with `snake_case` variant names.
/// NOTE(review): which analyzer sets which variant is not visible here; `TreeSitter`
/// presumably corresponds to the `tree-sitter` feature path — confirm.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    Lexical,
    LexicalBestEffort,
    TreeSitter,
}
249
/// Result of analyzing one file's text: the counters, the parse mode that produced
/// them, and any warnings collected along the way.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// Line and symbol counters for the file.
    pub raw: RawLineCounts,
    /// Strategy that produced `raw`.
    pub parse_mode: ParseMode,
    /// Free-form warning messages; empty when nothing was reported.
    pub warnings: Vec<String>,
}
256
/// Switches that tune how `analyze_text` classifies lines.
///
/// Both flags are forwarded unchanged to the generic scanner.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// NOTE(review): by its name, counts blank lines inside a block comment as
    /// comment lines rather than blank lines — confirm against the scanner.
    pub blank_in_block_comment_as_comment: bool,
    /// NOTE(review): by its name, merges continuation lines into one logical line —
    /// confirm against the scanner.
    pub collapse_continuation_lines: bool,
}
270
271impl Default for AnalysisOptions {
272 fn default() -> Self {
273 Self {
274 blank_in_block_comment_as_comment: true,
275 collapse_continuation_lines: false,
276 }
277 }
278}
279
280#[must_use]
281pub fn supported_languages() -> BTreeSet<Language> {
282 [
283 Language::Assembly,
284 Language::C,
285 Language::Clojure,
286 Language::Cpp,
287 Language::CSharp,
288 Language::Css,
289 Language::Dart,
290 Language::Dockerfile,
291 Language::Elixir,
292 Language::Erlang,
293 Language::FSharp,
294 Language::Go,
295 Language::Groovy,
296 Language::Haskell,
297 Language::Html,
298 Language::Java,
299 Language::JavaScript,
300 Language::Julia,
301 Language::Kotlin,
302 Language::Lua,
303 Language::Makefile,
304 Language::Nim,
305 Language::ObjectiveC,
306 Language::Ocaml,
307 Language::Perl,
308 Language::Php,
309 Language::PowerShell,
310 Language::Python,
311 Language::R,
312 Language::Ruby,
313 Language::Rust,
314 Language::Scala,
315 Language::Scss,
316 Language::Shell,
317 Language::Sql,
318 Language::Svelte,
319 Language::Swift,
320 Language::TypeScript,
321 Language::Vue,
322 Language::Xml,
323 Language::Zig,
324 ]
325 .into_iter()
326 .collect()
327}
328
329fn detect_by_shebang(line: &str) -> Option<Language> {
331 let lower = line.to_ascii_lowercase();
332 if !lower.starts_with("#!") {
333 return None;
334 }
335 if lower.contains("python") {
336 return Some(Language::Python);
337 }
338 if lower.contains("pwsh") || lower.contains("powershell") {
339 return Some(Language::PowerShell);
340 }
341 if lower.contains("bash")
342 || lower.contains("/sh")
343 || lower.contains("zsh")
344 || lower.contains("ksh")
345 {
346 return Some(Language::Shell);
347 }
348 if lower.contains("ruby") {
349 return Some(Language::Ruby);
350 }
351 if lower.contains("perl") {
352 return Some(Language::Perl);
353 }
354 if lower.contains("php") {
355 return Some(Language::Php);
356 }
357 if lower.contains("node") || lower.contains("nodejs") {
358 return Some(Language::JavaScript);
359 }
360 None
361}
362
363fn detect_by_extension(ext: &str) -> Option<Language> {
365 static EXT_MAP: &[(&str, Language)] = &[
367 ("c", Language::C),
368 ("h", Language::C),
369 ("cc", Language::Cpp),
370 ("cp", Language::Cpp),
371 ("cpp", Language::Cpp),
372 ("cxx", Language::Cpp),
373 ("hh", Language::Cpp),
374 ("hpp", Language::Cpp),
375 ("hxx", Language::Cpp),
376 ("cs", Language::CSharp),
377 ("go", Language::Go),
378 ("java", Language::Java),
379 ("js", Language::JavaScript),
380 ("mjs", Language::JavaScript),
381 ("cjs", Language::JavaScript),
382 ("py", Language::Python),
383 ("rs", Language::Rust),
384 ("sh", Language::Shell),
385 ("bash", Language::Shell),
386 ("zsh", Language::Shell),
387 ("ksh", Language::Shell),
388 ("ps1", Language::PowerShell),
389 ("psm1", Language::PowerShell),
390 ("psd1", Language::PowerShell),
391 ("ts", Language::TypeScript),
392 ("mts", Language::TypeScript),
393 ("cts", Language::TypeScript),
394 ("asm", Language::Assembly),
395 ("s", Language::Assembly),
396 ("clj", Language::Clojure),
397 ("cljs", Language::Clojure),
398 ("cljc", Language::Clojure),
399 ("edn", Language::Clojure),
400 ("css", Language::Css),
401 ("dart", Language::Dart),
402 ("ex", Language::Elixir),
403 ("exs", Language::Elixir),
404 ("erl", Language::Erlang),
405 ("hrl", Language::Erlang),
406 ("fs", Language::FSharp),
407 ("fsi", Language::FSharp),
408 ("fsx", Language::FSharp),
409 ("groovy", Language::Groovy),
410 ("gradle", Language::Groovy),
411 ("hs", Language::Haskell),
412 ("lhs", Language::Haskell),
413 ("html", Language::Html),
414 ("htm", Language::Html),
415 ("xhtml", Language::Html),
416 ("jl", Language::Julia),
417 ("kt", Language::Kotlin),
418 ("kts", Language::Kotlin),
419 ("lua", Language::Lua),
420 ("mk", Language::Makefile),
421 ("nim", Language::Nim),
422 ("nims", Language::Nim),
423 ("m", Language::ObjectiveC),
424 ("mm", Language::ObjectiveC),
425 ("ml", Language::Ocaml),
426 ("mli", Language::Ocaml),
427 ("pl", Language::Perl),
428 ("pm", Language::Perl),
429 ("t", Language::Perl),
430 ("php", Language::Php),
431 ("php3", Language::Php),
432 ("php4", Language::Php),
433 ("php5", Language::Php),
434 ("php7", Language::Php),
435 ("phtml", Language::Php),
436 ("r", Language::R),
437 ("rb", Language::Ruby),
438 ("rake", Language::Ruby),
439 ("scala", Language::Scala),
440 ("sc", Language::Scala),
441 ("scss", Language::Scss),
442 ("sass", Language::Scss),
443 ("sql", Language::Sql),
444 ("svelte", Language::Svelte),
445 ("swift", Language::Swift),
446 ("vue", Language::Vue),
447 ("xml", Language::Xml),
448 ("xsd", Language::Xml),
449 ("xsl", Language::Xml),
450 ("xslt", Language::Xml),
451 ("svg", Language::Xml),
452 ("zig", Language::Zig),
453 ];
454 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
455}
456
457fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
459 if filename == "Dockerfile"
461 || filename.starts_with("Dockerfile.")
462 || filename_lower == "dockerfile"
463 {
464 return Some(Language::Dockerfile);
465 }
466 if matches!(
468 filename,
469 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
470 ) {
471 return Some(Language::Makefile);
472 }
473 if matches!(
475 filename,
476 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
477 ) {
478 return Some(Language::Ruby);
479 }
480 None
481}
482
483#[must_use]
484#[allow(clippy::too_many_lines)]
485pub fn detect_language(
486 path: &Path,
487 first_line: Option<&str>,
488 extension_overrides: &BTreeMap<String, String>,
489 shebang_detection: bool,
490) -> Option<Language> {
491 let extension = path
492 .extension()
493 .and_then(|ext| ext.to_str())
494 .map(str::to_ascii_lowercase);
495
496 if let Some(ext) = extension.as_ref() {
498 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
499 if let Some(lang) = Language::from_name(override_name) {
500 return Some(lang);
501 }
502 }
503 }
504
505 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
507 let filename_lower = filename.to_ascii_lowercase();
508
509 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
510 return Some(lang);
511 }
512
513 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
515 return Some(lang);
516 }
517
518 if shebang_detection {
520 if let Some(line) = first_line {
521 if let Some(lang) = detect_by_shebang(line) {
522 return Some(lang);
523 }
524 }
525 }
526
527 None
528}
529
530#[must_use]
531pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
532 #[cfg(feature = "tree-sitter")]
534 {
535 match language {
536 Language::C | Language::Cpp => {
537 if let Some(result) = ts::analyze_c(text) {
538 return result;
539 }
540 }
541 Language::Python => {
542 if let Some(result) = ts::analyze_python(text) {
543 return result;
544 }
545 }
546 _ => {}
547 }
548 }
549
550 let (mut config, has_preprocessor) = language_scan_config(language);
551
552 if language == Language::Python {
554 config.skip_lines = detect_python_docstring_lines(text);
555 }
556
557 let flags = IeeeFlags {
560 has_preprocessor_directives: has_preprocessor,
561 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
562 collapse_continuation_lines: options.collapse_continuation_lines,
563 };
564 analyze_generic(text, config, flags)
565}
566
567fn language_scan_config(language: Language) -> (ScanConfig, bool) {
575 let cfg = LANG_SCAN_TABLE
576 .iter()
577 .find_map(|&(l, c)| (l == language).then_some(c))
578 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
579 (
580 ScanConfig {
581 line_comments: cfg.line_comments,
582 block_comment: cfg.block_comment,
583 allow_single_quote_strings: cfg.allow_single_quote_strings,
584 allow_double_quote_strings: cfg.allow_double_quote_strings,
585 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
586 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
587 skip_lines: HashSet::new(),
588 symbol_patterns: cfg.symbol_patterns,
589 },
590 cfg.has_preprocessor,
591 )
592}
593
/// Literal text patterns used to spot declarations and tests in one language.
///
/// Each field is a static list of strings; an empty list means the category is not
/// detected for that language.
/// NOTE(review): how a pattern is compared with a source line (prefix vs. substring,
/// trimming, first-match-wins) is decided by the scanner and is not visible here.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Patterns that mark function or method declarations.
    functions: &'static [&'static str],
    /// Patterns that mark class-like or type declarations.
    classes: &'static [&'static str],
    /// Patterns that mark variable declarations.
    variables: &'static [&'static str],
    /// Patterns that mark import/include statements.
    imports: &'static [&'static str],
    /// Patterns that mark test cases.
    tests: &'static [&'static str],
    /// Patterns that mark test assertions.
    assertions: &'static [&'static str],
    /// Patterns that mark test suites or fixtures.
    test_suites: &'static [&'static str],
}
613
614impl SymbolPatterns {
615 const fn none() -> Self {
616 Self {
617 functions: &[],
618 classes: &[],
619 variables: &[],
620 imports: &[],
621 tests: &[],
622 assertions: &[],
623 test_suites: &[],
624 }
625 }
626}
627
/// Pattern set with every category empty, built from `SymbolPatterns::none()`.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();
629
/// Symbol patterns for Rust: `fn`, type-like items, `let`, `use`/`extern crate`,
/// test attributes, and assertion macros. No test-suite markers are defined.
///
/// NOTE(review): several entries begin with a shorter entry of the same list
/// (`"let mut "` vs `"let "`); whether both are needed depends on how the scanner
/// matches — confirm.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    // Structs, enums, traits, impl blocks, and type aliases all count as "classes".
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
    // Test attributes; "#[test_case" has no closing bracket so argument lists match.
    tests: &[
        "#[test]",
        "#[tokio::test]",
        "#[actix_web::test]",
        "#[rstest]",
        "#[test_case",
    ],
    assertions: &[
        "assert_eq!(",
        "assert_ne!(",
        "assert!(",
        "assert_matches!(",
        "assert_err!(",
        "assert_ok!(",
    ],
    test_suites: &[],
};
684
/// Symbol patterns for Python: `def`/`async def`, `class`, `import`/`from`,
/// `test_`-prefixed functions and `Test`-prefixed classes, and `unittest`-style
/// `self.assert*` calls. No variable or test-suite patterns are defined.
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    classes: &["class "],
    variables: &[],
    imports: &["import ", "from "],
    tests: &["def test_", "async def test_", "class Test"],
    assertions: &[
        "self.assertEqual(",
        "self.assertNotEqual(",
        "self.assertTrue(",
        "self.assertFalse(",
        "self.assertIsNone(",
        "self.assertIsNotNone(",
        "self.assertIn(",
        "self.assertNotIn(",
        "self.assertRaises(",
        "self.assertAlmostEqual(",
    ],
    test_suites: &[],
};
706
/// Symbol patterns for JavaScript: `function` declarations, `class`, `var`/`let`/
/// `const` (plain and exported), `import`, `describe`/`it`/`test` calls including
/// their `.each` forms, and `expect(` assertions. No test-suite patterns are defined.
const SP_JS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
737
/// Symbol patterns for TypeScript. Functions, variables, imports, tests, and
/// assertions are the same lists as `SP_JS`; `classes` additionally covers
/// `abstract class`, `interface`, and `declare` forms.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
778
/// Symbol patterns for Go: `func`, `type`, `var`, `import`, and the `Test`/
/// `Benchmark`/`Fuzz` function-name prefixes. No assertion or test-suite patterns
/// are defined.
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    classes: &["type "],
    variables: &["var "],
    imports: &["import "],
    tests: &["func Test", "func Benchmark", "func Fuzz"],
    assertions: &[],
    test_suites: &[],
};
789
/// Symbol patterns for Java: class/interface/enum/record/annotation declarations,
/// `import`, JUnit test annotations, and JUnit/AssertJ-style `assert*(` calls.
/// Functions and variables have no patterns here.
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "@TestFactory",
        "@TestTemplate",
    ],
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "assertAll(",
        "assertArrayEquals(",
        "assertIterableEquals(",
        "assertLinesMatch(",
    ],
    test_suites: &[],
};
835
/// Symbol patterns for C#: type declarations with common modifiers, `var`, `using`,
/// test attributes, `Assert.*(` calls, and suite/fixture attributes.
/// Functions have no patterns here.
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    variables: &["var "],
    imports: &["using "],
    // Attributes from MSTest ([TestMethod], [DataRow]), NUnit ([Test], [TestCase])
    // and xUnit ([Fact], [Theory], [InlineData], [MemberData]).
    tests: &[
        "[TestMethod]",
        "[Test]",
        "[Fact]",
        "[Theory]",
        "[TestCase(",
        "[DataRow(",
        "[InlineData(",
        "[MemberData(",
    ],
    assertions: &[
        "Assert.AreEqual(",
        "Assert.AreNotEqual(",
        "Assert.IsTrue(",
        "Assert.IsFalse(",
        "Assert.IsNull(",
        "Assert.IsNotNull(",
        "Assert.Equal(",
        "Assert.NotEqual(",
        "Assert.True(",
        "Assert.False(",
        "Assert.That(",
        "Assert.Contains(",
        "Assert.Throws(",
        "Assert.ThrowsAsync(",
        "Assert.IsInstanceOfType(",
    ],
    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
};
893
/// Test-case macro and function names shared by the C and C++ pattern sets.
///
/// NOTE(review): `TEST_GROUP(`, `TEST_GROUP_BASE(`, `BOOST_AUTO_TEST_SUITE(` and
/// `CPPUNIT_TEST_SUITE(` also appear in `SUITE_PATTERNS_C_CPP`, so such lines may be
/// counted both as a test and as a suite — confirm this is intended.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
934
/// Assertion macro and function names shared by the C and C++ pattern sets.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // GoogleTest fatal assertions
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // GoogleTest non-fatal assertions
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1008
/// Test-suite/group macro names shared by the C and C++ pattern sets
/// (CppUTest groups, Boost.Test suites, CppUnit suites).
///
/// NOTE(review): `CPPUNIT_TEST_SUITE_END(` closes a suite rather than opening one;
/// listing it alongside `CPPUNIT_TEST_SUITE(` may count each CppUnit suite twice —
/// confirm against the scanner's counting rules.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1017
/// Symbol patterns for C: struct/union/enum declarations (plain and `typedef`),
/// `#include`, and the shared C/C++ test, assertion, and suite lists.
/// Functions and variables have no patterns here.
const SP_C: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "struct ",
        "typedef struct ",
        "union ",
        "typedef union ",
        "typedef enum ",
    ],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1033
/// Symbol patterns for C++: `class`/`struct`/`namespace`/`template` count as
/// "classes", `#include` as imports, plus the shared C/C++ test, assertion, and
/// suite lists. Functions and variables have no patterns here.
const SP_CPP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "struct ", "namespace ", "template "],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1043
/// Symbol patterns for shell scripts: the `function` keyword, `declare`/`local`/
/// `export` as variables, and `source` or its `. ` shorthand as imports.
/// No class or test patterns are defined.
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function "],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    imports: &["source ", ". "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1053
/// Symbol patterns for PowerShell: `function` (two casings), `class`,
/// `Import-Module`/`using`, and Pester-style `Describe`/`It`/`Context` blocks.
/// No variable, assertion, or test-suite patterns are defined.
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "Function "],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
    tests: &["Describe ", "It ", "Context "],
    assertions: &[],
    test_suites: &[],
};
1064
/// Symbol patterns for Kotlin: `fun` with common modifiers, class/object/interface
/// declarations, `val`/`var`, `import`, JUnit annotations, and JUnit- and
/// Kotest-style assertion calls. No test-suite patterns are defined.
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
    // The quoted entries presumably target string-named specs such as
    // `"should ..." { }` — TODO confirm which spec styles are intended.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "\"should ",
        "\"it ",
    ],
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "shouldBe(",
        "shouldNotBe(",
        "shouldThrow(",
    ],
    test_suites: &[],
};
1116
/// Symbol patterns for Swift: `func` with common modifiers, type declarations
/// (class/struct/protocol/enum/extension/actor), `var`/`let`, `import`,
/// `test`-prefixed functions and `@Test`, and `XCTAssert*`/`#expect(` assertions.
/// No test-suite patterns are defined.
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
    tests: &["func test", "func Test", "@Test"],
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
        "#expect(",
    ],
    test_suites: &[],
};
1172
/// Symbol patterns for Ruby: `def` (optionally after `private`/`protected`),
/// `class`/`module`, `require`/`require_relative`, and `it`/`describe`/`context`/
/// `test` blocks. No variable, assertion, or test-suite patterns are defined.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def "],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    tests: &["it ", "it(", "describe ", "context ", "test "],
    assertions: &[],
    test_suites: &[],
};
1183
/// Symbol patterns for Scala: `def` with common modifiers, class/object/trait
/// declarations, `val`/`var`/`lazy val`, `import`, and `test(`/`it(`/`describe(`
/// calls. No assertion or test-suite patterns are defined.
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    tests: &["test(", "it(", "describe("],
    assertions: &[],
    test_suites: &[],
};
1201
/// Symbol patterns for PHP: `function` with visibility/static/abstract/final
/// modifiers, class-like declarations, `use`/`require`/`include` forms,
/// `test`-prefixed methods, and the `#[Test]`/`#[DataProvider(` attributes.
/// No variable, assertion, or test-suite patterns are defined.
///
/// NOTE(review): `"public function test"` contains `"function test"`; depending on
/// how the scanner matches, one of the two may be redundant — confirm.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    assertions: &[],
    test_suites: &[],
};
1241
/// Symbol patterns for Elixir: the `def*` definition macros, `defmodule`/
/// `defprotocol`/`defimpl` as "classes", `import`/`alias`/`use`/`require`, and
/// `test`/`describe` blocks. No variable, assertion, or test-suite patterns are
/// defined.
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    tests: &["test ", "describe "],
    assertions: &[],
    test_suites: &[],
};
1259
/// Symbol patterns for Erlang: the `-module(` attribute as a "class" and the
/// `-import(`/`-include(`/`-include_lib(` attributes as imports.
/// Nothing else is detected.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1269
/// Symbol patterns for F#: `let`/`member`/`override` bindings as functions, `type`,
/// `let mutable` as variables, `open`, and NUnit/xUnit-style test attributes.
/// No assertion or test-suite patterns are defined.
///
/// NOTE(review): `"let mutable "` (variables) begins with `"let "` (functions), so a
/// mutable binding may be counted in both categories — confirm against the scanner.
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    assertions: &[],
    test_suites: &[],
};
1286
/// Symbol patterns for Groovy: `def` with visibility modifiers, class-like
/// declarations, `import`, and test markers (`def "` string-named methods, `@Test`,
/// and Spock block labels). No variable, assertion, or test-suite patterns are
/// defined.
///
/// NOTE(review): `given:`/`when:`/`then:`/`expect:` are each listed as a test
/// pattern, so one Spock feature method may match several times — confirm how the
/// scanner counts them.
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    assertions: &[],
    test_suites: &[],
};
1297
/// Symbol patterns for Haskell: `class`/`data`/`newtype`/`type` declarations and
/// `import`. Nothing else is detected.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1307
/// Symbol patterns for Lua: `function`/`local function`, `local` variables, and
/// `it(`/`describe(`/`pending(` test calls. No class, import, assertion, or
/// test-suite patterns are defined.
///
/// NOTE(review): `"local function "` (functions) begins with `"local "` (variables),
/// so a local function may be counted in both categories — confirm.
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    classes: &[],
    variables: &["local "],
    imports: &[],
    tests: &["it(", "describe(", "pending("],
    assertions: &[],
    test_suites: &[],
};
1318
/// Symbol patterns for Nim: routine keywords (`proc`, `func`, `method`, `iterator`,
/// `converter`, `template`, `macro`), `type`, `var`/`let`/`const`, `import`/`from`,
/// and `test ` blocks. No assertion or test-suite patterns are defined.
const SP_NIM: SymbolPatterns = SymbolPatterns {
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
};
1337
/// Symbol patterns for Objective-C: `- (`/`+ (` method declarations,
/// `@interface`/`@implementation`/`@protocol`, `#import`/`#include`,
/// `- (void)test…` methods, and `XCTAssert*` assertions. No variable or test-suite
/// patterns are defined.
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    functions: &["- (", "+ ("],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    tests: &["- (void)test"],
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
};
1359
/// Symbol patterns for OCaml: `let`/`let rec` bindings as functions,
/// `type`/`module`/`class` as "classes", and `open` as imports.
/// No variable or test patterns are defined.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    functions: &["let ", "let rec "],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1369
/// Symbol patterns for Perl: `sub`, `package` as a "class", `my`/`our`/`local`
/// variables, and `use`/`require`. No test patterns are defined.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1379
/// Symbol patterns for Clojure, each including the opening parenthesis: `defn`-style
/// definitions, record/protocol/type/interface definitions, `def`/`defonce`,
/// `ns`/`require`, and `deftest`/`testing` forms. No assertion or test-suite
/// patterns are defined.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
};
1395
/// Symbol patterns for Julia: `function`/`macro`, struct and abstract/primitive type
/// declarations, `const`, `import`/`using`, and the `@test`/`@testset` macros.
/// No assertion or test-suite patterns are defined.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
};
1411
/// Line-prefix patterns used to count Dart symbols.
///
/// Referenced by the `Language::Dart` entry of `LANG_SCAN_TABLE`; each
/// pattern is compared with `str::starts_with` against a trimmed source line
/// (see `count_symbols`).
const SP_DART: SymbolPatterns = SymbolPatterns {
    // No function prefixes are listed, so Dart functions are never counted
    // by this table.
    functions: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    // `group(` is counted as a test here, not as a test suite.
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
};
1422
/// Line-prefix patterns used to count R symbols.
///
/// Referenced by the `Language::R` entry of `LANG_SCAN_TABLE`; each pattern
/// is compared with `str::starts_with` against a trimmed source line (see
/// `count_symbols`).
const SP_R: SymbolPatterns = SymbolPatterns {
    // No function, class or variable prefixes are listed for R.
    functions: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // Lines starting with `expect_` are counted as tests here rather than as
    // assertions.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
1433
/// Line-prefix patterns used to count SQL symbols.
///
/// Referenced by the `Language::Sql` entry of `LANG_SCAN_TABLE`; each pattern
/// is compared with `str::starts_with` against a trimmed source line (see
/// `count_symbols`). That comparison is case-sensitive, which is why every
/// statement is spelled in both all-lowercase and all-uppercase; mixed-case
/// spellings such as `Create Table` are not matched.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    // Stored functions and procedures.
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    // Tables, views and schemas are counted as classes.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1459
/// Line-prefix patterns used to count assembly-language symbols.
///
/// Referenced by the `Language::Assembly` entry of `LANG_SCAN_TABLE`; each
/// pattern is compared with `str::starts_with` against a trimmed source line
/// (see `count_symbols`).
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    // Only lines that begin with the keyword match; a line that begins with a
    // label followed by `proc` does not.
    functions: &["proc ", "PROC "],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1469
/// Line-prefix patterns used to count Zig symbols.
///
/// Referenced by the `Language::Zig` entry of `LANG_SCAN_TABLE`; each pattern
/// is compared with `str::starts_with` against a trimmed source line (see
/// `count_symbols`).
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    classes: &[],
    // Only `var` declarations are listed; `const` declarations are not
    // counted by this table.
    variables: &["var ", "pub var "],
    imports: &[],
    // Named tests (`test "..."`) and the brace form written without a space.
    tests: &["test \"", "test{"],
    assertions: &[],
    test_suites: &[],
};
1486
/// Compile-time lexical description of one language.
///
/// One value per language is stored in `LANG_SCAN_TABLE`. The first seven
/// fields have the same names and types as fields of `ScanConfig`;
/// presumably they are copied into it when a scan starts (the conversion
/// code is outside this part of the file, so confirm there).
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Prefixes that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, or `None` when the
    /// language has no block-comment syntax configured.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal.
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal.
    allow_double_quote_strings: bool,
    /// Whether `"""` and `'''` open multi-character-delimited strings.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a C#-style verbatim string.
    allow_csharp_verbatim_strings: bool,
    /// Line-prefix patterns used for symbol counting.
    symbol_patterns: SymbolPatterns,
    /// Whether the language is flagged as having a preprocessor.
    /// NOTE(review): presumably feeds `IeeeFlags::has_preprocessor_directives`;
    /// the consumer is not visible here.
    has_preprocessor: bool,
}
1503
/// Per-file settings consumed by the generic line scanner
/// (`analyze_generic` and the helpers it drives).
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Prefixes that start a line comment; `scan_line` stops scanning a line
    /// at the first one found outside a string or block comment.
    line_comments: &'static [&'static str],
    /// `(open, close)` block-comment delimiters, or `None` if unsupported.
    block_comment: Option<(&'static str, &'static str)>,
    /// Whether `'` opens a string literal (see `try_open_string`).
    allow_single_quote_strings: bool,
    /// Whether `"` opens a string literal (see `try_open_string`).
    allow_double_quote_strings: bool,
    /// Whether `"""` / `'''` open triple-quoted strings.
    allow_triple_quote_strings: bool,
    /// Whether `@"` opens a C#-style verbatim string.
    allow_csharp_verbatim_strings: bool,
    /// Zero-based line indices that `process_physical_line` counts as
    /// docstring lines without scanning them.
    skip_lines: HashSet<usize>,
    /// Line-prefix patterns passed to `count_symbols` for lines with code.
    symbol_patterns: SymbolPatterns,
}
1516
/// Lexical scanning settings for each supported language, stored as
/// `(Language, StaticLangConfig)` pairs.
///
/// Where a block-comment opener begins with a line-comment prefix (Lua
/// `--[[`, Julia `#=`, Nim `#[`), the block form still wins because
/// `scan_line` tests for a block-comment opener before line-comment prefixes.
static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
    (
        Language::C,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_C,
            has_preprocessor: true,
        },
    ),
    (
        Language::Cpp,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_CPP,
            has_preprocessor: true,
        },
    ),
    (
        Language::ObjectiveC,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_OBJECTIVEC,
            has_preprocessor: true,
        },
    ),
    (
        Language::CSharp,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            // The only entry that enables `@"..."` verbatim strings.
            allow_csharp_verbatim_strings: true,
            symbol_patterns: SP_CSHARP,
            // NOTE(review): C# also has `#`-prefixed directives (`#if`,
            // `#region`); confirm that `false` is intentional.
            has_preprocessor: false,
        },
    ),
    (
        Language::Go,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_GO,
            has_preprocessor: false,
        },
    ),
    (
        Language::Java,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_JAVA,
            has_preprocessor: false,
        },
    ),
    (
        Language::JavaScript,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_JS,
            has_preprocessor: false,
        },
    ),
    // Svelte and Vue reuse the JavaScript comment, string and symbol settings.
    (
        Language::Svelte,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_JS,
            has_preprocessor: false,
        },
    ),
    (
        Language::Vue,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_JS,
            has_preprocessor: false,
        },
    ),
    (
        Language::Rust,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            // Off so that a lone `'` (as in a lifetime such as `'a`) does not
            // open a string.
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_RUST,
            has_preprocessor: false,
        },
    ),
    (
        Language::Shell,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_SHELL,
            has_preprocessor: false,
        },
    ),
    (
        Language::PowerShell,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: Some(("<#", "#>")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_POWERSHELL,
            has_preprocessor: false,
        },
    ),
    (
        Language::TypeScript,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_TS,
            has_preprocessor: false,
        },
    ),
    (
        Language::Python,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: true,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_PYTHON,
            has_preprocessor: false,
        },
    ),
    (
        Language::Assembly,
        StaticLangConfig {
            line_comments: &[";"],
            block_comment: None,
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_ASSEMBLY,
            has_preprocessor: false,
        },
    ),
    (
        Language::Clojure,
        StaticLangConfig {
            line_comments: &[";"],
            block_comment: None,
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_CLOJURE,
            has_preprocessor: false,
        },
    ),
    (
        Language::Css,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_NONE,
            has_preprocessor: false,
        },
    ),
    (
        Language::Dart,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_DART,
            has_preprocessor: false,
        },
    ),
    (
        Language::Dockerfile,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_NONE,
            has_preprocessor: false,
        },
    ),
    (
        Language::Elixir,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_ELIXIR,
            has_preprocessor: false,
        },
    ),
    (
        Language::Erlang,
        StaticLangConfig {
            line_comments: &["%"],
            block_comment: None,
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_ERLANG,
            has_preprocessor: false,
        },
    ),
    (
        Language::FSharp,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("(*", "*)")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_FSHARP,
            has_preprocessor: false,
        },
    ),
    (
        Language::Groovy,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_GROOVY,
            has_preprocessor: false,
        },
    ),
    (
        Language::Haskell,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: Some(("{-", "-}")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_HASKELL,
            has_preprocessor: false,
        },
    ),
    (
        Language::Html,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("<!--", "-->")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_NONE,
            has_preprocessor: false,
        },
    ),
    (
        Language::Julia,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: Some(("#=", "=#")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: true,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_JULIA,
            has_preprocessor: false,
        },
    ),
    (
        Language::Kotlin,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_KOTLIN,
            has_preprocessor: false,
        },
    ),
    (
        Language::Lua,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: Some(("--[[", "]]")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_LUA,
            has_preprocessor: false,
        },
    ),
    (
        Language::Makefile,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_NONE,
            has_preprocessor: false,
        },
    ),
    (
        Language::Nim,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: Some(("#[", "]#")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_NIM,
            has_preprocessor: false,
        },
    ),
    (
        Language::Ocaml,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("(*", "*)")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_OCAML,
            has_preprocessor: false,
        },
    ),
    (
        Language::Perl,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_PERL,
            has_preprocessor: false,
        },
    ),
    (
        Language::Php,
        StaticLangConfig {
            // PHP is the only entry with two line-comment prefixes.
            line_comments: &["//", "#"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_PHP,
            has_preprocessor: false,
        },
    ),
    (
        Language::R,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_R,
            has_preprocessor: false,
        },
    ),
    (
        Language::Ruby,
        StaticLangConfig {
            line_comments: &["#"],
            block_comment: None,
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_RUBY,
            has_preprocessor: false,
        },
    ),
    (
        Language::Scala,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_SCALA,
            has_preprocessor: false,
        },
    ),
    (
        Language::Scss,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_NONE,
            has_preprocessor: false,
        },
    ),
    (
        Language::Sql,
        StaticLangConfig {
            line_comments: &["--"],
            block_comment: Some(("/*", "*/")),
            // Only `'` delimits strings here; `"` is scanned as ordinary code.
            allow_single_quote_strings: true,
            allow_double_quote_strings: false,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_SQL,
            has_preprocessor: false,
        },
    ),
    (
        Language::Swift,
        StaticLangConfig {
            line_comments: &["//"],
            block_comment: Some(("/*", "*/")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_SWIFT,
            has_preprocessor: false,
        },
    ),
    (
        Language::Xml,
        StaticLangConfig {
            line_comments: &[],
            block_comment: Some(("<!--", "-->")),
            allow_single_quote_strings: false,
            allow_double_quote_strings: false,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_NONE,
            has_preprocessor: false,
        },
    ),
    (
        Language::Zig,
        StaticLangConfig {
            line_comments: &["//"],
            // No block-comment delimiters are configured for Zig.
            block_comment: None,
            allow_single_quote_strings: true,
            allow_double_quote_strings: true,
            allow_triple_quote_strings: false,
            allow_csharp_verbatim_strings: false,
            symbol_patterns: SP_ZIG,
            has_preprocessor: false,
        },
    ),
];
2055
/// Switches for optional line-counting rules applied by the generic scanner.
///
/// NOTE(review): the name suggests these follow an IEEE counting convention;
/// the visible code does not say which one, so confirm before relying on it.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// When set, a code line without comments whose trimmed text starts with
    /// `#` is also counted as a compiler directive (see
    /// `finalize_line_facts`).
    has_preprocessor_directives: bool,
    /// When set, a blank line that starts inside an open block comment is
    /// treated as a comment line (see `process_physical_line`).
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside strings and block comments) is
    /// merged with the following line(s) and classified once.
    collapse_continuation_lines: bool,
}
2067
/// Kind of string literal the scanner is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// String closed by a single delimiter character (`'` or `"`); a
    /// backslash escapes the following character.
    Single(char),
    /// String closed by a multi-character delimiter (`"""` or `'''`); no
    /// escape handling is applied inside it.
    Triple(&'static str),
    /// String opened by `@"`; `""` is an escaped quote and a lone `"` closes
    /// it.
    VerbatimDouble,
}
2074
/// What was observed on one logical line; `classify_line` turns this into
/// exactly one counter increment.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// The line contains code (string literals count as code).
    has_code: bool,
    /// The line contains a line comment.
    has_single_comment: bool,
    /// The line contains, or lies inside, a block comment.
    has_multi_comment: bool,
    /// The line is a docstring line. `scan_line` never sets this; it is only
    /// carried through merging and read by `classify_line`.
    has_docstring: bool,
}
2083
2084fn process_string_char(
2088 state: StringState,
2089 chars: &[char],
2090 i: usize,
2091) -> (Option<StringState>, usize) {
2092 match state {
2093 StringState::Single(delim) => {
2094 if chars[i] == '\\' {
2095 return (Some(state), 2); }
2097 if chars[i] == delim {
2098 (None, 1)
2099 } else {
2100 (Some(state), 1)
2101 }
2102 }
2103 StringState::Triple(delim) => {
2104 if starts_with(chars, i, delim) {
2105 (None, delim.len())
2106 } else {
2107 (Some(state), 1)
2108 }
2109 }
2110 StringState::VerbatimDouble => {
2111 if starts_with(chars, i, "\"\"") {
2112 return (Some(state), 2); }
2114 if chars[i] == '"' {
2115 (None, 1)
2116 } else {
2117 (Some(state), 1)
2118 }
2119 }
2120 }
2121}
2122
2123fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2127 if starts_with(chars, i, close) {
2128 (false, close.len())
2129 } else {
2130 (true, 1)
2131 }
2132}
2133
2134fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2138 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2139 return Some((StringState::VerbatimDouble, 2));
2140 }
2141 if config.allow_triple_quote_strings {
2142 if starts_with(chars, i, "\"\"\"") {
2143 return Some((StringState::Triple("\"\"\""), 3));
2144 }
2145 if starts_with(chars, i, "'''") {
2146 return Some((StringState::Triple("'''"), 3));
2147 }
2148 }
2149 if config.allow_single_quote_strings && chars[i] == '\'' {
2150 return Some((StringState::Single('\''), 1));
2151 }
2152 if config.allow_double_quote_strings && chars[i] == '"' {
2153 return Some((StringState::Single('"'), 1));
2154 }
2155 None
2156}
2157
2158fn step_through_block_comment(
2164 chars: &[char],
2165 i: usize,
2166 block_comment: Option<(&'static str, &'static str)>,
2167 in_block_comment: &mut bool,
2168) -> usize {
2169 if let Some((_, close)) = block_comment {
2170 let (still_in, advance) = process_block_comment_char(chars, i, close);
2171 *in_block_comment = still_in;
2172 return advance;
2173 }
2174 0
2175}
2176
2177fn try_open_block_comment(
2180 chars: &[char],
2181 i: usize,
2182 block_comment: Option<(&'static str, &'static str)>,
2183) -> Option<usize> {
2184 let (open, _) = block_comment?;
2185 starts_with(chars, i, open).then_some(open.len())
2186}
2187
2188fn scan_line(
2192 chars: &[char],
2193 config: &ScanConfig,
2194 facts: &mut LineFacts,
2195 in_block_comment: &mut bool,
2196 string_state: &mut Option<StringState>,
2197) {
2198 let mut i = 0usize;
2199 while i < chars.len() {
2200 if let Some(state) = *string_state {
2202 facts.has_code = true;
2203 let (new_state, advance) = process_string_char(state, chars, i);
2204 *string_state = new_state;
2205 i += advance;
2206 continue;
2207 }
2208
2209 if *in_block_comment {
2211 facts.has_multi_comment = true;
2212 i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
2213 continue;
2214 }
2215
2216 if chars[i].is_whitespace() {
2218 i += 1;
2219 continue;
2220 }
2221
2222 if let Some((new_state, advance)) = try_open_string(chars, i, config) {
2224 facts.has_code = true;
2225 *string_state = Some(new_state);
2226 i += advance;
2227 continue;
2228 }
2229
2230 if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
2232 facts.has_multi_comment = true;
2233 *in_block_comment = true;
2234 i += advance;
2235 continue;
2236 }
2237
2238 if config
2240 .line_comments
2241 .iter()
2242 .any(|prefix| starts_with(chars, i, prefix))
2243 {
2244 facts.has_single_comment = true;
2245 break;
2246 }
2247
2248 facts.has_code = true;
2250 i += 1;
2251 }
2252}
2253
2254fn finalize_line_facts(
2259 facts: LineFacts,
2260 trimmed: &str,
2261 raw: &mut RawLineCounts,
2262 ieee: IeeeFlags,
2263 in_block_comment: bool,
2264 string_state: Option<StringState>,
2265 pending_continuation: &mut Option<LineFacts>,
2266) -> Option<LineFacts> {
2267 if ieee.has_preprocessor_directives
2271 && facts.has_code
2272 && !facts.has_single_comment
2273 && !facts.has_multi_comment
2274 && trimmed.starts_with('#')
2275 {
2276 raw.compiler_directive_lines += 1;
2277 }
2278
2279 let is_continuation = ieee.collapse_continuation_lines
2282 && !in_block_comment
2283 && string_state.is_none()
2284 && trimmed.ends_with('\\');
2285
2286 if is_continuation {
2287 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2288 pending.has_code |= facts.has_code;
2289 pending.has_single_comment |= facts.has_single_comment;
2290 pending.has_multi_comment |= facts.has_multi_comment;
2291 pending.has_docstring |= facts.has_docstring;
2292 return None; }
2294
2295 let emit = if let Some(pending) = pending_continuation.take() {
2297 LineFacts {
2298 has_code: pending.has_code | facts.has_code,
2299 has_single_comment: pending.has_single_comment | facts.has_single_comment,
2300 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2301 has_docstring: pending.has_docstring | facts.has_docstring,
2302 }
2303 } else {
2304 facts
2305 };
2306 Some(emit)
2307}
2308
2309#[allow(clippy::needless_pass_by_value)]
2314#[allow(clippy::too_many_arguments)]
2315#[allow(clippy::many_single_char_names)] fn process_physical_line(
2317 line: &str,
2318 line_idx: usize,
2319 config: &ScanConfig,
2320 raw: &mut RawLineCounts,
2321 in_block_comment: &mut bool,
2322 string_state: &mut Option<StringState>,
2323 pending_continuation: &mut Option<LineFacts>,
2324 ieee: IeeeFlags,
2325) {
2326 raw.total_physical_lines += 1;
2327
2328 if config.skip_lines.contains(&line_idx) {
2329 raw.docstring_comment_lines += 1;
2330 return;
2331 }
2332
2333 let trimmed = line.trim();
2334 let mut facts = LineFacts::default();
2335
2336 if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
2340 facts.has_multi_comment = true;
2341 }
2342
2343 let chars: Vec<char> = line.chars().collect();
2344 scan_line(&chars, config, &mut facts, in_block_comment, string_state);
2345
2346 let Some(emit) = finalize_line_facts(
2347 facts,
2348 trimmed,
2349 raw,
2350 ieee,
2351 *in_block_comment,
2352 *string_state,
2353 pending_continuation,
2354 ) else {
2355 return;
2356 };
2357
2358 classify_line(raw, &emit, trimmed);
2359
2360 if emit.has_code {
2361 let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
2362 raw.functions += f;
2363 raw.classes += c;
2364 raw.variables += v;
2365 raw.imports += i;
2366 raw.test_count += t;
2367 raw.test_assertion_count += a;
2368 raw.test_suite_count += s;
2369 }
2370}
2371
2372#[allow(clippy::needless_pass_by_value)]
2373fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
2374 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2375 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2376
2377 let mut raw = RawLineCounts::default();
2378 let mut warnings = Vec::new();
2379
2380 let mut in_block_comment = false;
2381 let mut string_state: Option<StringState> = None;
2382 let mut pending_continuation: Option<LineFacts> = None;
2384
2385 for (line_idx, line) in lines.iter().enumerate() {
2386 process_physical_line(
2387 line,
2388 line_idx,
2389 &config,
2390 &mut raw,
2391 &mut in_block_comment,
2392 &mut string_state,
2393 &mut pending_continuation,
2394 ieee,
2395 );
2396 }
2397
2398 if let Some(pending) = pending_continuation.take() {
2400 classify_line(&mut raw, &pending, "");
2401 }
2402
2403 if in_block_comment {
2404 warnings.push("unclosed block comment detected; result is best effort".into());
2405 }
2406 if string_state.is_some() {
2407 warnings.push("unclosed string literal detected; result is best effort".into());
2408 }
2409
2410 RawFileAnalysis {
2411 raw,
2412 parse_mode: if warnings.is_empty() {
2413 ParseMode::Lexical
2414 } else {
2415 ParseMode::LexicalBestEffort
2416 },
2417 warnings,
2418 }
2419}
2420
2421const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
2422 if facts.has_docstring {
2423 raw.docstring_comment_lines += 1;
2424 } else if !facts.has_code
2425 && !facts.has_single_comment
2426 && !facts.has_multi_comment
2427 && trimmed.is_empty()
2428 {
2429 raw.blank_only_lines += 1;
2430 } else if facts.has_code && facts.has_single_comment {
2431 raw.mixed_code_single_comment_lines += 1;
2432 } else if facts.has_code && facts.has_multi_comment {
2433 raw.mixed_code_multi_comment_lines += 1;
2434 } else if facts.has_code {
2435 raw.code_only_lines += 1;
2436 } else if facts.has_single_comment {
2437 raw.single_comment_only_lines += 1;
2438 } else if facts.has_multi_comment {
2439 raw.multi_comment_only_lines += 1;
2440 } else if trimmed.is_empty() {
2441 raw.blank_only_lines += 1;
2442 } else {
2443 raw.skipped_unknown_lines += 1;
2444 }
2445}
2446
2447fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
2448 let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
2449 (
2450 hit(patterns.functions),
2451 hit(patterns.classes),
2452 hit(patterns.variables),
2453 hit(patterns.imports),
2454 hit(patterns.tests),
2455 hit(patterns.assertions),
2456 hit(patterns.test_suites),
2457 )
2458}
2459
/// Reports whether `needle` occurs in `chars` starting exactly at `index`.
///
/// Returns `false` when `index` lies beyond the end of `chars` or when the
/// needle would run past the end. An empty needle matches at every position
/// up to and including `chars.len()`.
///
/// The scanner calls this several times per character, so the comparison
/// walks the needle's characters directly instead of collecting them into a
/// temporary `Vec` on every call.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    // `next()` yields `None` once the tail is exhausted, which makes a needle
    // longer than the remaining text fail to match.
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
2464
/// One level of the indentation stack kept by
/// `detect_python_docstring_lines`.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation (leading whitespace characters) of the block's body.
    indent: usize,
    /// `true` until the first statement of the block has been inspected;
    /// only that statement can be recorded as the block's docstring.
    expect_docstring: bool,
}
2470
2471fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
2473 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
2474 contexts.pop();
2475 }
2476}
2477
2478fn py_handle_pending_indent(
2481 pending_block_indent: &mut Option<usize>,
2482 contexts: &mut Vec<PyContext>,
2483 indent: usize,
2484 trimmed: &str,
2485) {
2486 let Some(base_indent) = *pending_block_indent else {
2487 return;
2488 };
2489 if indent > base_indent {
2490 contexts.push(PyContext {
2491 indent,
2492 expect_docstring: true,
2493 });
2494 *pending_block_indent = None;
2495 } else if !trimmed.starts_with('@') {
2496 *pending_block_indent = None;
2497 }
2498}
2499
2500fn py_try_record_docstring(
2506 ctx: &mut PyContext,
2507 trimmed: &str,
2508 idx: usize,
2509 docstring_lines: &mut HashSet<usize>,
2510 active_docstring: &mut Option<(&'static str, usize)>,
2511) -> bool {
2512 if !ctx.expect_docstring {
2513 return false;
2514 }
2515 if let Some(delim) = docstring_delimiter(trimmed) {
2516 docstring_lines.insert(idx);
2517 ctx.expect_docstring = false;
2518 if !closes_triple_docstring(trimmed, delim, true) {
2519 *active_docstring = Some((delim, idx));
2520 }
2521 return true;
2522 }
2523 ctx.expect_docstring = false;
2524 false
2525}
2526
2527fn track_active_docstring(
2531 active_docstring: &mut Option<(&'static str, usize)>,
2532 docstring_lines: &mut HashSet<usize>,
2533 idx: usize,
2534 trimmed: &str,
2535) -> bool {
2536 let Some((delim, start_line)) = *active_docstring else {
2537 return false;
2538 };
2539 docstring_lines.insert(idx);
2540 if closes_triple_docstring(trimmed, delim, idx == start_line) {
2541 *active_docstring = None;
2542 }
2543 true
2544}
2545
2546fn try_record_docstring_if_context(
2549 contexts: &mut [PyContext],
2550 trimmed: &str,
2551 idx: usize,
2552 docstring_lines: &mut HashSet<usize>,
2553 active_docstring: &mut Option<(&'static str, usize)>,
2554) -> bool {
2555 let Some(ctx) = contexts.last_mut() else {
2556 return false;
2557 };
2558 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
2559}
2560
/// Marks every line from the start of a still-open docstring to the end of
/// the file as a docstring line.
///
/// Does nothing when `active_docstring` is `None`, i.e. when every docstring
/// was closed before the input ended.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, start_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(start_line..num_lines);
}
2573
2574fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
2575 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2576 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2577
2578 let mut docstring_lines = HashSet::new();
2579 let mut contexts = vec![PyContext {
2580 indent: 0,
2581 expect_docstring: true,
2582 }];
2583 let mut pending_block_indent: Option<usize> = None;
2584 let mut active_docstring: Option<(&'static str, usize)> = None;
2585
2586 for (idx, line) in lines.iter().enumerate() {
2587 let trimmed = line.trim();
2588 let indent = leading_indent(line);
2589
2590 if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
2591 continue;
2592 }
2593
2594 if trimmed.is_empty() || trimmed.starts_with('#') {
2596 continue;
2597 }
2598
2599 py_pop_outdented_contexts(&mut contexts, indent);
2600 py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);
2601
2602 if try_record_docstring_if_context(
2603 &mut contexts,
2604 trimmed,
2605 idx,
2606 &mut docstring_lines,
2607 &mut active_docstring,
2608 ) {
2609 continue;
2610 }
2611
2612 if is_python_block_header(trimmed) {
2613 pending_block_indent = Some(indent);
2614 }
2615 }
2616
2617 mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());
2618
2619 docstring_lines
2620}
2621
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, so a tab contributes `1`. A line
/// consisting only of whitespace yields its full character count.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .position(|ch| !ch.is_whitespace())
        .unwrap_or_else(|| line.chars().count())
}
2625
/// Reports whether a trimmed line is a `def`, `async def` or `class` header.
///
/// The line must both start with one of those keywords (followed by a space)
/// and end with `:`. A header split over several lines, or one followed by a
/// trailing comment or a body on the same line, is therefore not recognised.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];
    trimmed.ends_with(':')
        && HEADER_KEYWORDS
            .iter()
            .any(|keyword| trimmed.starts_with(keyword))
}
2632
/// Returns the triple-quote delimiter that opens `trimmed`, if any.
///
/// Any run of the string-prefix letters `r`, `u`, `b` and `f` (either case)
/// at the start of the line is skipped before the delimiter is looked for.
/// The result is `"""` or `'''`, or `None` when the line does not begin with
/// a triple-quoted string.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    const DELIMITERS: [&str; 2] = ["\"\"\"", "'''"];

    let rest = trimmed.trim_start_matches(|ch: char| {
        matches!(ch, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F')
    });

    DELIMITERS
        .iter()
        .copied()
        .find(|delim| rest.starts_with(delim))
}
2654
/// Reports whether `trimmed` contains enough occurrences of `delim` to close a docstring.
///
/// * `same_line_as_start` – `true` when `trimmed` is the line that opened the docstring; the
///   opening delimiter is then part of the line, so two occurrences are required. Otherwise a
///   single occurrence closes it.
///
/// Occurrences are counted without overlap. An empty `delim` never closes anything (and, unlike
/// a manual `find` loop, cannot spin forever).
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    if delim.is_empty() {
        return false;
    }

    let required: usize = if same_line_as_start { 2 } else { 1 };

    // `nth` stops scanning as soon as the required occurrence has been seen.
    trimmed.matches(delim).nth(required - 1).is_some()
}
2669
#[cfg(feature = "tree-sitter")]
/// Line classification driven by a tree-sitter syntax tree.
///
/// The source is parsed once, every node is mapped onto the rows it occupies, and each physical
/// line is then tallied as blank, docstring, comment, code, or a code/comment mix.
pub mod ts {
    use tree_sitter::Node;

    use super::{ParseMode, RawFileAnalysis, RawLineCounts};

    /// Parses `text` with `ts_language` and tallies every physical line.
    ///
    /// * `comment_node_kinds` – node kinds that are treated as comments.
    /// * `docstring_stmt_kind` – when set, a node of this kind whose only named child is a
    ///   `string` node is counted as a docstring instead of code.
    ///
    /// Returns `None` when the parser rejects the language or does not produce a tree.
    fn analyze_lines(
        text: &str,
        ts_language: &tree_sitter::Language,
        comment_node_kinds: &[&str],
        docstring_stmt_kind: Option<&str>,
    ) -> Option<RawFileAnalysis> {
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(ts_language).ok()?;
        let tree = parser.parse(text, None)?;

        let lines: Vec<&str> = text.split_terminator('\n').collect();
        let n = lines.len();

        // One flag per physical line, indexed by row; rows outside this range are ignored.
        let mut has_code = vec![false; n];
        let mut has_comment = vec![false; n];
        let mut comment_is_block = vec![false; n];
        let mut has_docstring = vec![false; n];

        let mut ctx = VisitCtx {
            source: text.as_bytes(),
            comment_kinds: comment_node_kinds,
            docstring_stmt_kind,
            has_code: &mut has_code,
            has_comment: &mut has_comment,
            comment_is_block: &mut comment_is_block,
            has_docstring: &mut has_docstring,
        };
        visit(tree.root_node(), &mut ctx);

        let mut raw = RawLineCounts::default();
        classify_ts_lines(
            &lines,
            &has_code,
            &has_comment,
            &comment_is_block,
            &has_docstring,
            &mut raw,
        );

        Some(RawFileAnalysis {
            raw,
            parse_mode: ParseMode::TreeSitter,
            warnings: Vec::new(),
        })
    }

    /// What the syntax tree found on a single physical line.
    // The four flags are independent observations about a line, not an encoded state machine.
    #[allow(clippy::struct_excessive_bools)]
    #[derive(Clone, Copy)]
    struct TsLineFlags {
        has_code: bool,
        has_comment: bool,
        comment_is_block: bool,
        has_docstring: bool,
    }

    /// Adds one line to the matching counter in `raw`.
    ///
    /// Checked in this order: blank, docstring without code, code with a comment, comment only,
    /// code only. The comment cases are split into block and single-line counters by
    /// `comment_is_block`.
    const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
        if trimmed.is_empty() {
            raw.blank_only_lines += 1;
        } else if flags.has_docstring && !flags.has_code {
            raw.docstring_comment_lines += 1;
        } else if flags.has_code && flags.has_comment {
            if flags.comment_is_block {
                raw.mixed_code_multi_comment_lines += 1;
            } else {
                raw.mixed_code_single_comment_lines += 1;
            }
        } else if flags.has_comment {
            if flags.comment_is_block {
                raw.multi_comment_only_lines += 1;
            } else {
                raw.single_comment_only_lines += 1;
            }
        } else {
            raw.code_only_lines += 1;
        }
    }

    /// Counts every entry of `lines` as a physical line and classifies it from its flags.
    ///
    /// All flag slices must be at least as long as `lines`.
    fn classify_ts_lines(
        lines: &[&str],
        has_code: &[bool],
        has_comment: &[bool],
        comment_is_block: &[bool],
        has_docstring: &[bool],
        raw: &mut RawLineCounts,
    ) {
        for (row, line) in lines.iter().enumerate() {
            raw.total_physical_lines += 1;
            classify_ts_line(
                line.trim(),
                TsLineFlags {
                    has_code: has_code[row],
                    has_comment: has_comment[row],
                    comment_is_block: comment_is_block[row],
                    has_docstring: has_docstring[row],
                },
                raw,
            );
        }
    }

    /// Everything the tree walk needs: the source bytes, the language-specific node kinds, and
    /// the per-row flag buffers it fills in.
    struct VisitCtx<'a> {
        source: &'a [u8],
        comment_kinds: &'a [&'a str],
        docstring_stmt_kind: Option<&'a str>,
        has_code: &'a mut [bool],
        has_comment: &'a mut [bool],
        comment_is_block: &'a mut [bool],
        has_docstring: &'a mut [bool],
    }

    /// Inclusive range of rows on which `node` actually has content.
    ///
    /// A node's end point is the position just past its last byte. When that point sits at
    /// column 0 of a later row (a token that includes its own line break, for instance) the
    /// node holds nothing on that row, so the row is left out; otherwise the following line
    /// would be flagged for content it does not contain.
    fn occupied_rows(node: Node) -> std::ops::RangeInclusive<usize> {
        let start = node.start_position();
        let end = node.end_position();
        let last_row = if end.column == 0 && end.row > start.row {
            end.row - 1
        } else {
            end.row
        };
        start.row..=last_row
    }

    /// Sets the flag of every row in `rows` that exists in `flags`.
    fn mark_rows(flags: &mut [bool], rows: std::ops::RangeInclusive<usize>) {
        for row in rows {
            if let Some(flag) = flags.get_mut(row) {
                *flag = true;
            }
        }
    }

    /// Flags the rows of a comment node, and additionally marks them as block-comment rows when
    /// the comment text starts with `/*` or `<#`.
    fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
        // Text that is not valid UTF-8 or is shorter than two bytes is not a block comment.
        let opener = node
            .utf8_text(ctx.source)
            .ok()
            .and_then(|text| text.get(..2));
        let is_block = matches!(opener, Some("/*" | "<#"));

        mark_rows(ctx.has_comment, occupied_rows(node));
        if is_block {
            mark_rows(ctx.comment_is_block, occupied_rows(node));
        }
    }

    /// Flags `node` as a docstring when the language defines a docstring statement kind, `kind`
    /// equals it, and the node's single named child is a `string`.
    ///
    /// Returns `true` when the node was consumed as a docstring, `false` otherwise.
    fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
        let Some(stmt_kind) = ctx.docstring_stmt_kind else {
            return false;
        };
        if kind != stmt_kind || node.named_child_count() != 1 {
            return false;
        }
        let Some(child) = node.named_child(0) else {
            return false;
        };
        if child.kind() != "string" {
            return false;
        }
        mark_rows(ctx.has_docstring, occupied_rows(child));
        true
    }

    /// Flags every row occupied by a leaf node as containing code.
    fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
        mark_rows(ctx.has_code, occupied_rows(node));
    }

    /// Walks the tree depth-first and records what each node contributes to its rows.
    ///
    /// Comments and docstrings are handled as a whole and not descended into; any other node
    /// without children that is not an "extra" counts as code; everything else is recursed into.
    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
        let kind = node.kind();

        if ctx.comment_kinds.contains(&kind) {
            visit_comment_node(node, ctx);
            return;
        }

        if visit_maybe_docstring(node, kind, ctx) {
            return;
        }

        if node.child_count() == 0 && !node.is_extra() {
            visit_leaf_code(node, ctx);
            return;
        }

        // A cursor walks the siblings directly, avoiding an indexed lookup (and an integer
        // cast) for every child.
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            visit(child, ctx);
        }
    }

    /// Analyzes C source; `comment` nodes are comments and there is no docstring concept.
    ///
    /// Returns `None` when the grammar cannot be loaded or parsing yields no tree.
    #[must_use]
    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
        analyze_lines(text, &lang, &["comment"], None)
    }

    /// Analyzes Python source; `comment` nodes are comments and an `expression_statement` made
    /// of a single string counts as a docstring.
    ///
    /// Returns `None` when the grammar cannot be loaded or parsing yields no tree.
    #[must_use]
    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
        let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
        analyze_lines(text, &lang, &["comment"], Some("expression_statement"))
    }
}
2901
#[cfg(test)]
mod tests {
    use super::*;

    /// A module docstring and a function docstring are tallied as docstring lines, the
    /// assignment with a trailing `#` comment as a mixed line, and the rest as code.
    #[test]
    fn python_docstrings_are_separated() {
        let source = r#""""module docs"""


def fn_a():
    """function docs"""
    value = 1 # trailing comment
    return value
"#;

        let analysis = analyze_text(Language::Python, source, AnalysisOptions::default());
        let counts = &analysis.raw;

        assert_eq!(counts.docstring_comment_lines, 2);
        assert_eq!(counts.mixed_code_single_comment_lines, 1);
        assert_eq!(counts.code_only_lines, 2);
    }

    /// A `//` comment after code is a mixed line; a stand-alone `/* … */` is a block-comment
    /// line.
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let source = "int x = 1; // note\n/* block */\n";

        let analysis = analyze_text(Language::C, source, AnalysisOptions::default());
        let counts = &analysis.raw;

        assert_eq!(counts.mixed_code_single_comment_lines, 1);
        assert_eq!(counts.multi_comment_only_lines, 1);
    }

    /// An extension-less file is identified as shell from its `bash` shebang line.
    #[test]
    fn detect_language_by_shebang() {
        let no_overrides = BTreeMap::new();

        let detected = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &no_overrides,
            true,
        );

        assert_eq!(detected, Some(Language::Shell));
    }
}