1pub mod cpp_style;
5pub mod style;
6pub use cpp_style::{BraceStyle, CppStyleAnalysis, IndentStyle, PointerStyle, StyleGuideScore};
7pub use style::{StyleAnalysis, StyleSignal};
8
9use std::collections::{BTreeMap, BTreeSet, HashSet};
10use std::path::Path;
11
12use serde::{Deserialize, Serialize};
13
/// Every language this module can name, detect, and analyse.
///
/// The derived `Ord` follows declaration order, so reordering variants changes how
/// ordered collections such as `BTreeSet<Language>` iterate.
///
/// Serde uses `snake_case` variant names (for example `CSharp` is written as `c_sharp`),
/// which is not always the same string that [`Language::as_slug`] returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
}
60
61impl Language {
62 #[must_use]
63 pub const fn display_name(&self) -> &'static str {
64 match self {
65 Self::C => "C",
66 Self::Cpp => "C++",
67 Self::CSharp => "C#",
68 Self::Go => "Go",
69 Self::Java => "Java",
70 Self::JavaScript => "JavaScript",
71 Self::Python => "Python",
72 Self::Rust => "Rust",
73 Self::Shell => "Shell",
74 Self::PowerShell => "PowerShell",
75 Self::TypeScript => "TypeScript",
76 Self::Assembly => "Assembly",
77 Self::Clojure => "Clojure",
78 Self::Css => "CSS",
79 Self::Dart => "Dart",
80 Self::Dockerfile => "Dockerfile",
81 Self::Elixir => "Elixir",
82 Self::Erlang => "Erlang",
83 Self::FSharp => "F#",
84 Self::Groovy => "Groovy",
85 Self::Haskell => "Haskell",
86 Self::Html => "HTML",
87 Self::Julia => "Julia",
88 Self::Kotlin => "Kotlin",
89 Self::Lua => "Lua",
90 Self::Makefile => "Makefile",
91 Self::Nim => "Nim",
92 Self::ObjectiveC => "Objective-C",
93 Self::Ocaml => "OCaml",
94 Self::Perl => "Perl",
95 Self::Php => "PHP",
96 Self::R => "R",
97 Self::Ruby => "Ruby",
98 Self::Scala => "Scala",
99 Self::Scss => "SCSS",
100 Self::Sql => "SQL",
101 Self::Svelte => "Svelte",
102 Self::Swift => "Swift",
103 Self::Vue => "Vue",
104 Self::Xml => "XML",
105 Self::Zig => "Zig",
106 }
107 }
108
109 #[must_use]
110 pub const fn as_slug(&self) -> &'static str {
111 match self {
112 Self::C => "c",
113 Self::Cpp => "cpp",
114 Self::CSharp => "csharp",
115 Self::Go => "go",
116 Self::Java => "java",
117 Self::JavaScript => "javascript",
118 Self::Python => "python",
119 Self::Rust => "rust",
120 Self::Shell => "shell",
121 Self::PowerShell => "powershell",
122 Self::TypeScript => "typescript",
123 Self::Assembly => "assembly",
124 Self::Clojure => "clojure",
125 Self::Css => "css",
126 Self::Dart => "dart",
127 Self::Dockerfile => "dockerfile",
128 Self::Elixir => "elixir",
129 Self::Erlang => "erlang",
130 Self::FSharp => "fsharp",
131 Self::Groovy => "groovy",
132 Self::Haskell => "haskell",
133 Self::Html => "html",
134 Self::Julia => "julia",
135 Self::Kotlin => "kotlin",
136 Self::Lua => "lua",
137 Self::Makefile => "makefile",
138 Self::Nim => "nim",
139 Self::ObjectiveC => "objectivec",
140 Self::Ocaml => "ocaml",
141 Self::Perl => "perl",
142 Self::Php => "php",
143 Self::R => "r",
144 Self::Ruby => "ruby",
145 Self::Scala => "scala",
146 Self::Scss => "scss",
147 Self::Sql => "sql",
148 Self::Svelte => "svelte",
149 Self::Swift => "swift",
150 Self::Vue => "vue",
151 Self::Xml => "xml",
152 Self::Zig => "zig",
153 }
154 }
155
156 #[must_use]
157 pub fn from_name(name: &str) -> Option<Self> {
158 match name.trim().to_ascii_lowercase().as_str() {
159 "c" => Some(Self::C),
160 "cpp" | "c++" | "cplusplus" => Some(Self::Cpp),
161 "csharp" | "c#" | "cs" => Some(Self::CSharp),
162 "go" | "golang" => Some(Self::Go),
163 "java" => Some(Self::Java),
164 "javascript" | "js" => Some(Self::JavaScript),
165 "python" | "py" => Some(Self::Python),
166 "rust" | "rs" => Some(Self::Rust),
167 "shell" | "sh" | "bash" => Some(Self::Shell),
168 "powershell" | "pwsh" | "ps" => Some(Self::PowerShell),
169 "typescript" | "ts" => Some(Self::TypeScript),
170 "assembly" | "asm" => Some(Self::Assembly),
171 "clojure" | "clj" => Some(Self::Clojure),
172 "css" => Some(Self::Css),
173 "dart" => Some(Self::Dart),
174 "dockerfile" | "docker" => Some(Self::Dockerfile),
175 "elixir" | "ex" => Some(Self::Elixir),
176 "erlang" | "erl" => Some(Self::Erlang),
177 "fsharp" | "f#" | "fs" => Some(Self::FSharp),
178 "groovy" => Some(Self::Groovy),
179 "haskell" | "hs" => Some(Self::Haskell),
180 "html" | "htm" => Some(Self::Html),
181 "julia" | "jl" => Some(Self::Julia),
182 "kotlin" | "kt" => Some(Self::Kotlin),
183 "lua" => Some(Self::Lua),
184 "makefile" | "make" | "mk" => Some(Self::Makefile),
185 "nim" => Some(Self::Nim),
186 "objectivec" | "objc" | "objective-c" => Some(Self::ObjectiveC),
187 "ocaml" | "ml" => Some(Self::Ocaml),
188 "perl" | "pl" => Some(Self::Perl),
189 "php" => Some(Self::Php),
190 "r" => Some(Self::R),
191 "ruby" | "rb" => Some(Self::Ruby),
192 "scala" => Some(Self::Scala),
193 "scss" | "sass" => Some(Self::Scss),
194 "sql" => Some(Self::Sql),
195 "svelte" => Some(Self::Svelte),
196 "swift" => Some(Self::Swift),
197 "vue" => Some(Self::Vue),
198 "xml" => Some(Self::Xml),
199 "zig" => Some(Self::Zig),
200 _ => None,
201 }
202 }
203}
204
/// Per-file tallies of line categories and detected symbols.
///
/// `Default` yields all-zero counts.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    // Line-category counters; these are required when deserializing.
    pub total_physical_lines: u64,
    pub blank_only_lines: u64,
    pub code_only_lines: u64,
    pub single_comment_only_lines: u64,
    pub multi_comment_only_lines: u64,
    pub mixed_code_single_comment_lines: u64,
    pub mixed_code_multi_comment_lines: u64,
    pub docstring_comment_lines: u64,
    pub skipped_unknown_lines: u64,
    // The counters below are marked `#[serde(default)]`: they deserialize to 0 when
    // the field is missing from the input.
    #[serde(default)]
    pub functions: u64,
    #[serde(default)]
    pub classes: u64,
    #[serde(default)]
    pub variables: u64,
    #[serde(default)]
    pub imports: u64,
    #[serde(default)]
    pub compiler_directive_lines: u64,
    #[serde(default)]
    pub test_count: u64,
    #[serde(default)]
    pub test_assertion_count: u64,
    #[serde(default)]
    pub test_suite_count: u64,
}
246
/// Identifies which analysis strategy produced a [`RawFileAnalysis`].
///
/// Serialized by serde using `snake_case` variant names.
// NOTE(review): the code that assigns each variant is outside this section; the
// variant names suggest lexical scanning versus tree-sitter parsing — confirm there.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    Lexical,
    LexicalBestEffort,
    TreeSitter,
}
254
/// Result of analysing the text of a single file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// Line and symbol counters for the file.
    pub raw: RawLineCounts,
    /// Strategy that produced this result.
    pub parse_mode: ParseMode,
    /// Free-form warning messages collected during analysis.
    pub warnings: Vec<String>,
    /// Optional style analysis. Omitted from serialized output when `None`, and read
    /// back as `None` when the field is absent.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub style_analysis: Option<StyleAnalysis>,
}
264
/// Switches that `analyze_text` forwards to the generic line scanner.
// NOTE(review): the exact effect of each flag lives in the scanner, which is outside
// this section; the field comments below are inferred from the names — confirm there.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// Presumably: count blank lines inside a block comment as comment lines.
    pub blank_in_block_comment_as_comment: bool,
    /// Presumably: treat a line continued onto following lines as one logical line.
    pub collapse_continuation_lines: bool,
}
278
279impl Default for AnalysisOptions {
280 fn default() -> Self {
281 Self {
282 blank_in_block_comment_as_comment: true,
283 collapse_continuation_lines: false,
284 }
285 }
286}
287
288#[must_use]
289pub fn supported_languages() -> BTreeSet<Language> {
290 [
291 Language::Assembly,
292 Language::C,
293 Language::Clojure,
294 Language::Cpp,
295 Language::CSharp,
296 Language::Css,
297 Language::Dart,
298 Language::Dockerfile,
299 Language::Elixir,
300 Language::Erlang,
301 Language::FSharp,
302 Language::Go,
303 Language::Groovy,
304 Language::Haskell,
305 Language::Html,
306 Language::Java,
307 Language::JavaScript,
308 Language::Julia,
309 Language::Kotlin,
310 Language::Lua,
311 Language::Makefile,
312 Language::Nim,
313 Language::ObjectiveC,
314 Language::Ocaml,
315 Language::Perl,
316 Language::Php,
317 Language::PowerShell,
318 Language::Python,
319 Language::R,
320 Language::Ruby,
321 Language::Rust,
322 Language::Scala,
323 Language::Scss,
324 Language::Shell,
325 Language::Sql,
326 Language::Svelte,
327 Language::Swift,
328 Language::TypeScript,
329 Language::Vue,
330 Language::Xml,
331 Language::Zig,
332 ]
333 .into_iter()
334 .collect()
335}
336
337fn detect_by_shebang(line: &str) -> Option<Language> {
339 let lower = line.to_ascii_lowercase();
340 if !lower.starts_with("#!") {
341 return None;
342 }
343 if lower.contains("python") {
344 return Some(Language::Python);
345 }
346 if lower.contains("pwsh") || lower.contains("powershell") {
347 return Some(Language::PowerShell);
348 }
349 if lower.contains("bash")
350 || lower.contains("/sh")
351 || lower.contains("zsh")
352 || lower.contains("ksh")
353 {
354 return Some(Language::Shell);
355 }
356 if lower.contains("ruby") {
357 return Some(Language::Ruby);
358 }
359 if lower.contains("perl") {
360 return Some(Language::Perl);
361 }
362 if lower.contains("php") {
363 return Some(Language::Php);
364 }
365 if lower.contains("node") || lower.contains("nodejs") {
366 return Some(Language::JavaScript);
367 }
368 None
369}
370
371fn detect_by_extension(ext: &str) -> Option<Language> {
373 static EXT_MAP: &[(&str, Language)] = &[
375 ("c", Language::C),
376 ("h", Language::C),
377 ("cc", Language::Cpp),
378 ("cp", Language::Cpp),
379 ("cpp", Language::Cpp),
380 ("cxx", Language::Cpp),
381 ("hh", Language::Cpp),
382 ("hpp", Language::Cpp),
383 ("hxx", Language::Cpp),
384 ("cs", Language::CSharp),
385 ("go", Language::Go),
386 ("java", Language::Java),
387 ("js", Language::JavaScript),
388 ("mjs", Language::JavaScript),
389 ("cjs", Language::JavaScript),
390 ("py", Language::Python),
391 ("rs", Language::Rust),
392 ("sh", Language::Shell),
393 ("bash", Language::Shell),
394 ("zsh", Language::Shell),
395 ("ksh", Language::Shell),
396 ("ps1", Language::PowerShell),
397 ("psm1", Language::PowerShell),
398 ("psd1", Language::PowerShell),
399 ("ts", Language::TypeScript),
400 ("mts", Language::TypeScript),
401 ("cts", Language::TypeScript),
402 ("asm", Language::Assembly),
403 ("s", Language::Assembly),
404 ("clj", Language::Clojure),
405 ("cljs", Language::Clojure),
406 ("cljc", Language::Clojure),
407 ("edn", Language::Clojure),
408 ("css", Language::Css),
409 ("dart", Language::Dart),
410 ("ex", Language::Elixir),
411 ("exs", Language::Elixir),
412 ("erl", Language::Erlang),
413 ("hrl", Language::Erlang),
414 ("fs", Language::FSharp),
415 ("fsi", Language::FSharp),
416 ("fsx", Language::FSharp),
417 ("groovy", Language::Groovy),
418 ("gradle", Language::Groovy),
419 ("hs", Language::Haskell),
420 ("lhs", Language::Haskell),
421 ("html", Language::Html),
422 ("htm", Language::Html),
423 ("xhtml", Language::Html),
424 ("jl", Language::Julia),
425 ("kt", Language::Kotlin),
426 ("kts", Language::Kotlin),
427 ("lua", Language::Lua),
428 ("mk", Language::Makefile),
429 ("nim", Language::Nim),
430 ("nims", Language::Nim),
431 ("m", Language::ObjectiveC),
432 ("mm", Language::ObjectiveC),
433 ("ml", Language::Ocaml),
434 ("mli", Language::Ocaml),
435 ("pl", Language::Perl),
436 ("pm", Language::Perl),
437 ("t", Language::Perl),
438 ("php", Language::Php),
439 ("php3", Language::Php),
440 ("php4", Language::Php),
441 ("php5", Language::Php),
442 ("php7", Language::Php),
443 ("phtml", Language::Php),
444 ("r", Language::R),
445 ("rb", Language::Ruby),
446 ("rake", Language::Ruby),
447 ("scala", Language::Scala),
448 ("sc", Language::Scala),
449 ("scss", Language::Scss),
450 ("sass", Language::Scss),
451 ("sql", Language::Sql),
452 ("svelte", Language::Svelte),
453 ("swift", Language::Swift),
454 ("vue", Language::Vue),
455 ("xml", Language::Xml),
456 ("xsd", Language::Xml),
457 ("xsl", Language::Xml),
458 ("xslt", Language::Xml),
459 ("svg", Language::Xml),
460 ("zig", Language::Zig),
461 ];
462 EXT_MAP.iter().find_map(|&(e, l)| (e == ext).then_some(l))
463}
464
465fn detect_by_filename(filename: &str, filename_lower: &str) -> Option<Language> {
467 if filename == "Dockerfile"
469 || filename.starts_with("Dockerfile.")
470 || filename_lower == "dockerfile"
471 {
472 return Some(Language::Dockerfile);
473 }
474 if matches!(
476 filename,
477 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
478 ) {
479 return Some(Language::Makefile);
480 }
481 if matches!(
483 filename,
484 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
485 ) {
486 return Some(Language::Ruby);
487 }
488 None
489}
490
491#[must_use]
492#[allow(clippy::too_many_lines)]
493pub fn detect_language(
494 path: &Path,
495 first_line: Option<&str>,
496 extension_overrides: &BTreeMap<String, String>,
497 shebang_detection: bool,
498) -> Option<Language> {
499 let extension = path
500 .extension()
501 .and_then(|ext| ext.to_str())
502 .map(str::to_ascii_lowercase);
503
504 if let Some(ext) = extension.as_ref() {
506 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
507 if let Some(lang) = Language::from_name(override_name) {
508 return Some(lang);
509 }
510 }
511 }
512
513 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
515 let filename_lower = filename.to_ascii_lowercase();
516
517 if let Some(lang) = detect_by_filename(filename, &filename_lower) {
518 return Some(lang);
519 }
520
521 if let Some(lang) = extension.as_deref().and_then(detect_by_extension) {
523 return Some(lang);
524 }
525
526 if shebang_detection {
528 if let Some(line) = first_line {
529 if let Some(lang) = detect_by_shebang(line) {
530 return Some(lang);
531 }
532 }
533 }
534
535 None
536}
537
538#[must_use]
539pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
540 #[cfg(feature = "tree-sitter")]
542 {
543 match language {
544 Language::C | Language::Cpp => {
545 if let Some(mut result) = ts::analyze_c(text) {
546 result.style_analysis = style::analyze_style(language, text);
547 return result;
548 }
549 }
550 Language::Python => {
551 if let Some(result) = ts::analyze_python(text) {
552 return result;
553 }
554 }
555 _ => {}
556 }
557 }
558
559 let (mut config, has_preprocessor) = language_scan_config(language);
560
561 if language == Language::Python {
563 config.skip_lines = detect_python_docstring_lines(text);
564 }
565
566 let flags = IeeeFlags {
569 has_preprocessor_directives: has_preprocessor,
570 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
571 collapse_continuation_lines: options.collapse_continuation_lines,
572 };
573 let mut result = analyze_generic(text, config, flags);
574 result.style_analysis = style::analyze_style(language, text);
575 result
576}
577
578fn language_scan_config(language: Language) -> (ScanConfig, bool) {
586 let cfg = LANG_SCAN_TABLE
587 .iter()
588 .find_map(|&(l, c)| (l == language).then_some(c))
589 .unwrap_or_else(|| panic!("language_scan_config: no entry for {language:?}"));
590 (
591 ScanConfig {
592 line_comments: cfg.line_comments,
593 block_comment: cfg.block_comment,
594 allow_single_quote_strings: cfg.allow_single_quote_strings,
595 allow_double_quote_strings: cfg.allow_double_quote_strings,
596 allow_triple_quote_strings: cfg.allow_triple_quote_strings,
597 allow_csharp_verbatim_strings: cfg.allow_csharp_verbatim_strings,
598 skip_lines: HashSet::new(),
599 symbol_patterns: cfg.symbol_patterns,
600 },
601 cfg.has_preprocessor,
602 )
603}
604
/// Static string patterns used to recognise symbols of one language, grouped by the
/// kind of symbol they indicate.
// NOTE(review): how the scanner applies these patterns (line prefix versus substring,
// and whether order matters) is outside this section — confirm before reordering or
// adding entries.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Patterns indicating a function definition.
    functions: &'static [&'static str],
    /// Presumably: prefixes that count as a function only when the line also contains
    /// an opening parenthesis (used for C-like return types) — confirm in the scanner.
    functions_prefix_paren: &'static [&'static str],
    /// Patterns indicating a class or other type-like definition.
    classes: &'static [&'static str],
    /// Patterns indicating a variable declaration.
    variables: &'static [&'static str],
    /// Patterns indicating an import or include.
    imports: &'static [&'static str],
    /// Patterns indicating a test case.
    tests: &'static [&'static str],
    /// Patterns indicating a test assertion.
    assertions: &'static [&'static str],
    /// Patterns indicating a test suite or fixture.
    test_suites: &'static [&'static str],
}
630
631impl SymbolPatterns {
632 const fn none() -> Self {
633 Self {
634 functions: &[],
635 functions_prefix_paren: &[],
636 classes: &[],
637 variables: &[],
638 imports: &[],
639 tests: &[],
640 assertions: &[],
641 test_suites: &[],
642 }
643 }
644}
645
/// Empty pattern set: every category is empty.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();

/// Symbol patterns for Rust source.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    // `fn` preceded by the listed visibility and qualifier combinations.
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    functions_prefix_paren: &[],
    // Structs, enums, traits, impl blocks and type aliases are all counted as classes.
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
    // Test attributes from the standard harness and common test crates.
    tests: &[
        "#[test]",
        "#[tokio::test]",
        "#[actix_web::test]",
        "#[rstest]",
        "#[test_case",
    ],
    assertions: &[
        "assert_eq!(",
        "assert_ne!(",
        "assert!(",
        "assert_matches!(",
        "assert_err!(",
        "assert_ok!(",
    ],
    test_suites: &[],
};
703
/// Symbol patterns for Python source.
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["import ", "from "],
    // `test_`-prefixed functions and `Test`-prefixed classes.
    tests: &["def test_", "async def test_", "class Test"],
    // Assertion methods in the style of `unittest.TestCase`.
    assertions: &[
        "self.assertEqual(",
        "self.assertNotEqual(",
        "self.assertTrue(",
        "self.assertFalse(",
        "self.assertIsNone(",
        "self.assertIsNotNone(",
        "self.assertIn(",
        "self.assertNotIn(",
        "self.assertRaises(",
        "self.assertAlmostEqual(",
    ],
    test_suites: &[],
};
726
/// Symbol patterns for JavaScript source.
const SP_JS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // `describe`/`it`/`test` calls as used by Jest- and Mocha-style runners.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
758
/// Symbol patterns for TypeScript source.
///
/// Identical to `SP_JS` except that `classes` also covers abstract classes, interfaces
/// and `declare` forms.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
    // `describe`/`it`/`test` calls as used by Jest- and Mocha-style runners.
    tests: &[
        "describe(",
        "it(",
        "test(",
        "it.each(",
        "test.each(",
        "describe.each(",
    ],
    assertions: &["expect("],
    test_suites: &[],
};
800
/// Symbol patterns for Go source.
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var "],
    imports: &["import "],
    // Function-name prefixes used by the `testing` package.
    tests: &["func Test", "func Benchmark", "func Fuzz"],
    assertions: &[],
    test_suites: &[],
};
812
/// Symbol patterns for Java source.
///
/// No function or variable patterns are defined for Java.
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
    // JUnit test annotations.
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "@TestFactory",
        "@TestTemplate",
    ],
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "assertAll(",
        "assertArrayEquals(",
        "assertIterableEquals(",
        "assertLinesMatch(",
    ],
    test_suites: &[],
};
859
/// Symbol patterns for C# source.
///
/// No function patterns are defined for C#.
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    variables: &["var "],
    imports: &["using "],
    // Test attributes from MSTest, NUnit and xUnit.
    tests: &[
        "[TestMethod]",
        "[Test]",
        "[Fact]",
        "[Theory]",
        "[TestCase(",
        "[DataRow(",
        "[InlineData(",
        "[MemberData(",
    ],
    assertions: &[
        "Assert.AreEqual(",
        "Assert.AreNotEqual(",
        "Assert.IsTrue(",
        "Assert.IsFalse(",
        "Assert.IsNull(",
        "Assert.IsNotNull(",
        "Assert.Equal(",
        "Assert.NotEqual(",
        "Assert.True(",
        "Assert.False(",
        "Assert.That(",
        "Assert.Contains(",
        "Assert.Throws(",
        "Assert.ThrowsAsync(",
        "Assert.IsInstanceOfType(",
    ],
    // Fixture-level attributes (MSTest, NUnit).
    test_suites: &["[TestClass]", "[TestFixture]", "[SetUpFixture]"],
};
918
/// Test-declaration macros and functions shared by the C and C++ pattern sets.
// NOTE(review): "TEST_GROUP(", "TEST_GROUP_BASE(", "BOOST_AUTO_TEST_SUITE(" and
// "CPPUNIT_TEST_SUITE(" also appear in SUITE_PATTERNS_C_CPP — confirm that counting
// such lines as both a test and a suite is intended.
const TEST_PATTERNS_C_CPP: &[&str] = &[
    // Google Test
    "TEST(",
    "TEST_F(",
    "TEST_P(",
    "TYPED_TEST(",
    "TYPED_TEST_P(",
    "INSTANTIATE_TEST_SUITE_P(",
    "INSTANTIATE_TYPED_TEST_SUITE_P(",
    // Catch2-style
    "TEST_CASE(",
    "SECTION(",
    "SCENARIO(",
    "SCENARIO_METHOD(",
    "TEST_CASE_METHOD(",
    // Boost.Test
    "BOOST_AUTO_TEST_CASE(",
    "BOOST_FIXTURE_TEST_CASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "BOOST_PARAM_TEST_CASE(",
    // CppUnit
    "CPPUNIT_TEST(",
    "CPPUNIT_TEST_SUITE(",
    // Unity
    "RUN_TEST(",
    "TEST_IGNORE(",
    "TEST_FAIL(",
    // Check
    "START_TEST(",
    "tcase_add_test(",
    "suite_create(",
    // cmocka
    "cmocka_unit_test(",
    "cmocka_run_group_tests(",
    // CppUTest
    "IGNORE_TEST(",
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
];
959
/// Assertion macros and functions shared by the C and C++ pattern sets.
const ASSERT_PATTERNS_C_CPP: &[&str] = &[
    // Google Test: fatal ASSERT_* forms
    "ASSERT_EQ(",
    "ASSERT_NE(",
    "ASSERT_LT(",
    "ASSERT_LE(",
    "ASSERT_GT(",
    "ASSERT_GE(",
    "ASSERT_TRUE(",
    "ASSERT_FALSE(",
    "ASSERT_STREQ(",
    "ASSERT_STRNE(",
    "ASSERT_FLOAT_EQ(",
    "ASSERT_DOUBLE_EQ(",
    "ASSERT_NEAR(",
    "ASSERT_THROW(",
    "ASSERT_NO_THROW(",
    "ASSERT_ANY_THROW(",
    // Google Test: non-fatal EXPECT_* forms
    "EXPECT_EQ(",
    "EXPECT_NE(",
    "EXPECT_LT(",
    "EXPECT_LE(",
    "EXPECT_GT(",
    "EXPECT_GE(",
    "EXPECT_TRUE(",
    "EXPECT_FALSE(",
    "EXPECT_STREQ(",
    "EXPECT_STRNE(",
    "EXPECT_FLOAT_EQ(",
    "EXPECT_DOUBLE_EQ(",
    "EXPECT_NEAR(",
    "EXPECT_THROW(",
    "EXPECT_NO_THROW(",
    "EXPECT_ANY_THROW(",
    // Catch2-style
    "REQUIRE(",
    "CHECK(",
    "REQUIRE_FALSE(",
    "CHECK_FALSE(",
    "REQUIRE_NOTHROW(",
    "CHECK_NOTHROW(",
    "REQUIRE_THROWS(",
    "CHECK_THROWS(",
    "REQUIRE_THAT(",
    "CHECK_THAT(",
    // Unity
    "TEST_ASSERT_EQUAL(",
    "TEST_ASSERT_EQUAL_INT(",
    "TEST_ASSERT_EQUAL_STRING(",
    "TEST_ASSERT_EQUAL_FLOAT(",
    "TEST_ASSERT_EQUAL_DOUBLE(",
    "TEST_ASSERT_EQUAL_PTR(",
    "TEST_ASSERT_TRUE(",
    "TEST_ASSERT_FALSE(",
    "TEST_ASSERT_NULL(",
    "TEST_ASSERT_NOT_NULL(",
    "TEST_ASSERT_BITS_HIGH(",
    "TEST_ASSERT_BITS_LOW(",
    // cmocka
    "assert_int_equal(",
    "assert_int_not_equal(",
    "assert_string_equal(",
    "assert_string_not_equal(",
    "assert_true(",
    "assert_false(",
    "assert_null(",
    "assert_non_null(",
    "assert_ptr_equal(",
    "assert_memory_equal(",
    "assert_return_code(",
];
1033
/// Test-suite and fixture macros shared by the C and C++ pattern sets.
// NOTE(review): every entry except "CPPUNIT_TEST_SUITE_END(" is also listed in
// TEST_PATTERNS_C_CPP — confirm the overlap is intended.
const SUITE_PATTERNS_C_CPP: &[&str] = &[
    "TEST_GROUP(",
    "TEST_GROUP_BASE(",
    "BOOST_AUTO_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE(",
    "CPPUNIT_TEST_SUITE_END(",
];
1042
/// Symbol patterns for C source.
const SP_C: SymbolPatterns = SymbolPatterns {
    functions: &[],
    // Common return types and storage or qualifier keywords that can start a function
    // definition; see the `functions_prefix_paren` field on `SymbolPatterns`.
    functions_prefix_paren: &[
        "void ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "static ",
        "inline ",
        "const ",
        "extern ",
    ],
    classes: &[
        "struct ",
        "typedef struct ",
        "union ",
        "typedef union ",
        "typedef enum ",
    ],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1073
/// Symbol patterns for C++ source.
const SP_CPP: SymbolPatterns = SymbolPatterns {
    // Forms specific to C++ member functions: virtual and explicit declarations,
    // destructors (`~`) and operator overloads.
    functions: &[
        "virtual ",
        "explicit ",
        "~",
        "operator",
    ],
    // Common return types and specifiers that can start a function definition; see
    // the `functions_prefix_paren` field on `SymbolPatterns`.
    functions_prefix_paren: &[
        "void ",
        "bool ",
        "int ",
        "char ",
        "float ",
        "double ",
        "long ",
        "unsigned ",
        "size_t ",
        "auto ",
        "static ",
        "inline ",
        "constexpr ",
        "const ",
        "extern ",
    ],
    // Namespaces and templates are counted together with classes and structs.
    classes: &["class ", "struct ", "namespace ", "template ", "template<"],
    variables: &[],
    imports: &["#include "],
    tests: TEST_PATTERNS_C_CPP,
    assertions: ASSERT_PATTERNS_C_CPP,
    test_suites: SUITE_PATTERNS_C_CPP,
};
1107
/// Symbol patterns for shell scripts.
///
/// Only the `function name` form is listed for functions; no test patterns are defined.
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    // `source file` and its POSIX equivalent `. file`.
    imports: &["source ", ". "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1118
/// Symbol patterns for PowerShell source.
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    // Both capitalisations of the keyword are listed explicitly.
    functions: &["function ", "Function "],
    functions_prefix_paren: &[],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
    // Pester block keywords.
    tests: &["Describe ", "It ", "Context "],
    assertions: &[],
    test_suites: &[],
};
1130
/// Symbol patterns for Kotlin source.
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
    // JUnit annotations plus quoted spec names beginning with "should " or "it ".
    tests: &[
        "@Test",
        "@ParameterizedTest",
        "@RepeatedTest",
        "\"should ",
        "\"it ",
    ],
    // JUnit-style assertions followed by `should*` matcher calls.
    assertions: &[
        "assertEquals(",
        "assertNotEquals(",
        "assertTrue(",
        "assertFalse(",
        "assertNull(",
        "assertNotNull(",
        "assertThat(",
        "assertThrows(",
        "shouldBe(",
        "shouldNotBe(",
        "shouldThrow(",
    ],
    test_suites: &[],
};
1183
/// Symbol patterns for Swift source.
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    functions_prefix_paren: &[],
    // Protocols, enums, extensions and actors are counted together with classes.
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
    // `test`-prefixed functions and the `@Test` attribute.
    tests: &["func test", "func Test", "@Test"],
    // XCTest assertion functions plus the `#expect` macro.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
        "#expect(",
    ],
    test_suites: &[],
};
1240
/// Symbol patterns for Ruby source.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
    // Spec-style block keywords (`it`, `describe`, `context`) and `test`.
    tests: &["it ", "it(", "describe ", "context ", "test "],
    assertions: &[],
    test_suites: &[],
};
1252
/// Symbol patterns for Scala source.
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
    // Test-definition calls in the style of ScalaTest.
    tests: &["test(", "it(", "describe("],
    assertions: &[],
    test_suites: &[],
};
1271
/// Symbol patterns for PHP source.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    functions_prefix_paren: &[],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
    // PHPUnit conventions: `test`-prefixed methods and test attributes.
    tests: &[
        "public function test",
        "function test",
        "#[Test]",
        "#[DataProvider(",
    ],
    assertions: &[],
    test_suites: &[],
};
1312
/// Symbol patterns for Elixir source.
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    // Public and private (`p`-suffixed) definitions of functions, macros and guards.
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    functions_prefix_paren: &[],
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
    // ExUnit block keywords.
    tests: &["test ", "describe "],
    assertions: &[],
    test_suites: &[],
};
1331
/// Symbol patterns for Erlang source.
///
/// Only module attributes are recognised: `-module(` counts as a class, and
/// `-import(` and the include forms count as imports.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1342
/// Symbol patterns for F# source.
// NOTE(review): "let " is listed under `functions` and is also a prefix of the
// "let mutable " entry under `variables` — confirm how the scanner resolves this.
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
    // Test attributes in F# attribute syntax (NUnit, xUnit).
    tests: &["[<Test>]", "[<Fact>]", "[<Theory>]", "[<TestCase("],
    assertions: &[],
    test_suites: &[],
};
1360
/// Symbol patterns for Groovy source.
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
    // Spock feature methods (`def "..."`) and block labels, plus JUnit `@Test`.
    tests: &["def \"", "@Test", "given:", "when:", "then:", "expect:"],
    assertions: &[],
    test_suites: &[],
};
1372
/// Symbol patterns for Haskell source.
///
/// Only type-level declarations and imports are recognised; no function, variable or
/// test patterns are defined.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1383
/// Symbol patterns for Lua source.
// NOTE(review): "local " under `variables` is a prefix of "local function " under
// `functions` — confirm how the scanner resolves lines that match both.
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["local "],
    imports: &[],
    // Spec-style calls as used by busted.
    tests: &["it(", "describe(", "pending("],
    assertions: &[],
    test_suites: &[],
};
1395
/// Symbol patterns for Nim source.
const SP_NIM: SymbolPatterns = SymbolPatterns {
    // All routine kinds are counted as functions.
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    functions_prefix_paren: &[],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
    // `test "name":` blocks.
    tests: &["test "],
    assertions: &[],
    test_suites: &[],
};
1415
/// Symbol-detection patterns for Objective-C sources.
///
/// Every entry is a prefix compared against the start of a trimmed line.
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    // Instance (`- (`) and class (`+ (`) method declarations.
    functions: &["- (", "+ ("],
    functions_prefix_paren: &[],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
    // Void instance methods whose name begins with `test`.
    tests: &["- (void)test"],
    // `XCTAssert*` calls that begin a line.
    assertions: &[
        "XCTAssertEqual(",
        "XCTAssertNotEqual(",
        "XCTAssertTrue(",
        "XCTAssertFalse(",
        "XCTAssertNil(",
        "XCTAssertNotNil(",
        "XCTAssertGreaterThan(",
        "XCTAssertLessThan(",
        "XCTAssertThrowsError(",
        "XCTAssertNoThrow(",
    ],
    test_suites: &[],
};
1438
/// Symbol-detection patterns for OCaml sources.
///
/// Every entry is a prefix compared against the start of a trimmed line.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    // Any `let` binding is counted as a function; values are not
    // distinguished from functions at this level.
    functions: &["let ", "let rec "],
    functions_prefix_paren: &[],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1449
/// Symbol-detection patterns for Perl sources.
///
/// Every entry is a prefix compared against the start of a trimmed line.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    functions_prefix_paren: &[],
    // A `package` declaration is counted in the "classes" bucket.
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1460
/// Symbol-detection patterns for Clojure sources.
///
/// Every entry is a prefix compared against the start of a trimmed line, so
/// each pattern includes the opening parenthesis of the form.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    functions_prefix_paren: &[],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
    tests: &["(deftest ", "(testing "],
    assertions: &[],
    test_suites: &[],
};
1477
/// Symbol-detection patterns for Julia sources.
///
/// Every entry is a prefix compared against the start of a trimmed line.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    functions_prefix_paren: &[],
    // Type declarations are counted in the "classes" bucket.
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
    tests: &["@test ", "@testset "],
    assertions: &[],
    test_suites: &[],
};
1494
/// Symbol-detection patterns for Dart sources.
///
/// Every entry is a prefix compared against the start of a trimmed line; an
/// empty list means that category is never counted for this language.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
    tests: &["test(", "testWidgets(", "group("],
    assertions: &[],
    test_suites: &[],
};
1506
/// Symbol-detection patterns for R sources.
///
/// Every entry is a prefix compared against the start of a trimmed line; an
/// empty list means that category is never counted for this language.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
    // NOTE(review): `expect_` calls are counted as tests here rather than as
    // assertions; confirm that this is intended.
    tests: &["test_that(", "it(", "describe(", "expect_"],
    assertions: &[],
    test_suites: &[],
};
1518
/// Symbol-detection patterns for SQL sources.
///
/// Every entry is a prefix compared against the start of a trimmed line.
/// Matching is case-sensitive, so each statement is listed in all-lowercase
/// and all-uppercase form; mixed-case spellings are not matched.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    functions_prefix_paren: &[],
    // Tables, views and schemas are counted in the "classes" bucket.
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1545
/// Symbol-detection patterns for assembly sources.
///
/// Every entry is a prefix compared against the start of a trimmed line.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    // NOTE(review): only lines that *begin* with `proc `/`PROC ` match; a
    // `name PROC` form would not be counted. Confirm the intended dialect.
    functions: &["proc ", "PROC "],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
    tests: &[],
    assertions: &[],
    test_suites: &[],
};
1556
/// Symbol-detection patterns for Zig sources.
///
/// Every entry is a prefix compared against the start of a trimmed line.
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    functions_prefix_paren: &[],
    classes: &[],
    variables: &["var ", "pub var "],
    imports: &[],
    // Named tests (`test "..."`) and `test{`.
    // NOTE(review): `test {` with a space before the brace is not matched.
    tests: &["test \"", "test{"],
    assertions: &[],
    test_suites: &[],
};
1574
/// Compile-time lexical description of one language, as stored in
/// `LANG_SCAN_TABLE`.
///
/// The fields mirror those of `ScanConfig` (minus `skip_lines`), plus the
/// `has_preprocessor` marker.
#[allow(clippy::struct_excessive_bools)]
#[derive(Clone, Copy)]
struct StaticLangConfig {
    /// Prefixes that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, if the language has them.
    block_comment: Option<(&'static str, &'static str)>,
    /// Treat `'` as a string delimiter.
    allow_single_quote_strings: bool,
    /// Treat `"` as a string delimiter.
    allow_double_quote_strings: bool,
    /// Recognise `"""` / `'''` triple-quoted strings.
    allow_triple_quote_strings: bool,
    /// Recognise `@"..."` verbatim strings.
    allow_csharp_verbatim_strings: bool,
    /// Line-prefix patterns used for symbol counting.
    symbol_patterns: SymbolPatterns,
    /// Set for C, C++ and Objective-C in `LANG_SCAN_TABLE`.
    /// NOTE(review): presumably feeds `IeeeFlags::has_preprocessor_directives`;
    /// confirm at the site that consumes this table.
    has_preprocessor: bool,
}
1591
/// Per-file scanner configuration consumed by `scan_line`,
/// `try_open_string` and `process_physical_line`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Prefixes that start a comment running to the end of the line.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, if any.
    block_comment: Option<(&'static str, &'static str)>,
    /// Treat `'` as a string delimiter.
    allow_single_quote_strings: bool,
    /// Treat `"` as a string delimiter.
    allow_double_quote_strings: bool,
    /// Recognise `"""` / `'''` triple-quoted strings.
    allow_triple_quote_strings: bool,
    /// Recognise `@"..."` verbatim strings.
    allow_csharp_verbatim_strings: bool,
    /// Zero-based line indices that `process_physical_line` counts as
    /// docstring lines without scanning them.
    skip_lines: HashSet<usize>,
    /// Line-prefix patterns used for symbol counting.
    symbol_patterns: SymbolPatterns,
}
1604
/// Base configuration for languages with `//` line comments and `/* */`
/// block comments.
///
/// Entries in `LANG_SCAN_TABLE` start from this value via struct-update
/// syntax and override only the fields that differ.
const C_SLASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["//"],
    block_comment: Some(("/*", "*/")),
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
1624
/// Base configuration for languages with `#` line comments and no block
/// comments.
///
/// Entries in `LANG_SCAN_TABLE` start from this value via struct-update
/// syntax and override only the fields that differ.
const HASH_BASE: StaticLangConfig = StaticLangConfig {
    line_comments: &["#"],
    block_comment: None,
    allow_single_quote_strings: true,
    allow_double_quote_strings: true,
    allow_triple_quote_strings: false,
    allow_csharp_verbatim_strings: false,
    symbol_patterns: SP_NONE,
    has_preprocessor: false,
};
1638
1639static LANG_SCAN_TABLE: &[(Language, StaticLangConfig)] = &[
1643 (
1645 Language::C,
1646 StaticLangConfig {
1647 symbol_patterns: SP_C,
1648 has_preprocessor: true,
1649 ..C_SLASH_BASE
1650 },
1651 ),
1652 (
1653 Language::Cpp,
1654 StaticLangConfig {
1655 symbol_patterns: SP_CPP,
1656 has_preprocessor: true,
1657 ..C_SLASH_BASE
1658 },
1659 ),
1660 (
1661 Language::ObjectiveC,
1662 StaticLangConfig {
1663 symbol_patterns: SP_OBJECTIVEC,
1664 has_preprocessor: true,
1665 ..C_SLASH_BASE
1666 },
1667 ),
1668 (
1670 Language::CSharp,
1671 StaticLangConfig {
1672 symbol_patterns: SP_CSHARP,
1673 allow_csharp_verbatim_strings: true,
1674 ..C_SLASH_BASE
1675 },
1676 ),
1677 (
1678 Language::Go,
1679 StaticLangConfig {
1680 symbol_patterns: SP_GO,
1681 ..C_SLASH_BASE
1682 },
1683 ),
1684 (
1685 Language::Java,
1686 StaticLangConfig {
1687 symbol_patterns: SP_JAVA,
1688 ..C_SLASH_BASE
1689 },
1690 ),
1691 (
1692 Language::JavaScript,
1693 StaticLangConfig {
1694 symbol_patterns: SP_JS,
1695 ..C_SLASH_BASE
1696 },
1697 ),
1698 (
1699 Language::TypeScript,
1700 StaticLangConfig {
1701 symbol_patterns: SP_TS,
1702 ..C_SLASH_BASE
1703 },
1704 ),
1705 (
1706 Language::Svelte,
1707 StaticLangConfig {
1708 symbol_patterns: SP_JS,
1709 ..C_SLASH_BASE
1710 },
1711 ),
1712 (
1713 Language::Vue,
1714 StaticLangConfig {
1715 symbol_patterns: SP_JS,
1716 ..C_SLASH_BASE
1717 },
1718 ),
1719 (
1720 Language::Dart,
1721 StaticLangConfig {
1722 symbol_patterns: SP_DART,
1723 ..C_SLASH_BASE
1724 },
1725 ),
1726 (
1727 Language::Groovy,
1728 StaticLangConfig {
1729 symbol_patterns: SP_GROOVY,
1730 ..C_SLASH_BASE
1731 },
1732 ),
1733 (
1734 Language::Kotlin,
1735 StaticLangConfig {
1736 symbol_patterns: SP_KOTLIN,
1737 ..C_SLASH_BASE
1738 },
1739 ),
1740 (
1741 Language::Scala,
1742 StaticLangConfig {
1743 symbol_patterns: SP_SCALA,
1744 ..C_SLASH_BASE
1745 },
1746 ),
1747 (
1748 Language::Scss,
1749 StaticLangConfig {
1750 symbol_patterns: SP_NONE,
1751 ..C_SLASH_BASE
1752 },
1753 ),
1754 (
1756 Language::Rust,
1757 StaticLangConfig {
1758 symbol_patterns: SP_RUST,
1759 allow_single_quote_strings: false,
1760 ..C_SLASH_BASE
1761 },
1762 ),
1763 (
1765 Language::Swift,
1766 StaticLangConfig {
1767 symbol_patterns: SP_SWIFT,
1768 allow_single_quote_strings: false,
1769 ..C_SLASH_BASE
1770 },
1771 ),
1772 (
1774 Language::Zig,
1775 StaticLangConfig {
1776 symbol_patterns: SP_ZIG,
1777 block_comment: None,
1778 ..C_SLASH_BASE
1779 },
1780 ),
1781 (
1783 Language::FSharp,
1784 StaticLangConfig {
1785 line_comments: &["//"],
1786 block_comment: Some(("(*", "*)")),
1787 allow_single_quote_strings: false,
1788 allow_double_quote_strings: true,
1789 symbol_patterns: SP_FSHARP,
1790 ..C_SLASH_BASE
1791 },
1792 ),
1793 (
1795 Language::Shell,
1796 StaticLangConfig {
1797 symbol_patterns: SP_SHELL,
1798 ..HASH_BASE
1799 },
1800 ),
1801 (
1802 Language::Elixir,
1803 StaticLangConfig {
1804 symbol_patterns: SP_ELIXIR,
1805 ..HASH_BASE
1806 },
1807 ),
1808 (
1809 Language::Perl,
1810 StaticLangConfig {
1811 symbol_patterns: SP_PERL,
1812 ..HASH_BASE
1813 },
1814 ),
1815 (
1816 Language::R,
1817 StaticLangConfig {
1818 symbol_patterns: SP_R,
1819 ..HASH_BASE
1820 },
1821 ),
1822 (
1823 Language::Ruby,
1824 StaticLangConfig {
1825 symbol_patterns: SP_RUBY,
1826 ..HASH_BASE
1827 },
1828 ),
1829 (
1831 Language::Python,
1832 StaticLangConfig {
1833 symbol_patterns: SP_PYTHON,
1834 allow_triple_quote_strings: true,
1835 ..HASH_BASE
1836 },
1837 ),
1838 (
1840 Language::PowerShell,
1841 StaticLangConfig {
1842 symbol_patterns: SP_POWERSHELL,
1843 block_comment: Some(("<#", "#>")),
1844 ..HASH_BASE
1845 },
1846 ),
1847 (
1849 Language::Nim,
1850 StaticLangConfig {
1851 symbol_patterns: SP_NIM,
1852 block_comment: Some(("#[", "]#")),
1853 ..HASH_BASE
1854 },
1855 ),
1856 (
1858 Language::Makefile,
1859 StaticLangConfig {
1860 symbol_patterns: SP_NONE,
1861 allow_single_quote_strings: false,
1862 allow_double_quote_strings: false,
1863 ..HASH_BASE
1864 },
1865 ),
1866 (
1867 Language::Dockerfile,
1868 StaticLangConfig {
1869 symbol_patterns: SP_NONE,
1870 allow_single_quote_strings: false,
1871 allow_double_quote_strings: false,
1872 ..HASH_BASE
1873 },
1874 ),
1875 (
1878 Language::Css,
1879 StaticLangConfig {
1880 line_comments: &[],
1881 block_comment: Some(("/*", "*/")),
1882 symbol_patterns: SP_NONE,
1883 ..C_SLASH_BASE
1884 },
1885 ),
1886 (
1888 Language::Html,
1889 StaticLangConfig {
1890 line_comments: &[],
1891 block_comment: Some(("<!--", "-->")),
1892 allow_single_quote_strings: false,
1893 allow_double_quote_strings: false,
1894 symbol_patterns: SP_NONE,
1895 ..C_SLASH_BASE
1896 },
1897 ),
1898 (
1899 Language::Xml,
1900 StaticLangConfig {
1901 line_comments: &[],
1902 block_comment: Some(("<!--", "-->")),
1903 allow_single_quote_strings: false,
1904 allow_double_quote_strings: false,
1905 symbol_patterns: SP_NONE,
1906 ..C_SLASH_BASE
1907 },
1908 ),
1909 (
1911 Language::Lua,
1912 StaticLangConfig {
1913 line_comments: &["--"],
1914 block_comment: Some(("--[[", "]]")),
1915 symbol_patterns: SP_LUA,
1916 ..C_SLASH_BASE
1917 },
1918 ),
1919 (
1921 Language::Haskell,
1922 StaticLangConfig {
1923 line_comments: &["--"],
1924 block_comment: Some(("{-", "-}")),
1925 symbol_patterns: SP_HASKELL,
1926 ..C_SLASH_BASE
1927 },
1928 ),
1929 (
1931 Language::Sql,
1932 StaticLangConfig {
1933 line_comments: &["--"],
1934 block_comment: Some(("/*", "*/")),
1935 allow_single_quote_strings: true,
1936 allow_double_quote_strings: false,
1937 symbol_patterns: SP_SQL,
1938 ..C_SLASH_BASE
1939 },
1940 ),
1941 (
1943 Language::Ocaml,
1944 StaticLangConfig {
1945 line_comments: &[],
1946 block_comment: Some(("(*", "*)")),
1947 allow_single_quote_strings: false,
1948 symbol_patterns: SP_OCAML,
1949 ..C_SLASH_BASE
1950 },
1951 ),
1952 (
1954 Language::Assembly,
1955 StaticLangConfig {
1956 line_comments: &[";"],
1957 block_comment: None,
1958 allow_single_quote_strings: false,
1959 allow_double_quote_strings: false,
1960 symbol_patterns: SP_ASSEMBLY,
1961 ..C_SLASH_BASE
1962 },
1963 ),
1964 (
1965 Language::Clojure,
1966 StaticLangConfig {
1967 line_comments: &[";"],
1968 block_comment: None,
1969 allow_single_quote_strings: false,
1970 symbol_patterns: SP_CLOJURE,
1971 ..C_SLASH_BASE
1972 },
1973 ),
1974 (
1976 Language::Erlang,
1977 StaticLangConfig {
1978 line_comments: &["%"],
1979 block_comment: None,
1980 allow_single_quote_strings: false,
1981 symbol_patterns: SP_ERLANG,
1982 ..C_SLASH_BASE
1983 },
1984 ),
1985 (
1987 Language::Php,
1988 StaticLangConfig {
1989 line_comments: &["//", "#"],
1990 block_comment: Some(("/*", "*/")),
1991 symbol_patterns: SP_PHP,
1992 ..C_SLASH_BASE
1993 },
1994 ),
1995 (
1997 Language::Julia,
1998 StaticLangConfig {
1999 line_comments: &["#"],
2000 block_comment: Some(("#=", "=#")),
2001 allow_single_quote_strings: false,
2002 allow_triple_quote_strings: true,
2003 symbol_patterns: SP_JULIA,
2004 ..C_SLASH_BASE
2005 },
2006 ),
2007];
2008
/// Switches that adjust how physical lines are counted.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// When set, a pure-code line whose trimmed text starts with `#` is also
    /// tallied in `compiler_directive_lines`.
    has_preprocessor_directives: bool,
    /// When set, a blank line inside an open block comment is flagged as a
    /// multi-line comment line instead of being left unflagged.
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside a block comment or string) is
    /// merged with the following line before classification.
    collapse_continuation_lines: bool,
}
2020
/// The kind of string literal the scanner is currently inside.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Ordinary string closed by the given delimiter character; a backslash
    /// escapes the character that follows it.
    Single(char),
    /// Triple-quoted string closed by the given delimiter text.
    Triple(&'static str),
    /// `@"..."` verbatim string: `""` is an escaped quote and a lone `"`
    /// closes the literal.
    VerbatimDouble,
}
2027
/// What the scanner observed on one logical line; consumed by
/// `classify_line`.
#[allow(clippy::struct_excessive_bools)]
#[derive(Debug, Default)]
struct LineFacts {
    /// The line contains code (string literals count as code).
    has_code: bool,
    /// A line-comment prefix was found outside strings and block comments.
    has_single_comment: bool,
    /// Part of the line lies inside a block comment.
    has_multi_comment: bool,
    /// The line belongs to a docstring.
    /// NOTE(review): `scan_line` never sets this flag; it is only carried
    /// through the continuation merge in the code visible here.
    has_docstring: bool,
}
2036
2037fn process_string_char(
2041 state: StringState,
2042 chars: &[char],
2043 i: usize,
2044) -> (Option<StringState>, usize) {
2045 match state {
2046 StringState::Single(delim) => {
2047 if chars[i] == '\\' {
2048 return (Some(state), 2); }
2050 if chars[i] == delim {
2051 (None, 1)
2052 } else {
2053 (Some(state), 1)
2054 }
2055 }
2056 StringState::Triple(delim) => {
2057 if starts_with(chars, i, delim) {
2058 (None, delim.len())
2059 } else {
2060 (Some(state), 1)
2061 }
2062 }
2063 StringState::VerbatimDouble => {
2064 if starts_with(chars, i, "\"\"") {
2065 return (Some(state), 2); }
2067 if chars[i] == '"' {
2068 (None, 1)
2069 } else {
2070 (Some(state), 1)
2071 }
2072 }
2073 }
2074}
2075
2076fn process_block_comment_char(chars: &[char], i: usize, close: &str) -> (bool, usize) {
2080 if starts_with(chars, i, close) {
2081 (false, close.len())
2082 } else {
2083 (true, 1)
2084 }
2085}
2086
2087fn try_open_string(chars: &[char], i: usize, config: &ScanConfig) -> Option<(StringState, usize)> {
2091 if config.allow_csharp_verbatim_strings && starts_with(chars, i, "@\"") {
2092 return Some((StringState::VerbatimDouble, 2));
2093 }
2094 if config.allow_triple_quote_strings {
2095 if starts_with(chars, i, "\"\"\"") {
2096 return Some((StringState::Triple("\"\"\""), 3));
2097 }
2098 if starts_with(chars, i, "'''") {
2099 return Some((StringState::Triple("'''"), 3));
2100 }
2101 }
2102 if config.allow_single_quote_strings && chars[i] == '\'' {
2103 return Some((StringState::Single('\''), 1));
2104 }
2105 if config.allow_double_quote_strings && chars[i] == '"' {
2106 return Some((StringState::Single('"'), 1));
2107 }
2108 None
2109}
2110
2111fn step_through_block_comment(
2117 chars: &[char],
2118 i: usize,
2119 block_comment: Option<(&'static str, &'static str)>,
2120 in_block_comment: &mut bool,
2121) -> usize {
2122 if let Some((_, close)) = block_comment {
2123 let (still_in, advance) = process_block_comment_char(chars, i, close);
2124 *in_block_comment = still_in;
2125 return advance;
2126 }
2127 0
2128}
2129
2130fn try_open_block_comment(
2133 chars: &[char],
2134 i: usize,
2135 block_comment: Option<(&'static str, &'static str)>,
2136) -> Option<usize> {
2137 let (open, _) = block_comment?;
2138 starts_with(chars, i, open).then_some(open.len())
2139}
2140
/// Scans one physical line character by character and records in `facts`
/// whether it contains code, a line comment and/or block-comment text.
///
/// `in_block_comment` and `string_state` carry lexer state across lines: they
/// are read on entry and left in whatever state the end of the line reaches.
/// The order of the checks below is significant and must be preserved.
fn scan_line(
    chars: &[char],
    config: &ScanConfig,
    facts: &mut LineFacts,
    in_block_comment: &mut bool,
    string_state: &mut Option<StringState>,
) {
    let mut i = 0usize;
    while i < chars.len() {
        // 1. Inside a string literal: everything counts as code until the
        //    literal closes. Comment markers inside strings are ignored.
        if let Some(state) = *string_state {
            facts.has_code = true;
            let (new_state, advance) = process_string_char(state, chars, i);
            *string_state = new_state;
            i += advance;
            continue;
        }

        // 2. Inside a block comment: consume until the closing delimiter.
        if *in_block_comment {
            facts.has_multi_comment = true;
            i += step_through_block_comment(chars, i, config.block_comment, in_block_comment);
            continue;
        }

        // 3. Whitespace outside strings and comments carries no information.
        if chars[i].is_whitespace() {
            i += 1;
            continue;
        }

        // 4. String openers are tried before comment openers.
        if let Some((new_state, advance)) = try_open_string(chars, i, config) {
            facts.has_code = true;
            *string_state = Some(new_state);
            i += advance;
            continue;
        }

        // 5. Block-comment openers are tried before line-comment prefixes, so
        //    an opener such as `--[[` or `#[` wins over the shorter `--` / `#`.
        if let Some(advance) = try_open_block_comment(chars, i, config.block_comment) {
            facts.has_multi_comment = true;
            *in_block_comment = true;
            i += advance;
            continue;
        }

        // 6. A line comment ends the scan: the rest of the line is comment.
        if config
            .line_comments
            .iter()
            .any(|prefix| starts_with(chars, i, prefix))
        {
            facts.has_single_comment = true;
            break;
        }

        // 7. Anything else is code.
        facts.has_code = true;
        i += 1;
    }
}
2206
2207fn finalize_line_facts(
2212 facts: LineFacts,
2213 trimmed: &str,
2214 raw: &mut RawLineCounts,
2215 ieee: IeeeFlags,
2216 in_block_comment: bool,
2217 string_state: Option<StringState>,
2218 pending_continuation: &mut Option<LineFacts>,
2219) -> Option<LineFacts> {
2220 if ieee.has_preprocessor_directives
2224 && facts.has_code
2225 && !facts.has_single_comment
2226 && !facts.has_multi_comment
2227 && trimmed.starts_with('#')
2228 {
2229 raw.compiler_directive_lines += 1;
2230 }
2231
2232 let is_continuation = ieee.collapse_continuation_lines
2235 && !in_block_comment
2236 && string_state.is_none()
2237 && trimmed.ends_with('\\');
2238
2239 if is_continuation {
2240 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
2241 pending.has_code |= facts.has_code;
2242 pending.has_single_comment |= facts.has_single_comment;
2243 pending.has_multi_comment |= facts.has_multi_comment;
2244 pending.has_docstring |= facts.has_docstring;
2245 return None; }
2247
2248 let emit = if let Some(pending) = pending_continuation.take() {
2250 LineFacts {
2251 has_code: pending.has_code | facts.has_code,
2252 has_single_comment: pending.has_single_comment | facts.has_single_comment,
2253 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
2254 has_docstring: pending.has_docstring | facts.has_docstring,
2255 }
2256 } else {
2257 facts
2258 };
2259 Some(emit)
2260}
2261
2262#[allow(clippy::needless_pass_by_value)]
2267#[allow(clippy::too_many_arguments)]
2268#[allow(clippy::many_single_char_names)] fn process_physical_line(
2270 line: &str,
2271 line_idx: usize,
2272 config: &ScanConfig,
2273 raw: &mut RawLineCounts,
2274 in_block_comment: &mut bool,
2275 string_state: &mut Option<StringState>,
2276 pending_continuation: &mut Option<LineFacts>,
2277 ieee: IeeeFlags,
2278) {
2279 raw.total_physical_lines += 1;
2280
2281 if config.skip_lines.contains(&line_idx) {
2282 raw.docstring_comment_lines += 1;
2283 return;
2284 }
2285
2286 let trimmed = line.trim();
2287 let mut facts = LineFacts::default();
2288
2289 if *in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
2293 facts.has_multi_comment = true;
2294 }
2295
2296 let chars: Vec<char> = line.chars().collect();
2297 scan_line(&chars, config, &mut facts, in_block_comment, string_state);
2298
2299 let Some(emit) = finalize_line_facts(
2300 facts,
2301 trimmed,
2302 raw,
2303 ieee,
2304 *in_block_comment,
2305 *string_state,
2306 pending_continuation,
2307 ) else {
2308 return;
2309 };
2310
2311 classify_line(raw, &emit, trimmed);
2312
2313 if emit.has_code {
2314 let (f, c, v, i, t, a, s) = count_symbols(&config.symbol_patterns, trimmed);
2315 raw.functions += f;
2316 raw.classes += c;
2317 raw.variables += v;
2318 raw.imports += i;
2319 raw.test_count += t;
2320 raw.test_assertion_count += a;
2321 raw.test_suite_count += s;
2322 }
2323}
2324
2325#[allow(clippy::needless_pass_by_value)]
2326fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
2327 let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
2328 let lines: Vec<&str> = normalized.split_terminator('\n').collect();
2329
2330 let mut raw = RawLineCounts::default();
2331 let mut warnings = Vec::new();
2332
2333 let mut in_block_comment = false;
2334 let mut string_state: Option<StringState> = None;
2335 let mut pending_continuation: Option<LineFacts> = None;
2337
2338 for (line_idx, line) in lines.iter().enumerate() {
2339 process_physical_line(
2340 line,
2341 line_idx,
2342 &config,
2343 &mut raw,
2344 &mut in_block_comment,
2345 &mut string_state,
2346 &mut pending_continuation,
2347 ieee,
2348 );
2349 }
2350
2351 if let Some(pending) = pending_continuation.take() {
2353 classify_line(&mut raw, &pending, "");
2354 }
2355
2356 if in_block_comment {
2357 warnings.push("unclosed block comment detected; result is best effort".into());
2358 }
2359 if string_state.is_some() {
2360 warnings.push("unclosed string literal detected; result is best effort".into());
2361 }
2362
2363 RawFileAnalysis {
2364 raw,
2365 parse_mode: if warnings.is_empty() {
2366 ParseMode::Lexical
2367 } else {
2368 ParseMode::LexicalBestEffort
2369 },
2370 warnings,
2371 style_analysis: None,
2372 }
2373}
2374
2375const fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
2376 if facts.has_docstring {
2377 raw.docstring_comment_lines += 1;
2378 } else if !facts.has_code
2379 && !facts.has_single_comment
2380 && !facts.has_multi_comment
2381 && trimmed.is_empty()
2382 {
2383 raw.blank_only_lines += 1;
2384 } else if facts.has_code && facts.has_single_comment {
2385 raw.mixed_code_single_comment_lines += 1;
2386 } else if facts.has_code && facts.has_multi_comment {
2387 raw.mixed_code_multi_comment_lines += 1;
2388 } else if facts.has_code {
2389 raw.code_only_lines += 1;
2390 } else if facts.has_single_comment {
2391 raw.single_comment_only_lines += 1;
2392 } else if facts.has_multi_comment {
2393 raw.multi_comment_only_lines += 1;
2394 } else if trimmed.is_empty() {
2395 raw.blank_only_lines += 1;
2396 } else {
2397 raw.skipped_unknown_lines += 1;
2398 }
2399}
2400
2401fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
2402 let hit = |pats: &[&str]| u64::from(pats.iter().any(|p| trimmed.starts_with(p)));
2403 let fn_pp = if patterns.functions_prefix_paren.is_empty() {
2406 0
2407 } else if let Some(paren_pos) = trimmed.find('(') {
2408 if trimmed[..paren_pos].contains('=') {
2409 0
2410 } else {
2411 hit(patterns.functions_prefix_paren)
2412 }
2413 } else {
2414 0
2415 };
2416 let test_hit = hit(patterns.tests);
2417 let fn_hit = if test_hit == 0 {
2424 hit(patterns.functions) | fn_pp
2425 } else {
2426 0
2427 };
2428 let class_hit = if test_hit == 0 {
2429 hit(patterns.classes)
2430 } else {
2431 0
2432 };
2433 (
2434 fn_hit,
2435 class_hit,
2436 hit(patterns.variables),
2437 hit(patterns.imports),
2438 test_hit,
2439 hit(patterns.assertions),
2440 hit(patterns.test_suites),
2441 )
2442}
2443
/// Returns `true` when the characters of `needle` appear in `chars` starting
/// at position `index`.
///
/// An `index` past the end of `chars` never matches, and an empty `needle`
/// matches at any position up to and including `chars.len()`.
///
/// The comparison walks both sequences lazily, so no temporary buffer is
/// allocated; this function is called for every scanned character.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    let Some(tail) = chars.get(index..) else {
        return false;
    };
    let mut remaining = tail.iter();
    // `next()` yields `None` once `tail` is exhausted, which fails the match
    // for a needle longer than the remaining input.
    needle
        .chars()
        .all(|expected| remaining.next() == Some(&expected))
}
2448
/// One entry of the indentation stack used while looking for Python
/// docstrings.
#[derive(Debug, Clone)]
struct PyContext {
    /// Indentation (number of leading whitespace characters) of the block body.
    indent: usize,
    /// `true` until the first significant line of the block has been seen;
    /// only that line can be recorded as the block's docstring.
    expect_docstring: bool,
}
2454
2455fn py_pop_outdented_contexts(contexts: &mut Vec<PyContext>, indent: usize) {
2457 while contexts.len() > 1 && indent < contexts.last().map_or(0, |c| c.indent) {
2458 contexts.pop();
2459 }
2460}
2461
2462fn py_handle_pending_indent(
2465 pending_block_indent: &mut Option<usize>,
2466 contexts: &mut Vec<PyContext>,
2467 indent: usize,
2468 trimmed: &str,
2469) {
2470 let Some(base_indent) = *pending_block_indent else {
2471 return;
2472 };
2473 if indent > base_indent {
2474 contexts.push(PyContext {
2475 indent,
2476 expect_docstring: true,
2477 });
2478 *pending_block_indent = None;
2479 } else if !trimmed.starts_with('@') {
2480 *pending_block_indent = None;
2481 }
2482}
2483
2484fn py_try_record_docstring(
2490 ctx: &mut PyContext,
2491 trimmed: &str,
2492 idx: usize,
2493 docstring_lines: &mut HashSet<usize>,
2494 active_docstring: &mut Option<(&'static str, usize)>,
2495) -> bool {
2496 if !ctx.expect_docstring {
2497 return false;
2498 }
2499 if let Some(delim) = docstring_delimiter(trimmed) {
2500 docstring_lines.insert(idx);
2501 ctx.expect_docstring = false;
2502 if !closes_triple_docstring(trimmed, delim, true) {
2503 *active_docstring = Some((delim, idx));
2504 }
2505 return true;
2506 }
2507 ctx.expect_docstring = false;
2508 false
2509}
2510
2511fn track_active_docstring(
2515 active_docstring: &mut Option<(&'static str, usize)>,
2516 docstring_lines: &mut HashSet<usize>,
2517 idx: usize,
2518 trimmed: &str,
2519) -> bool {
2520 let Some((delim, start_line)) = *active_docstring else {
2521 return false;
2522 };
2523 docstring_lines.insert(idx);
2524 if closes_triple_docstring(trimmed, delim, idx == start_line) {
2525 *active_docstring = None;
2526 }
2527 true
2528}
2529
2530fn try_record_docstring_if_context(
2533 contexts: &mut [PyContext],
2534 trimmed: &str,
2535 idx: usize,
2536 docstring_lines: &mut HashSet<usize>,
2537 active_docstring: &mut Option<(&'static str, usize)>,
2538) -> bool {
2539 let Some(ctx) = contexts.last_mut() else {
2540 return false;
2541 };
2542 py_try_record_docstring(ctx, trimmed, idx, docstring_lines, active_docstring)
2543}
2544
/// Marks every line from the start of a still-open docstring up to (but not
/// including) `num_lines` as a docstring line.
///
/// Does nothing when no docstring is active.
fn mark_unclosed_docstring_lines(
    active_docstring: Option<&(&'static str, usize)>,
    docstring_lines: &mut HashSet<usize>,
    num_lines: usize,
) {
    let Some(&(_, first_line)) = active_docstring else {
        return;
    };
    docstring_lines.extend(first_line..num_lines);
}
2557
/// Returns the zero-based indices of all lines that belong to Python
/// docstrings in `text`.
///
/// A docstring is a triple-quoted string that is the first significant line
/// of the module or of a `def` / `async def` / `class` body. Blank lines and
/// `#` comment lines are skipped when looking for that first line. A
/// docstring left open at the end of the file extends to the last line.
fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
    let normalized = text.replace("\r\n", "\n").replace('\r', "\n");
    let lines: Vec<&str> = normalized.split_terminator('\n').collect();

    let mut docstring_lines = HashSet::new();
    // The bottom context stands for the module itself, which may begin with
    // a docstring.
    let mut contexts = vec![PyContext {
        indent: 0,
        expect_docstring: true,
    }];
    // Indentation of a block header whose body has not started yet.
    let mut pending_block_indent: Option<usize> = None;
    // Delimiter and starting line of a docstring that is still open.
    let mut active_docstring: Option<(&'static str, usize)> = None;

    for (idx, line) in lines.iter().enumerate() {
        let trimmed = line.trim();
        let indent = leading_indent(line);

        // Inside a multi-line docstring every line belongs to it, including
        // blank lines and lines that look like comments.
        if track_active_docstring(&mut active_docstring, &mut docstring_lines, idx, trimmed) {
            continue;
        }

        // Blank and comment lines neither open a body nor consume the
        // docstring expectation.
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        // Leave finished blocks first, then see whether this line starts the
        // body of a pending header.
        py_pop_outdented_contexts(&mut contexts, indent);
        py_handle_pending_indent(&mut pending_block_indent, &mut contexts, indent, trimmed);

        if try_record_docstring_if_context(
            &mut contexts,
            trimmed,
            idx,
            &mut docstring_lines,
            &mut active_docstring,
        ) {
            continue;
        }

        // Remember the header so the next deeper line opens a new context.
        if is_python_block_header(trimmed) {
            pending_block_indent = Some(indent);
        }
    }

    mark_unclosed_docstring_lines(active_docstring.as_ref(), &mut docstring_lines, lines.len());

    docstring_lines
}
2605
/// Number of whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, so a tab contributes `1`.
fn leading_indent(line: &str) -> usize {
    let indent_bytes = line.len() - line.trim_start().len();
    line[..indent_bytes].chars().count()
}
2609
/// Returns `true` when the trimmed line starts with `def `, `async def ` or
/// `class ` and ends with `:`.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];

    trimmed.ends_with(':')
        && HEADER_KEYWORDS
            .iter()
            .any(|keyword| trimmed.starts_with(*keyword))
}
2616
/// Returns the triple-quote delimiter that opens the line, if any.
///
/// Any run of string-prefix letters (`r`, `u`, `b`, `f` in either case) at the
/// start of the line is skipped before looking for `"""` or `'''`.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    let after_prefix = trimmed
        .trim_start_matches(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U' | 'b' | 'B' | 'f' | 'F'));

    ["\"\"\"", "'''"]
        .iter()
        .copied()
        .find(|delim| after_prefix.starts_with(*delim))
}
2638
/// Decides whether a triple-quoted docstring is closed on this line.
///
/// Counts the non-overlapping occurrences of `delim` in `trimmed`. On the
/// line that opened the docstring (`same_line_as_start`) two occurrences are
/// needed, the opener and the closer; on any later line one is enough.
///
/// `str::matches` is used for the count, so the function also terminates for
/// an empty `delim`.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    let required = if same_line_as_start { 2 } else { 1 };
    trimmed.matches(delim).count() >= required
}
2653
2654#[cfg(feature = "tree-sitter")]
2659pub mod ts {
2660 use tree_sitter::Node;
2661
2662 use super::{ParseMode, RawFileAnalysis, RawLineCounts};
2663
    /// Node kinds and name prefixes that drive symbol counting for one
    /// tree-sitter grammar. An empty string disables the corresponding check.
    struct SymbolKinds {
        /// Node kind that denotes a function definition.
        function_def: &'static str,
        /// Node kind that denotes a class definition.
        class_def: &'static str,
        /// Functions whose name starts with this prefix are counted as tests.
        test_fn_prefix: &'static str,
        /// Classes whose name starts with this prefix are counted as tests.
        test_class_prefix: &'static str,
        /// Attribute calls (`obj.<attr>(...)`) whose attribute name starts
        /// with this prefix are counted as assertions.
        assertion_attr_prefix: &'static str,
    }
2681
    impl SymbolKinds {
        /// A configuration with every field empty, which disables all symbol
        /// counting.
        const fn none() -> Self {
            Self {
                function_def: "",
                class_def: "",
                test_fn_prefix: "",
                test_class_prefix: "",
                assertion_attr_prefix: "",
            }
        }
    }
2693
2694 fn analyze_lines(
2700 text: &str,
2701 ts_language: &tree_sitter::Language,
2702 comment_node_kinds: &[&str],
2703 docstring_stmt_kind: Option<&str>,
2704 symbols: &SymbolKinds,
2705 ) -> Option<RawFileAnalysis> {
2706 let mut parser = tree_sitter::Parser::new();
2707 parser.set_language(ts_language).ok()?;
2708 let tree = parser.parse(text, None)?;
2709
2710 let lines: Vec<&str> = text.split_terminator('\n').collect();
2711 let n = lines.len();
2712
2713 let mut has_code = vec![false; n];
2714 let mut has_comment = vec![false; n];
2715 let mut comment_is_block = vec![false; n];
2716 let mut has_docstring = vec![false; n];
2717
2718 let mut ctx = VisitCtx {
2720 source: text.as_bytes(),
2721 comment_kinds: comment_node_kinds,
2722 docstring_stmt_kind,
2723 has_code: &mut has_code,
2724 has_comment: &mut has_comment,
2725 comment_is_block: &mut comment_is_block,
2726 has_docstring: &mut has_docstring,
2727 };
2728 visit(tree.root_node(), &mut ctx);
2729
2730 let mut raw = RawLineCounts::default();
2731 classify_ts_lines(
2732 &lines,
2733 &has_code,
2734 &has_comment,
2735 &comment_is_block,
2736 &has_docstring,
2737 &mut raw,
2738 );
2739
2740 if !symbols.function_def.is_empty() || !symbols.class_def.is_empty() {
2742 count_symbols(tree.root_node(), text.as_bytes(), symbols, &mut raw);
2743 }
2744
2745 Some(RawFileAnalysis {
2746 raw,
2747 parse_mode: ParseMode::TreeSitter,
2748 warnings: Vec::new(),
2749 })
2750 }
2751
2752 fn recurse_children(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
2754 for i in 0..node.child_count() {
2755 #[allow(clippy::cast_possible_truncation)]
2756 if let Some(child) = node.child(i as u32) {
2757 count_symbols(child, source, kinds, raw);
2758 }
2759 }
2760 }
2761
2762 fn try_count_function(
2764 node: Node,
2765 source: &[u8],
2766 kinds: &SymbolKinds,
2767 raw: &mut RawLineCounts,
2768 ) -> bool {
2769 if kinds.function_def.is_empty() || node.kind() != kinds.function_def {
2770 return false;
2771 }
2772 let name = node
2773 .child_by_field_name("name")
2774 .and_then(|n| n.utf8_text(source).ok())
2775 .unwrap_or("");
2776 if !kinds.test_fn_prefix.is_empty() && name.starts_with(kinds.test_fn_prefix) {
2777 raw.test_count += 1;
2778 } else {
2779 raw.functions += 1;
2780 }
2781 recurse_children(node, source, kinds, raw);
2782 true
2783 }
2784
2785 fn try_count_class(
2787 node: Node,
2788 source: &[u8],
2789 kinds: &SymbolKinds,
2790 raw: &mut RawLineCounts,
2791 ) -> bool {
2792 if kinds.class_def.is_empty() || node.kind() != kinds.class_def {
2793 return false;
2794 }
2795 let name = node
2796 .child_by_field_name("name")
2797 .and_then(|n| n.utf8_text(source).ok())
2798 .unwrap_or("");
2799 if !kinds.test_class_prefix.is_empty() && name.starts_with(kinds.test_class_prefix) {
2800 raw.test_count += 1;
2801 } else {
2802 raw.classes += 1;
2803 }
2804 recurse_children(node, source, kinds, raw);
2805 true
2806 }
2807
2808 fn try_count_assertion(
2811 node: Node,
2812 source: &[u8],
2813 kinds: &SymbolKinds,
2814 raw: &mut RawLineCounts,
2815 ) -> bool {
2816 if kinds.assertion_attr_prefix.is_empty() || node.kind() != "call" {
2817 return false;
2818 }
2819 let Some(func) = node.child_by_field_name("function") else {
2820 return false;
2821 };
2822 if func.kind() != "attribute" {
2823 return false;
2824 }
2825 let attr_text = func
2826 .child_by_field_name("attribute")
2827 .and_then(|n| n.utf8_text(source).ok())
2828 .unwrap_or("");
2829 if !attr_text.starts_with(kinds.assertion_attr_prefix) {
2830 return false;
2831 }
2832 raw.test_assertion_count += 1;
2833 true
2834 }
2835
2836 fn count_symbols(node: Node, source: &[u8], kinds: &SymbolKinds, raw: &mut RawLineCounts) {
2839 if try_count_function(node, source, kinds, raw) {
2840 return;
2841 }
2842 if try_count_class(node, source, kinds, raw) {
2843 return;
2844 }
2845 if try_count_assertion(node, source, kinds, raw) {
2846 return;
2847 }
2848 recurse_children(node, source, kinds, raw);
2849 }
2850
    /// Per-line classification flags handed to `classify_ts_line`.
    ///
    /// `classify_ts_lines` builds one value per source line from its four parallel
    /// `bool` slices.
    // The four flags are independent per-line facts, hence the lint suppression.
    #[allow(clippy::struct_excessive_bools)]
    #[derive(Clone, Copy)]
    struct TsLineFlags {
        /// Code was recorded on this line.
        has_code: bool,
        /// A comment was recorded on this line.
        has_comment: bool,
        /// The comment recorded on this line is a block comment (selects the
        /// "multi" counters instead of the "single" ones).
        comment_is_block: bool,
        /// A docstring was recorded on this line.
        has_docstring: bool,
    }
2861
2862 const fn classify_ts_line(trimmed: &str, flags: TsLineFlags, raw: &mut RawLineCounts) {
2864 if trimmed.is_empty() {
2865 raw.blank_only_lines += 1;
2866 } else if flags.has_docstring && !flags.has_code {
2867 raw.docstring_comment_lines += 1;
2868 } else if flags.has_code && flags.has_comment {
2869 if flags.comment_is_block {
2871 raw.mixed_code_multi_comment_lines += 1;
2872 } else {
2873 raw.mixed_code_single_comment_lines += 1;
2874 }
2875 } else if flags.has_comment {
2876 if flags.comment_is_block {
2877 raw.multi_comment_only_lines += 1;
2878 } else {
2879 raw.single_comment_only_lines += 1;
2880 }
2881 } else {
2882 raw.code_only_lines += 1;
2883 }
2884 }
2885
2886 fn classify_ts_lines(
2888 lines: &[&str],
2889 has_code: &[bool],
2890 has_comment: &[bool],
2891 comment_is_block: &[bool],
2892 has_docstring: &[bool],
2893 raw: &mut RawLineCounts,
2894 ) {
2895 for i in 0..lines.len() {
2896 raw.total_physical_lines += 1;
2897 classify_ts_line(
2898 lines[i].trim(),
2899 TsLineFlags {
2900 has_code: has_code[i],
2901 has_comment: has_comment[i],
2902 comment_is_block: comment_is_block[i],
2903 has_docstring: has_docstring[i],
2904 },
2905 raw,
2906 );
2907 }
2908 }
2909
    /// Shared state threaded through the recursive `visit` walk.
    ///
    /// The four `Vec<bool>` fields are per-row flag tables indexed by a node's row;
    /// rows at or beyond a table's length are ignored by the visitors.
    struct VisitCtx<'a> {
        /// Source bytes, used to read node text.
        source: &'a [u8],
        /// Node kinds that `visit` treats as comments.
        comment_kinds: &'a [&'a str],
        /// Statement kind that may wrap a docstring; `None` disables docstring detection.
        docstring_stmt_kind: Option<&'a str>,
        /// Rows covered by a non-extra leaf node.
        has_code: &'a mut Vec<bool>,
        /// Rows covered by a comment node.
        has_comment: &'a mut Vec<bool>,
        /// Rows covered by a comment whose text starts with `/*` or `<#`.
        comment_is_block: &'a mut Vec<bool>,
        /// Rows covered by a string recognised as a docstring.
        has_docstring: &'a mut Vec<bool>,
    }
2919
2920 fn visit_comment_node(node: Node, ctx: &mut VisitCtx<'_>) {
2922 let start_row = node.start_position().row;
2923 let end_row = node.end_position().row;
2924 let first_two = node
2925 .utf8_text(ctx.source)
2926 .unwrap_or("")
2927 .get(..2)
2928 .unwrap_or("");
2929 let is_block = first_two == "/*" || first_two == "<#";
2930 for row in start_row..=end_row {
2931 if row < ctx.has_comment.len() {
2932 ctx.has_comment[row] = true;
2933 if is_block {
2934 ctx.comment_is_block[row] = true;
2935 }
2936 }
2937 }
2938 }
2939
2940 fn visit_maybe_docstring(node: Node, kind: &str, ctx: &mut VisitCtx<'_>) -> bool {
2943 let Some(stmt_kind) = ctx.docstring_stmt_kind else {
2944 return false;
2945 };
2946 if kind != stmt_kind || node.named_child_count() != 1 {
2947 return false;
2948 }
2949 let Some(child) = node.named_child(0) else {
2950 return false;
2951 };
2952 if child.kind() != "string" {
2953 return false;
2954 }
2955 let child_start = child.start_position().row;
2956 let child_end = child.end_position().row;
2957 for row in child_start..=child_end {
2958 if row < ctx.has_docstring.len() {
2959 ctx.has_docstring[row] = true;
2960 }
2961 }
2962 true
2963 }
2964
2965 fn visit_leaf_code(node: Node, ctx: &mut VisitCtx<'_>) {
2967 let start_row = node.start_position().row;
2968 let end_row = node.end_position().row;
2969 for row in start_row..=end_row {
2970 if row < ctx.has_code.len() {
2971 ctx.has_code[row] = true;
2972 }
2973 }
2974 }
2975
2976 #[allow(clippy::too_many_lines)]
2977 fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
2978 let kind = node.kind();
2979
2980 if ctx.comment_kinds.contains(&kind) {
2982 visit_comment_node(node, ctx);
2983 return;
2984 }
2985
2986 if visit_maybe_docstring(node, kind, ctx) {
2988 return;
2989 }
2990
2991 if node.child_count() == 0 && !node.is_extra() {
2993 visit_leaf_code(node, ctx);
2994 return;
2995 }
2996
2997 for i in 0..node.child_count() {
2998 #[allow(clippy::cast_possible_truncation)]
2999 if let Some(child) = node.child(i as u32) {
3001 visit(child, ctx);
3002 }
3003 }
3004 }
3005
    /// Symbol configuration passed to `analyze_lines` by `analyze_c`.
    // NOTE(review): `SymbolKinds::none()` presumably leaves every kind/prefix empty, which
    // would disable symbol counting for C — confirm against its definition.
    const C_SYMBOLS: SymbolKinds = SymbolKinds::none();
3007
    /// Symbol configuration passed to `analyze_lines` by `analyze_python`.
    const PYTHON_SYMBOLS: SymbolKinds = SymbolKinds {
        // Node kind that `try_count_function` matches against.
        function_def: "function_definition",
        // Node kind that `try_count_class` matches against.
        class_def: "class_definition",
        // Functions whose name starts with this are counted as tests, not functions.
        test_fn_prefix: "test_",
        // Classes whose name starts with this are counted as tests, not classes.
        test_class_prefix: "Test",
        // Attribute calls whose attribute name starts with this are counted as assertions.
        assertion_attr_prefix: "assert",
    };
3015
3016 #[must_use]
3018 pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
3019 let lang: tree_sitter::Language = tree_sitter_c::LANGUAGE.into();
3020 analyze_lines(text, &lang, &["comment"], None, &C_SYMBOLS)
3021 }
3022
3023 #[must_use]
3025 pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
3026 let lang: tree_sitter::Language = tree_sitter_python::LANGUAGE.into();
3027 analyze_lines(
3028 text,
3029 &lang,
3030 &["comment"],
3031 Some("expression_statement"),
3032 &PYTHON_SYMBOLS,
3033 )
3034 }
3035}
3036
// Unit tests for line classification, language detection, and symbol/test counting,
// all driven through the public `analyze_text` / `detect_language` entry points.
#[cfg(test)]
mod tests {
    use super::*;

    // Docstring lines are reported separately from code and from `#` comments.
    #[test]
    fn python_docstrings_are_separated() {
        let input = r#""""module docs"""


def fn_a():
    """function docs"""
    value = 1 # trailing comment
    return value
"#;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        // One module docstring line plus one function docstring line.
        assert_eq!(result.raw.docstring_comment_lines, 2);
        // `value = 1 # trailing comment`
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        // `def fn_a():` and `return value`
        assert_eq!(result.raw.code_only_lines, 2);
    }

    // A `//` comment trailing code is "mixed single"; a standalone `/* */` is "multi only".
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let result = analyze_text(Language::C, input, AnalysisOptions::default());
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.multi_comment_only_lines, 1);
    }

    // An extensionless path is resolved to Shell from its `bash` shebang line.
    #[test]
    fn detect_language_by_shebang() {
        let language = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(language, Some(Language::Shell));
    }

    // Analyzes a single newline-terminated `line` in `lang` and returns, in order:
    // (functions, classes, variables, imports, test_count, test_assertion_count,
    // test_suite_count).
    fn sym(lang: Language, line: &str) -> (u64, u64, u64, u64, u64, u64, u64) {
        let result = analyze_text(lang, &format!("{line}\n"), AnalysisOptions::default());
        let r = &result.raw;
        (
            r.functions,
            r.classes,
            r.variables,
            r.imports,
            r.test_count,
            r.test_assertion_count,
            r.test_suite_count,
        )
    }

    // In the tests below `f` = functions, `c` = classes, `t` = test_count.

    #[test]
    fn python_test_fn_not_double_counted() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def test_foo():");
        assert_eq!(f, 0, "test fn must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(c, 0);
    }

    #[test]
    fn python_test_class_not_double_counted() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class TestFoo:");
        assert_eq!(c, 0, "test class must not also increment classes");
        assert_eq!(t, 1, "must be counted as a test");
        assert_eq!(f, 0);
    }

    #[test]
    fn python_regular_fn_counts_as_function() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "def regular():");
        assert_eq!(f, 1, "regular function must be counted");
        assert_eq!(t, 0);
        assert_eq!(c, 0);
    }

    #[test]
    fn python_regular_class_counts_as_class() {
        let (f, c, _, _, t, _, _) = sym(Language::Python, "class Regular:");
        assert_eq!(c, 1, "regular class must be counted");
        assert_eq!(t, 0);
        assert_eq!(f, 0);
    }

    #[test]
    fn go_test_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func TestFoo(t *testing.T) {");
        assert_eq!(f, 0, "Go test func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    #[test]
    fn go_benchmark_fn_not_double_counted() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func BenchmarkBar(b *testing.B) {");
        assert_eq!(f, 0, "Go benchmark func must not also increment functions");
        assert_eq!(t, 1, "must be counted as a test");
    }

    #[test]
    fn go_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::Go, "func doSomething() {");
        assert_eq!(f, 1, "regular Go func must be counted");
        assert_eq!(t, 0);
    }

    // The attribute line alone is what registers the test.
    #[test]
    fn rust_test_attr_counts_as_test_not_function() {
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "#[test]");
        assert_eq!(t, 1, "#[test] must be counted as a test");
        assert_eq!(f, 0, "#[test] attribute must not be counted as a function");
    }

    // A `fn` line is a function even when its name starts with `test_`.
    #[test]
    fn rust_fn_line_counts_as_function_not_test() {
        let (f, _, _, _, t, _, _) = sym(Language::Rust, "fn test_something() {");
        assert_eq!(f, 1, "fn declaration must count as a function");
        assert_eq!(
            t, 0,
            "fn declaration line must not be double-counted as a test"
        );
    }

    #[test]
    fn js_describe_counts_as_test_not_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "describe('suite', () => {");
        assert_eq!(t, 1, "describe must be counted as a test");
        assert_eq!(f, 0, "describe must not be counted as a function");
    }

    #[test]
    fn js_regular_fn_counts_as_function() {
        let (f, _, _, _, t, _, _) = sym(Language::JavaScript, "function doWork() {");
        assert_eq!(f, 1, "JS function declaration must be counted");
        assert_eq!(t, 0);
    }
}