1use std::collections::{BTreeMap, BTreeSet, HashSet};
5use std::path::Path;
6
7use serde::{Deserialize, Serialize};
8
/// A programming, markup, or build language known to this module.
///
/// Variant order is significant: the derived `PartialOrd`/`Ord` compare by
/// declaration order, which in turn fixes the iteration order of ordered
/// collections such as the `BTreeSet` returned by `supported_languages`.
///
/// With `rename_all = "snake_case"` serde splits multi-word variant names at
/// each capital letter (`CSharp` is serialized as `"c_sharp"`, `JavaScript`
/// as `"java_script"`), so the serialized names are not always the same
/// strings that `as_slug` returns.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Language {
    C,
    Cpp,
    CSharp,
    Go,
    Java,
    JavaScript,
    Python,
    Rust,
    Shell,
    PowerShell,
    TypeScript,
    Assembly,
    Clojure,
    Css,
    Dart,
    Dockerfile,
    Elixir,
    Erlang,
    FSharp,
    Groovy,
    Haskell,
    Html,
    Julia,
    Kotlin,
    Lua,
    Makefile,
    Nim,
    ObjectiveC,
    Ocaml,
    Perl,
    Php,
    R,
    Ruby,
    Scala,
    Scss,
    Sql,
    Svelte,
    Swift,
    Vue,
    Xml,
    Zig,
}
55
56impl Language {
57 pub fn display_name(&self) -> &'static str {
58 match self {
59 Language::C => "C",
60 Language::Cpp => "C++",
61 Language::CSharp => "C#",
62 Language::Go => "Go",
63 Language::Java => "Java",
64 Language::JavaScript => "JavaScript",
65 Language::Python => "Python",
66 Language::Rust => "Rust",
67 Language::Shell => "Shell",
68 Language::PowerShell => "PowerShell",
69 Language::TypeScript => "TypeScript",
70 Language::Assembly => "Assembly",
71 Language::Clojure => "Clojure",
72 Language::Css => "CSS",
73 Language::Dart => "Dart",
74 Language::Dockerfile => "Dockerfile",
75 Language::Elixir => "Elixir",
76 Language::Erlang => "Erlang",
77 Language::FSharp => "F#",
78 Language::Groovy => "Groovy",
79 Language::Haskell => "Haskell",
80 Language::Html => "HTML",
81 Language::Julia => "Julia",
82 Language::Kotlin => "Kotlin",
83 Language::Lua => "Lua",
84 Language::Makefile => "Makefile",
85 Language::Nim => "Nim",
86 Language::ObjectiveC => "Objective-C",
87 Language::Ocaml => "OCaml",
88 Language::Perl => "Perl",
89 Language::Php => "PHP",
90 Language::R => "R",
91 Language::Ruby => "Ruby",
92 Language::Scala => "Scala",
93 Language::Scss => "SCSS",
94 Language::Sql => "SQL",
95 Language::Svelte => "Svelte",
96 Language::Swift => "Swift",
97 Language::Vue => "Vue",
98 Language::Xml => "XML",
99 Language::Zig => "Zig",
100 }
101 }
102
103 pub fn as_slug(&self) -> &'static str {
104 match self {
105 Language::C => "c",
106 Language::Cpp => "cpp",
107 Language::CSharp => "csharp",
108 Language::Go => "go",
109 Language::Java => "java",
110 Language::JavaScript => "javascript",
111 Language::Python => "python",
112 Language::Rust => "rust",
113 Language::Shell => "shell",
114 Language::PowerShell => "powershell",
115 Language::TypeScript => "typescript",
116 Language::Assembly => "assembly",
117 Language::Clojure => "clojure",
118 Language::Css => "css",
119 Language::Dart => "dart",
120 Language::Dockerfile => "dockerfile",
121 Language::Elixir => "elixir",
122 Language::Erlang => "erlang",
123 Language::FSharp => "fsharp",
124 Language::Groovy => "groovy",
125 Language::Haskell => "haskell",
126 Language::Html => "html",
127 Language::Julia => "julia",
128 Language::Kotlin => "kotlin",
129 Language::Lua => "lua",
130 Language::Makefile => "makefile",
131 Language::Nim => "nim",
132 Language::ObjectiveC => "objectivec",
133 Language::Ocaml => "ocaml",
134 Language::Perl => "perl",
135 Language::Php => "php",
136 Language::R => "r",
137 Language::Ruby => "ruby",
138 Language::Scala => "scala",
139 Language::Scss => "scss",
140 Language::Sql => "sql",
141 Language::Svelte => "svelte",
142 Language::Swift => "swift",
143 Language::Vue => "vue",
144 Language::Xml => "xml",
145 Language::Zig => "zig",
146 }
147 }
148
149 pub fn from_name(name: &str) -> Option<Self> {
150 match name.trim().to_ascii_lowercase().as_str() {
151 "c" => Some(Language::C),
152 "cpp" | "c++" | "cplusplus" => Some(Language::Cpp),
153 "csharp" | "c#" | "cs" => Some(Language::CSharp),
154 "go" | "golang" => Some(Language::Go),
155 "java" => Some(Language::Java),
156 "javascript" | "js" => Some(Language::JavaScript),
157 "python" | "py" => Some(Language::Python),
158 "rust" | "rs" => Some(Language::Rust),
159 "shell" | "sh" | "bash" => Some(Language::Shell),
160 "powershell" | "pwsh" | "ps" => Some(Language::PowerShell),
161 "typescript" | "ts" => Some(Language::TypeScript),
162 "assembly" | "asm" => Some(Language::Assembly),
163 "clojure" | "clj" => Some(Language::Clojure),
164 "css" => Some(Language::Css),
165 "dart" => Some(Language::Dart),
166 "dockerfile" | "docker" => Some(Language::Dockerfile),
167 "elixir" | "ex" => Some(Language::Elixir),
168 "erlang" | "erl" => Some(Language::Erlang),
169 "fsharp" | "f#" | "fs" => Some(Language::FSharp),
170 "groovy" => Some(Language::Groovy),
171 "haskell" | "hs" => Some(Language::Haskell),
172 "html" | "htm" => Some(Language::Html),
173 "julia" | "jl" => Some(Language::Julia),
174 "kotlin" | "kt" => Some(Language::Kotlin),
175 "lua" => Some(Language::Lua),
176 "makefile" | "make" | "mk" => Some(Language::Makefile),
177 "nim" => Some(Language::Nim),
178 "objectivec" | "objc" | "objective-c" => Some(Language::ObjectiveC),
179 "ocaml" | "ml" => Some(Language::Ocaml),
180 "perl" | "pl" => Some(Language::Perl),
181 "php" => Some(Language::Php),
182 "r" => Some(Language::R),
183 "ruby" | "rb" => Some(Language::Ruby),
184 "scala" => Some(Language::Scala),
185 "scss" | "sass" => Some(Language::Scss),
186 "sql" => Some(Language::Sql),
187 "svelte" => Some(Language::Svelte),
188 "swift" => Some(Language::Swift),
189 "vue" => Some(Language::Vue),
190 "xml" => Some(Language::Xml),
191 "zig" => Some(Language::Zig),
192 _ => None,
193 }
194 }
195}
196
/// Raw per-file tallies: line classifications plus symbol counts.
///
/// The code that fills these counters is outside this view, so the field
/// notes below are read off the field names rather than the counting logic.
/// `Default` yields a value with every counter at zero.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct RawLineCounts {
    /// Total number of physical lines.
    pub total_physical_lines: u64,
    /// Lines that are blank.
    pub blank_only_lines: u64,
    /// Lines holding code and no comment.
    pub code_only_lines: u64,
    /// Lines holding only a single-line comment.
    pub single_comment_only_lines: u64,
    /// Lines holding only (part of) a multi-line comment.
    pub multi_comment_only_lines: u64,
    /// Lines holding code together with a single-line comment.
    pub mixed_code_single_comment_lines: u64,
    /// Lines holding code together with a multi-line comment.
    pub mixed_code_multi_comment_lines: u64,
    /// Lines counted as docstring comments.
    pub docstring_comment_lines: u64,
    /// Lines that were skipped rather than classified.
    pub skipped_unknown_lines: u64,
    // The remaining fields carry `#[serde(default)]`: input that lacks them
    // still deserializes, with the missing counter set to 0.
    /// Number of function declarations found.
    #[serde(default)]
    pub functions: u64,
    /// Number of class-like declarations found.
    #[serde(default)]
    pub classes: u64,
    /// Number of variable declarations found.
    #[serde(default)]
    pub variables: u64,
    /// Number of import statements found.
    #[serde(default)]
    pub imports: u64,
    /// Number of compiler/preprocessor directive lines
    /// (NOTE(review): exact definition lives with the counter — confirm).
    #[serde(default)]
    pub compiler_directive_lines: u64,
}
226
/// Identifies the kind of parser that produced a file's counts.
///
/// Serialized in snake_case: `"lexical"`, `"lexical_best_effort"` and
/// `"tree_sitter"`. Which analyzer reports which mode is decided outside
/// this view.
#[derive(Debug, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum ParseMode {
    Lexical,
    LexicalBestEffort,
    TreeSitter,
}
234
/// Result of analyzing one file's text; this is what `analyze_text` returns.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RawFileAnalysis {
    /// The line and symbol tallies.
    pub raw: RawLineCounts,
    /// Which kind of parser produced `raw`.
    pub parse_mode: ParseMode,
    /// Human-readable warnings collected during analysis.
    pub warnings: Vec<String>,
}
241
/// Caller-selectable switches that `analyze_text` copies into the flags it
/// hands to the scanner.
#[derive(Debug, Clone, Copy)]
pub struct AnalysisOptions {
    /// Presumably: count blank lines inside a block comment as comment lines
    /// rather than blank lines (the scanner that reads it is not shown here).
    pub blank_in_block_comment_as_comment: bool,
    /// Presumably: treat lines joined by a continuation marker as one line
    /// (the scanner that reads it is not shown here).
    pub collapse_continuation_lines: bool,
}

impl Default for AnalysisOptions {
    /// Defaults: `blank_in_block_comment_as_comment` on,
    /// `collapse_continuation_lines` off.
    fn default() -> Self {
        AnalysisOptions {
            collapse_continuation_lines: false,
            blank_in_block_comment_as_comment: true,
        }
    }
}
264
265pub fn supported_languages() -> BTreeSet<Language> {
266 [
267 Language::Assembly,
268 Language::C,
269 Language::Clojure,
270 Language::Cpp,
271 Language::CSharp,
272 Language::Css,
273 Language::Dart,
274 Language::Dockerfile,
275 Language::Elixir,
276 Language::Erlang,
277 Language::FSharp,
278 Language::Go,
279 Language::Groovy,
280 Language::Haskell,
281 Language::Html,
282 Language::Java,
283 Language::JavaScript,
284 Language::Julia,
285 Language::Kotlin,
286 Language::Lua,
287 Language::Makefile,
288 Language::Nim,
289 Language::ObjectiveC,
290 Language::Ocaml,
291 Language::Perl,
292 Language::Php,
293 Language::PowerShell,
294 Language::Python,
295 Language::R,
296 Language::Ruby,
297 Language::Rust,
298 Language::Scala,
299 Language::Scss,
300 Language::Shell,
301 Language::Sql,
302 Language::Svelte,
303 Language::Swift,
304 Language::TypeScript,
305 Language::Vue,
306 Language::Xml,
307 Language::Zig,
308 ]
309 .into_iter()
310 .collect()
311}
312
313pub fn detect_language(
314 path: &Path,
315 first_line: Option<&str>,
316 extension_overrides: &BTreeMap<String, String>,
317 shebang_detection: bool,
318) -> Option<Language> {
319 let extension = path
320 .extension()
321 .and_then(|ext| ext.to_str())
322 .map(|ext| ext.to_ascii_lowercase());
323
324 if let Some(ext) = extension.as_ref() {
326 if let Some(override_name) = extension_overrides.get(ext.as_str()) {
327 if let Some(lang) = Language::from_name(override_name) {
328 return Some(lang);
329 }
330 }
331 }
332
333 let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
335 let filename = path.file_name().and_then(|s| s.to_str()).unwrap_or("");
336 let filename_lower = filename.to_ascii_lowercase();
337
338 if filename == "Dockerfile"
340 || filename.starts_with("Dockerfile.")
341 || filename_lower == "dockerfile"
342 {
343 return Some(Language::Dockerfile);
344 }
345
346 if matches!(
348 filename,
349 "Makefile" | "GNUmakefile" | "makefile" | "BSDmakefile"
350 ) {
351 return Some(Language::Makefile);
352 }
353
354 if matches!(
356 filename,
357 "Rakefile" | "Gemfile" | "Guardfile" | "Vagrantfile" | "Fastfile" | "Podfile"
358 ) {
359 return Some(Language::Ruby);
360 }
361
362 let _ = stem; if let Some(ext) = extension.as_deref() {
366 let by_ext = match ext {
367 "c" | "h" => Some(Language::C),
369 "cc" | "cp" | "cpp" | "cxx" | "hh" | "hpp" | "hxx" => Some(Language::Cpp),
370 "cs" => Some(Language::CSharp),
371 "go" => Some(Language::Go),
372 "java" => Some(Language::Java),
373 "js" | "mjs" | "cjs" => Some(Language::JavaScript),
374 "py" => Some(Language::Python),
375 "rs" => Some(Language::Rust),
376 "sh" | "bash" | "zsh" | "ksh" => Some(Language::Shell),
377 "ps1" | "psm1" | "psd1" => Some(Language::PowerShell),
378 "ts" | "mts" | "cts" => Some(Language::TypeScript),
379 "asm" | "s" => Some(Language::Assembly),
381 "clj" | "cljs" | "cljc" | "edn" => Some(Language::Clojure),
382 "css" => Some(Language::Css),
383 "dart" => Some(Language::Dart),
384 "ex" | "exs" => Some(Language::Elixir),
385 "erl" | "hrl" => Some(Language::Erlang),
386 "fs" | "fsi" | "fsx" => Some(Language::FSharp),
387 "groovy" | "gradle" => Some(Language::Groovy),
388 "hs" | "lhs" => Some(Language::Haskell),
389 "html" | "htm" | "xhtml" => Some(Language::Html),
390 "jl" => Some(Language::Julia),
391 "kt" | "kts" => Some(Language::Kotlin),
392 "lua" => Some(Language::Lua),
393 "mk" => Some(Language::Makefile),
394 "nim" | "nims" => Some(Language::Nim),
395 "m" | "mm" => Some(Language::ObjectiveC),
396 "ml" | "mli" => Some(Language::Ocaml),
397 "pl" | "pm" | "t" => Some(Language::Perl),
398 "php" | "php3" | "php4" | "php5" | "php7" | "phtml" => Some(Language::Php),
399 "r" => Some(Language::R),
400 "rb" | "rake" => Some(Language::Ruby),
401 "scala" | "sc" => Some(Language::Scala),
402 "scss" | "sass" => Some(Language::Scss),
403 "sql" => Some(Language::Sql),
404 "svelte" => Some(Language::Svelte),
405 "swift" => Some(Language::Swift),
406 "vue" => Some(Language::Vue),
407 "xml" | "xsd" | "xsl" | "xslt" | "svg" => Some(Language::Xml),
408 "zig" => Some(Language::Zig),
409 _ => None,
410 };
411
412 if by_ext.is_some() {
413 return by_ext;
414 }
415 }
416
417 if shebang_detection {
418 if let Some(line) = first_line {
419 let lower = line.to_ascii_lowercase();
420 if lower.starts_with("#!") {
421 if lower.contains("python") {
422 return Some(Language::Python);
423 }
424 if lower.contains("pwsh") || lower.contains("powershell") {
425 return Some(Language::PowerShell);
426 }
427 if lower.contains("bash")
428 || lower.contains("/sh")
429 || lower.contains("zsh")
430 || lower.contains("ksh")
431 {
432 return Some(Language::Shell);
433 }
434 if lower.contains("ruby") {
435 return Some(Language::Ruby);
436 }
437 if lower.contains("perl") {
438 return Some(Language::Perl);
439 }
440 if lower.contains("php") {
441 return Some(Language::Php);
442 }
443 if lower.contains("node") || lower.contains("nodejs") {
444 return Some(Language::JavaScript);
445 }
446 }
447 }
448 }
449
450 None
451}
452
453pub fn analyze_text(language: Language, text: &str, options: AnalysisOptions) -> RawFileAnalysis {
454 let base = IeeeFlags {
456 has_preprocessor_directives: false,
457 blank_in_block_comment_as_comment: options.blank_in_block_comment_as_comment,
458 collapse_continuation_lines: options.collapse_continuation_lines,
459 };
460 let cpp = IeeeFlags {
463 has_preprocessor_directives: true,
464 ..base
465 };
466
467 match language {
468 Language::C => {
469 #[cfg(feature = "tree-sitter")]
470 if let Some(result) = ts::analyze_c(text) {
471 return result;
472 }
473 analyze_generic(
474 text,
475 ScanConfig {
476 line_comments: &["//"],
477 block_comment: Some(("/*", "*/")),
478 allow_single_quote_strings: true,
479 allow_double_quote_strings: true,
480 allow_triple_quote_strings: false,
481 allow_csharp_verbatim_strings: false,
482 skip_lines: HashSet::new(),
483 symbol_patterns: SP_C,
484 },
485 cpp,
486 )
487 }
488 Language::Cpp => {
489 #[cfg(feature = "tree-sitter")]
491 if let Some(result) = ts::analyze_c(text) {
492 return result;
493 }
494 analyze_generic(
495 text,
496 ScanConfig {
497 line_comments: &["//"],
498 block_comment: Some(("/*", "*/")),
499 allow_single_quote_strings: true,
500 allow_double_quote_strings: true,
501 allow_triple_quote_strings: false,
502 allow_csharp_verbatim_strings: false,
503 skip_lines: HashSet::new(),
504 symbol_patterns: SP_CPP,
505 },
506 cpp,
507 )
508 }
509 Language::CSharp => analyze_generic(
510 text,
511 ScanConfig {
512 line_comments: &["//"],
513 block_comment: Some(("/*", "*/")),
514 allow_single_quote_strings: true,
515 allow_double_quote_strings: true,
516 allow_triple_quote_strings: false,
517 allow_csharp_verbatim_strings: true,
518 skip_lines: HashSet::new(),
519 symbol_patterns: SP_CSHARP,
520 },
521 base,
522 ),
523 Language::Go => analyze_generic(
524 text,
525 ScanConfig {
526 line_comments: &["//"],
527 block_comment: Some(("/*", "*/")),
528 allow_single_quote_strings: true,
529 allow_double_quote_strings: true,
530 allow_triple_quote_strings: false,
531 allow_csharp_verbatim_strings: false,
532 skip_lines: HashSet::new(),
533 symbol_patterns: SP_GO,
534 },
535 base,
536 ),
537 Language::Java => analyze_generic(
538 text,
539 ScanConfig {
540 line_comments: &["//"],
541 block_comment: Some(("/*", "*/")),
542 allow_single_quote_strings: true,
543 allow_double_quote_strings: true,
544 allow_triple_quote_strings: false,
545 allow_csharp_verbatim_strings: false,
546 skip_lines: HashSet::new(),
547 symbol_patterns: SP_JAVA,
548 },
549 base,
550 ),
551 Language::JavaScript => analyze_generic(
552 text,
553 ScanConfig {
554 line_comments: &["//"],
555 block_comment: Some(("/*", "*/")),
556 allow_single_quote_strings: true,
557 allow_double_quote_strings: true,
558 allow_triple_quote_strings: false,
559 allow_csharp_verbatim_strings: false,
560 skip_lines: HashSet::new(),
561 symbol_patterns: SP_JS,
562 },
563 base,
564 ),
565 Language::Rust => analyze_generic(
566 text,
567 ScanConfig {
568 line_comments: &["//"],
570 block_comment: Some(("/*", "*/")),
571 allow_single_quote_strings: false,
572 allow_double_quote_strings: true,
573 allow_triple_quote_strings: false,
574 allow_csharp_verbatim_strings: false,
575 skip_lines: HashSet::new(),
576 symbol_patterns: SP_RUST,
577 },
578 base,
579 ),
580 Language::Shell => analyze_generic(
581 text,
582 ScanConfig {
583 line_comments: &["#"],
584 block_comment: None,
585 allow_single_quote_strings: true,
586 allow_double_quote_strings: true,
587 allow_triple_quote_strings: false,
588 allow_csharp_verbatim_strings: false,
589 skip_lines: HashSet::new(),
590 symbol_patterns: SP_SHELL,
591 },
592 base,
593 ),
594 Language::PowerShell => analyze_generic(
595 text,
596 ScanConfig {
597 line_comments: &["#"],
598 block_comment: Some(("<#", "#>")),
599 allow_single_quote_strings: true,
600 allow_double_quote_strings: true,
601 allow_triple_quote_strings: false,
602 allow_csharp_verbatim_strings: false,
603 skip_lines: HashSet::new(),
604 symbol_patterns: SP_POWERSHELL,
605 },
606 base,
607 ),
608 Language::TypeScript => analyze_generic(
609 text,
610 ScanConfig {
611 line_comments: &["//"],
612 block_comment: Some(("/*", "*/")),
613 allow_single_quote_strings: true,
614 allow_double_quote_strings: true,
615 allow_triple_quote_strings: false,
616 allow_csharp_verbatim_strings: false,
617 skip_lines: HashSet::new(),
618 symbol_patterns: SP_TS,
619 },
620 base,
621 ),
622 Language::Python => {
623 #[cfg(feature = "tree-sitter")]
624 if let Some(result) = ts::analyze_python(text) {
625 return result;
626 }
627 let docstring_lines = detect_python_docstring_lines(text);
628 analyze_generic(
629 text,
630 ScanConfig {
631 line_comments: &["#"],
632 block_comment: None,
633 allow_single_quote_strings: true,
634 allow_double_quote_strings: true,
635 allow_triple_quote_strings: true,
636 allow_csharp_verbatim_strings: false,
637 skip_lines: docstring_lines,
638 symbol_patterns: SP_PYTHON,
639 },
640 base,
641 )
642 }
643 Language::Assembly => analyze_generic(
645 text,
646 ScanConfig {
647 line_comments: &[";"],
648 block_comment: None,
649 allow_single_quote_strings: false,
650 allow_double_quote_strings: false,
651 allow_triple_quote_strings: false,
652 allow_csharp_verbatim_strings: false,
653 skip_lines: HashSet::new(),
654 symbol_patterns: SP_ASSEMBLY,
655 },
656 base,
657 ),
658 Language::Clojure => analyze_generic(
659 text,
660 ScanConfig {
661 line_comments: &[";"],
662 block_comment: None,
663 allow_single_quote_strings: false,
664 allow_double_quote_strings: true,
665 allow_triple_quote_strings: false,
666 allow_csharp_verbatim_strings: false,
667 skip_lines: HashSet::new(),
668 symbol_patterns: SP_CLOJURE,
669 },
670 base,
671 ),
672 Language::Css => analyze_generic(
673 text,
674 ScanConfig {
675 line_comments: &[],
676 block_comment: Some(("/*", "*/")),
677 allow_single_quote_strings: true,
678 allow_double_quote_strings: true,
679 allow_triple_quote_strings: false,
680 allow_csharp_verbatim_strings: false,
681 skip_lines: HashSet::new(),
682 symbol_patterns: SP_NONE,
683 },
684 base,
685 ),
686 Language::Dart => analyze_generic(
687 text,
688 ScanConfig {
689 line_comments: &["//"],
690 block_comment: Some(("/*", "*/")),
691 allow_single_quote_strings: true,
692 allow_double_quote_strings: true,
693 allow_triple_quote_strings: false,
694 allow_csharp_verbatim_strings: false,
695 skip_lines: HashSet::new(),
696 symbol_patterns: SP_DART,
697 },
698 base,
699 ),
700 Language::Dockerfile => analyze_generic(
701 text,
702 ScanConfig {
703 line_comments: &["#"],
704 block_comment: None,
705 allow_single_quote_strings: false,
706 allow_double_quote_strings: false,
707 allow_triple_quote_strings: false,
708 allow_csharp_verbatim_strings: false,
709 skip_lines: HashSet::new(),
710 symbol_patterns: SP_NONE,
711 },
712 base,
713 ),
714 Language::Elixir => analyze_generic(
715 text,
716 ScanConfig {
717 line_comments: &["#"],
718 block_comment: None,
719 allow_single_quote_strings: true,
720 allow_double_quote_strings: true,
721 allow_triple_quote_strings: false,
722 allow_csharp_verbatim_strings: false,
723 skip_lines: HashSet::new(),
724 symbol_patterns: SP_ELIXIR,
725 },
726 base,
727 ),
728 Language::Erlang => analyze_generic(
729 text,
730 ScanConfig {
731 line_comments: &["%"],
732 block_comment: None,
733 allow_single_quote_strings: false,
734 allow_double_quote_strings: true,
735 allow_triple_quote_strings: false,
736 allow_csharp_verbatim_strings: false,
737 skip_lines: HashSet::new(),
738 symbol_patterns: SP_ERLANG,
739 },
740 base,
741 ),
742 Language::FSharp => analyze_generic(
743 text,
744 ScanConfig {
745 line_comments: &["//"],
746 block_comment: Some(("(*", "*)")),
747 allow_single_quote_strings: false,
748 allow_double_quote_strings: true,
749 allow_triple_quote_strings: false,
750 allow_csharp_verbatim_strings: false,
751 skip_lines: HashSet::new(),
752 symbol_patterns: SP_FSHARP,
753 },
754 base,
755 ),
756 Language::Groovy => analyze_generic(
757 text,
758 ScanConfig {
759 line_comments: &["//"],
760 block_comment: Some(("/*", "*/")),
761 allow_single_quote_strings: true,
762 allow_double_quote_strings: true,
763 allow_triple_quote_strings: false,
764 allow_csharp_verbatim_strings: false,
765 skip_lines: HashSet::new(),
766 symbol_patterns: SP_GROOVY,
767 },
768 base,
769 ),
770 Language::Haskell => analyze_generic(
771 text,
772 ScanConfig {
773 line_comments: &["--"],
774 block_comment: Some(("{-", "-}")),
775 allow_single_quote_strings: true,
776 allow_double_quote_strings: true,
777 allow_triple_quote_strings: false,
778 allow_csharp_verbatim_strings: false,
779 skip_lines: HashSet::new(),
780 symbol_patterns: SP_HASKELL,
781 },
782 base,
783 ),
784 Language::Html | Language::Xml => analyze_generic(
785 text,
786 ScanConfig {
787 line_comments: &[],
788 block_comment: Some(("<!--", "-->")),
789 allow_single_quote_strings: false,
790 allow_double_quote_strings: false,
791 allow_triple_quote_strings: false,
792 allow_csharp_verbatim_strings: false,
793 skip_lines: HashSet::new(),
794 symbol_patterns: SP_NONE,
795 },
796 base,
797 ),
798 Language::Julia => analyze_generic(
799 text,
800 ScanConfig {
801 line_comments: &["#"],
802 block_comment: Some(("#=", "=#")),
803 allow_single_quote_strings: false,
804 allow_double_quote_strings: true,
805 allow_triple_quote_strings: true,
806 allow_csharp_verbatim_strings: false,
807 skip_lines: HashSet::new(),
808 symbol_patterns: SP_JULIA,
809 },
810 base,
811 ),
812 Language::Kotlin => analyze_generic(
813 text,
814 ScanConfig {
815 line_comments: &["//"],
816 block_comment: Some(("/*", "*/")),
817 allow_single_quote_strings: true,
818 allow_double_quote_strings: true,
819 allow_triple_quote_strings: false,
820 allow_csharp_verbatim_strings: false,
821 skip_lines: HashSet::new(),
822 symbol_patterns: SP_KOTLIN,
823 },
824 base,
825 ),
826 Language::Lua => analyze_generic(
827 text,
828 ScanConfig {
829 line_comments: &["--"],
830 block_comment: Some(("--[[", "]]")),
831 allow_single_quote_strings: true,
832 allow_double_quote_strings: true,
833 allow_triple_quote_strings: false,
834 allow_csharp_verbatim_strings: false,
835 skip_lines: HashSet::new(),
836 symbol_patterns: SP_LUA,
837 },
838 base,
839 ),
840 Language::Makefile => analyze_generic(
841 text,
842 ScanConfig {
843 line_comments: &["#"],
844 block_comment: None,
845 allow_single_quote_strings: false,
846 allow_double_quote_strings: false,
847 allow_triple_quote_strings: false,
848 allow_csharp_verbatim_strings: false,
849 skip_lines: HashSet::new(),
850 symbol_patterns: SP_NONE,
851 },
852 base,
853 ),
854 Language::Nim => analyze_generic(
855 text,
856 ScanConfig {
857 line_comments: &["#"],
858 block_comment: Some(("#[", "]#")),
859 allow_single_quote_strings: true,
860 allow_double_quote_strings: true,
861 allow_triple_quote_strings: false,
862 allow_csharp_verbatim_strings: false,
863 skip_lines: HashSet::new(),
864 symbol_patterns: SP_NIM,
865 },
866 base,
867 ),
868 Language::ObjectiveC => analyze_generic(
869 text,
870 ScanConfig {
871 line_comments: &["//"],
872 block_comment: Some(("/*", "*/")),
873 allow_single_quote_strings: true,
874 allow_double_quote_strings: true,
875 allow_triple_quote_strings: false,
876 allow_csharp_verbatim_strings: false,
877 skip_lines: HashSet::new(),
878 symbol_patterns: SP_OBJECTIVEC,
879 },
880 cpp,
881 ),
882 Language::Ocaml => analyze_generic(
883 text,
884 ScanConfig {
885 line_comments: &[],
886 block_comment: Some(("(*", "*)")),
887 allow_single_quote_strings: false,
888 allow_double_quote_strings: true,
889 allow_triple_quote_strings: false,
890 allow_csharp_verbatim_strings: false,
891 skip_lines: HashSet::new(),
892 symbol_patterns: SP_OCAML,
893 },
894 base,
895 ),
896 Language::Perl => analyze_generic(
897 text,
898 ScanConfig {
899 line_comments: &["#"],
900 block_comment: None,
901 allow_single_quote_strings: true,
902 allow_double_quote_strings: true,
903 allow_triple_quote_strings: false,
904 allow_csharp_verbatim_strings: false,
905 skip_lines: HashSet::new(),
906 symbol_patterns: SP_PERL,
907 },
908 base,
909 ),
910 Language::Php => analyze_generic(
911 text,
912 ScanConfig {
913 line_comments: &["//", "#"],
914 block_comment: Some(("/*", "*/")),
915 allow_single_quote_strings: true,
916 allow_double_quote_strings: true,
917 allow_triple_quote_strings: false,
918 allow_csharp_verbatim_strings: false,
919 skip_lines: HashSet::new(),
920 symbol_patterns: SP_PHP,
921 },
922 base,
923 ),
924 Language::R => analyze_generic(
925 text,
926 ScanConfig {
927 line_comments: &["#"],
928 block_comment: None,
929 allow_single_quote_strings: true,
930 allow_double_quote_strings: true,
931 allow_triple_quote_strings: false,
932 allow_csharp_verbatim_strings: false,
933 skip_lines: HashSet::new(),
934 symbol_patterns: SP_R,
935 },
936 base,
937 ),
938 Language::Ruby => analyze_generic(
939 text,
940 ScanConfig {
941 line_comments: &["#"],
942 block_comment: None,
943 allow_single_quote_strings: true,
944 allow_double_quote_strings: true,
945 allow_triple_quote_strings: false,
946 allow_csharp_verbatim_strings: false,
947 skip_lines: HashSet::new(),
948 symbol_patterns: SP_RUBY,
949 },
950 base,
951 ),
952 Language::Scala => analyze_generic(
953 text,
954 ScanConfig {
955 line_comments: &["//"],
956 block_comment: Some(("/*", "*/")),
957 allow_single_quote_strings: true,
958 allow_double_quote_strings: true,
959 allow_triple_quote_strings: false,
960 allow_csharp_verbatim_strings: false,
961 skip_lines: HashSet::new(),
962 symbol_patterns: SP_SCALA,
963 },
964 base,
965 ),
966 Language::Scss => analyze_generic(
967 text,
968 ScanConfig {
969 line_comments: &["//"],
970 block_comment: Some(("/*", "*/")),
971 allow_single_quote_strings: true,
972 allow_double_quote_strings: true,
973 allow_triple_quote_strings: false,
974 allow_csharp_verbatim_strings: false,
975 skip_lines: HashSet::new(),
976 symbol_patterns: SP_NONE,
977 },
978 base,
979 ),
980 Language::Sql => analyze_generic(
981 text,
982 ScanConfig {
983 line_comments: &["--"],
984 block_comment: Some(("/*", "*/")),
985 allow_single_quote_strings: true,
986 allow_double_quote_strings: false,
987 allow_triple_quote_strings: false,
988 allow_csharp_verbatim_strings: false,
989 skip_lines: HashSet::new(),
990 symbol_patterns: SP_SQL,
991 },
992 base,
993 ),
994 Language::Svelte => analyze_generic(
995 text,
996 ScanConfig {
997 line_comments: &["//"],
998 block_comment: Some(("/*", "*/")),
999 allow_single_quote_strings: true,
1000 allow_double_quote_strings: true,
1001 allow_triple_quote_strings: false,
1002 allow_csharp_verbatim_strings: false,
1003 skip_lines: HashSet::new(),
1004 symbol_patterns: SP_JS,
1005 },
1006 base,
1007 ),
1008 Language::Swift => analyze_generic(
1009 text,
1010 ScanConfig {
1011 line_comments: &["//"],
1012 block_comment: Some(("/*", "*/")),
1013 allow_single_quote_strings: false,
1014 allow_double_quote_strings: true,
1015 allow_triple_quote_strings: false,
1016 allow_csharp_verbatim_strings: false,
1017 skip_lines: HashSet::new(),
1018 symbol_patterns: SP_SWIFT,
1019 },
1020 base,
1021 ),
1022 Language::Vue => analyze_generic(
1023 text,
1024 ScanConfig {
1025 line_comments: &["//"],
1026 block_comment: Some(("/*", "*/")),
1027 allow_single_quote_strings: true,
1028 allow_double_quote_strings: true,
1029 allow_triple_quote_strings: false,
1030 allow_csharp_verbatim_strings: false,
1031 skip_lines: HashSet::new(),
1032 symbol_patterns: SP_JS,
1033 },
1034 base,
1035 ),
1036 Language::Zig => analyze_generic(
1037 text,
1038 ScanConfig {
1039 line_comments: &["//"],
1040 block_comment: None,
1041 allow_single_quote_strings: true,
1042 allow_double_quote_strings: true,
1043 allow_triple_quote_strings: false,
1044 allow_csharp_verbatim_strings: false,
1045 skip_lines: HashSet::new(),
1046 symbol_patterns: SP_ZIG,
1047 },
1048 base,
1049 ),
1050 }
1051}
1052
/// Per-language lists of declaration patterns, one list per symbol kind.
#[derive(Debug, Clone, Copy)]
struct SymbolPatterns {
    /// Patterns that introduce a function.
    functions: &'static [&'static str],
    /// Patterns that introduce a class-like type.
    classes: &'static [&'static str],
    /// Patterns that introduce a variable.
    variables: &'static [&'static str],
    /// Patterns that introduce an import.
    imports: &'static [&'static str],
}

impl SymbolPatterns {
    /// Builds a pattern set in which every list is empty.
    const fn none() -> Self {
        const EMPTY: &[&str] = &[];
        SymbolPatterns {
            functions: EMPTY,
            classes: EMPTY,
            variables: EMPTY,
            imports: EMPTY,
        }
    }
}

/// The empty pattern set, for languages with no symbol patterns.
const SP_NONE: SymbolPatterns = SymbolPatterns::none();
1076
/// Declaration patterns for Rust.
///
/// Every entry ends in a space (or `<` for `impl<`), so these read as keyword
/// prefixes; the code that applies them is outside this view.
///
/// NOTE(review): visibility/qualifier combinations are enumerated by hand and
/// are not symmetric across the lists (e.g. `pub(super) fn ` is present but
/// `pub(super) struct ` is not) — confirm whether that is intended.
const SP_RUST: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "pub(crate) fn ",
        "pub(super) fn ",
        "async fn ",
        "pub async fn ",
        "pub(crate) async fn ",
        "unsafe fn ",
        "pub unsafe fn ",
        "pub(crate) unsafe fn ",
        "const fn ",
        "pub const fn ",
        "pub(crate) const fn ",
        "extern fn ",
        "pub extern fn ",
    ],
    // Structs, enums, traits, impl blocks and type aliases all count as "classes".
    classes: &[
        "struct ",
        "pub struct ",
        "pub(crate) struct ",
        "enum ",
        "pub enum ",
        "pub(crate) enum ",
        "trait ",
        "pub trait ",
        "pub(crate) trait ",
        "impl ",
        "impl<",
        "type ",
        "pub type ",
        "pub(crate) type ",
    ],
    variables: &["let ", "let mut "],
    imports: &["use ", "pub use ", "pub(crate) use ", "extern crate "],
};
1114
/// Declaration patterns for Python.
///
/// No variable patterns are listed. Both `import ...` and `from ...` forms
/// count as imports.
const SP_PYTHON: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "async def "],
    classes: &["class "],
    variables: &[],
    imports: &["import ", "from "],
};
1121
/// Declaration patterns for JavaScript; `analyze_text` also uses this set
/// for Svelte and Vue files.
///
/// Only keyword-introduced declarations are listed, each with its `export`
/// variants.
const SP_JS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    classes: &["class ", "export class ", "export default class "],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
};
1141
/// Declaration patterns for TypeScript.
///
/// Functions, variables and imports are the same lists as `SP_JS`; the class
/// list additionally covers `abstract`/`declare` classes and interfaces.
const SP_TS: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "async function ",
        "export function ",
        "export async function ",
        "export default function ",
    ],
    classes: &[
        "class ",
        "export class ",
        "export default class ",
        "abstract class ",
        "export abstract class ",
        "interface ",
        "export interface ",
        "declare class ",
        "declare interface ",
    ],
    variables: &[
        "var ",
        "let ",
        "const ",
        "export var ",
        "export let ",
        "export const ",
    ],
    imports: &["import "],
};
1171
/// Declaration patterns for Go: `func`, `type` (counted as classes), `var`
/// and `import`.
const SP_GO: SymbolPatterns = SymbolPatterns {
    functions: &["func "],
    classes: &["type "],
    variables: &["var "],
    imports: &["import "],
};
1178
/// Declaration patterns for Java.
///
/// No function or variable patterns are listed. Classes, interfaces, enums,
/// records and annotation types (`@interface`) all count as classes, with a
/// hand-picked set of modifier combinations.
const SP_JAVA: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "abstract class ",
        "final class ",
        "public abstract class ",
        "public final class ",
        "interface ",
        "public interface ",
        "enum ",
        "public enum ",
        "record ",
        "public record ",
        "@interface ",
    ],
    variables: &[],
    imports: &["import "],
};
1201
/// Declaration patterns for C#.
///
/// No function patterns are listed. Classes, interfaces, enums, structs and
/// records count as classes; only `var` declarations count as variables and
/// `using` lines as imports.
const SP_CSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "class ",
        "public class ",
        "private class ",
        "protected class ",
        "internal class ",
        "abstract class ",
        "sealed class ",
        "static class ",
        "partial class ",
        "public abstract class ",
        "public sealed class ",
        "public static class ",
        "interface ",
        "public interface ",
        "internal interface ",
        "enum ",
        "public enum ",
        "struct ",
        "public struct ",
        "record ",
        "public record ",
    ],
    variables: &["var "],
    imports: &["using "],
};
1230
/// Declaration patterns for C.
///
/// No function or variable patterns are listed. Struct/union declarations
/// and `typedef`'d structs, unions and enums count as classes; `#include`
/// lines count as imports.
const SP_C: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[
        "struct ",
        "typedef struct ",
        "union ",
        "typedef union ",
        "typedef enum ",
    ],
    variables: &[],
    imports: &["#include "],
};
1243
/// Declaration patterns for C++.
///
/// No function or variable patterns are listed. `class`, `struct`,
/// `namespace` and `template` lines count as classes; `#include` lines count
/// as imports.
const SP_CPP: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "struct ", "namespace ", "template "],
    variables: &[],
    imports: &["#include "],
};
1250
/// Line prefixes recognised in shell scripts.
///
/// Only the `function ` keyword form is listed for functions; variables are lines starting with
/// `declare `, `local ` or `export `; imports are `source ` and the `. ` shorthand.
const SP_SHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function "],
    classes: &[],
    variables: &["declare ", "local ", "export "],
    imports: &["source ", ". "],
};
1257
/// Line prefixes recognised in PowerShell scripts.
///
/// Prefix matching is case-sensitive, so the function keyword is listed in the two casings
/// `function ` and `Function `. Variables are not matched.
const SP_POWERSHELL: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "Function "],
    classes: &["class "],
    variables: &[],
    imports: &["Import-Module ", "using "],
};
1264
/// Line prefixes recognised in Kotlin sources.
///
/// `fun` is listed with a fixed set of modifier combinations. `companion object` is the only
/// entry without a trailing space, so it matches whatever character follows it.
const SP_KOTLIN: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fun ",
        "private fun ",
        "public fun ",
        "protected fun ",
        "internal fun ",
        "override fun ",
        "suspend fun ",
        "abstract fun ",
        "open fun ",
        "private suspend fun ",
        "public suspend fun ",
    ],
    classes: &[
        "class ",
        "data class ",
        "sealed class ",
        "abstract class ",
        "open class ",
        "object ",
        "companion object",
        "interface ",
        "enum class ",
        "annotation class ",
    ],
    variables: &["val ", "var ", "private val ", "private var ", "const val "],
    imports: &["import "],
};
1294
/// Line prefixes recognised in Swift sources.
///
/// Note that `class func ` (functions) also begins with `class ` (classes); `count_symbols`
/// evaluates every category independently, so such a line scores in both.
const SP_SWIFT: SymbolPatterns = SymbolPatterns {
    functions: &[
        "func ",
        "private func ",
        "public func ",
        "internal func ",
        "override func ",
        "open func ",
        "static func ",
        "class func ",
        "mutating func ",
        "private static func ",
        "public static func ",
    ],
    classes: &[
        "class ",
        "struct ",
        "protocol ",
        "enum ",
        "extension ",
        "actor ",
        "public class ",
        "private class ",
        "open class ",
        "final class ",
        "public struct ",
        "private struct ",
        "public protocol ",
    ],
    variables: &[
        "var ",
        "let ",
        "private var ",
        "private let ",
        "static var ",
        "static let ",
    ],
    imports: &["import "],
};
1334
/// Line prefixes recognised in Ruby sources: `def` (optionally preceded by `private` or
/// `protected`), `class`/`module`, and `require`/`require_relative`. Variables are not matched.
const SP_RUBY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def "],
    classes: &["class ", "module "],
    variables: &[],
    imports: &["require ", "require_relative "],
};
1341
/// Line prefixes recognised in Scala sources: `def` with a few modifiers, class-like
/// definitions (`class`, `case class`, `object`, `trait`, ...), `val`/`var`/`lazy val`, and
/// `import`.
const SP_SCALA: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "protected def ", "override def "],
    classes: &[
        "class ",
        "case class ",
        "abstract class ",
        "sealed class ",
        "object ",
        "trait ",
    ],
    variables: &["val ", "var ", "lazy val "],
    imports: &["import "],
};
1355
/// Line prefixes recognised in PHP sources.
///
/// `function` is listed with a fixed set of visibility/`static`/`abstract`/`final` prefixes;
/// imports cover `use` as well as the `require`/`include` family. Variables are not matched.
const SP_PHP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "function ",
        "public function ",
        "private function ",
        "protected function ",
        "static function ",
        "abstract function ",
        "final function ",
        "public static function ",
        "private static function ",
        "protected static function ",
    ],
    classes: &[
        "class ",
        "abstract class ",
        "final class ",
        "interface ",
        "trait ",
        "enum ",
    ],
    variables: &[],
    imports: &[
        "use ",
        "require ",
        "require_once ",
        "include ",
        "include_once ",
    ],
};
1386
/// Line prefixes recognised in Elixir sources: the `def*` definition forms under functions,
/// `defmodule`/`defprotocol`/`defimpl` under classes, and `import`/`alias`/`use`/`require`
/// under imports. Variables are not matched.
const SP_ELIXIR: SymbolPatterns = SymbolPatterns {
    functions: &[
        "def ",
        "defp ",
        "defmacro ",
        "defmacrop ",
        "defguard ",
        "defguardp ",
    ],
    classes: &["defmodule ", "defprotocol ", "defimpl "],
    variables: &[],
    imports: &["import ", "alias ", "use ", "require "],
};
1400
/// Line prefixes recognised in Erlang sources: only the `-module(` attribute (under classes)
/// and the `-import(` / `-include(` / `-include_lib(` attributes (under imports) are listed.
const SP_ERLANG: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["-module("],
    variables: &[],
    imports: &["-import(", "-include(", "-include_lib("],
};
1407
/// Line prefixes recognised in F# sources.
///
/// Note that `let mutable ` (variables) also begins with `let ` (functions); `count_symbols`
/// evaluates every category independently, so such a line scores in both.
const SP_FSHARP: SymbolPatterns = SymbolPatterns {
    functions: &[
        "let ",
        "let rec ",
        "member ",
        "override ",
        "abstract member ",
    ],
    classes: &["type "],
    variables: &["let mutable "],
    imports: &["open "],
};
1420
/// Line prefixes recognised in Groovy sources: `def` with optional visibility, class-like
/// definitions, and `import`. Variables are not matched.
const SP_GROOVY: SymbolPatterns = SymbolPatterns {
    functions: &["def ", "private def ", "public def ", "protected def "],
    classes: &["class ", "abstract class ", "interface ", "enum ", "trait "],
    variables: &[],
    imports: &["import "],
};
1427
/// Line prefixes recognised in Haskell sources: `class`, `data`, `newtype` and `type`
/// declarations (all under classes) and `import`. Functions and variables are not matched.
const SP_HASKELL: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "data ", "newtype ", "type "],
    variables: &[],
    imports: &["import "],
};
1434
/// Line prefixes recognised in Lua sources.
///
/// Note that `local function ` (functions) also begins with `local ` (variables);
/// `count_symbols` evaluates every category independently, so such a line scores in both.
/// Classes and imports are not matched.
const SP_LUA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "local function "],
    classes: &[],
    variables: &["local "],
    imports: &[],
};
1441
/// Line prefixes recognised in Nim sources: the routine keywords (`proc`, `func`, `method`,
/// `iterator`, `converter`, `template`, `macro`) under functions, `type` under classes,
/// `var`/`let`/`const` under variables, and `import`/`from` under imports.
const SP_NIM: SymbolPatterns = SymbolPatterns {
    functions: &[
        "proc ",
        "func ",
        "method ",
        "iterator ",
        "converter ",
        "template ",
        "macro ",
    ],
    classes: &["type "],
    variables: &["var ", "let ", "const "],
    imports: &["import ", "from "],
};
1456
/// Line prefixes recognised in Objective-C sources: method lines starting with `- (` or `+ (`,
/// the `@interface`/`@implementation`/`@protocol` directives, and `#import`/`#include`.
/// Variables are not matched.
const SP_OBJECTIVEC: SymbolPatterns = SymbolPatterns {
    functions: &["- (", "+ ("],
    classes: &["@interface ", "@implementation ", "@protocol "],
    variables: &[],
    imports: &["#import ", "#include "],
};
1463
/// Line prefixes recognised in OCaml sources: every `let`/`let rec` binding is filed under
/// functions, `type`/`module`/`class` under classes, and `open` under imports. The variable
/// list is empty.
const SP_OCAML: SymbolPatterns = SymbolPatterns {
    functions: &["let ", "let rec "],
    classes: &["type ", "module ", "class "],
    variables: &[],
    imports: &["open "],
};
1470
/// Line prefixes recognised in Perl sources: `sub` under functions, `package` under classes,
/// `my`/`our`/`local` under variables, and `use`/`require` under imports.
const SP_PERL: SymbolPatterns = SymbolPatterns {
    functions: &["sub "],
    classes: &["package "],
    variables: &["my ", "our ", "local "],
    imports: &["use ", "require "],
};
1477
/// Line prefixes recognised in Clojure sources.
///
/// Every pattern includes the opening parenthesis, so only forms that start at the beginning of
/// a trimmed line are matched.
const SP_CLOJURE: SymbolPatterns = SymbolPatterns {
    functions: &["(defn ", "(defn- ", "(defmacro ", "(defmulti "],
    classes: &[
        "(defrecord ",
        "(defprotocol ",
        "(deftype ",
        "(definterface ",
    ],
    variables: &["(def ", "(defonce "],
    imports: &["(ns ", "(require "],
};
1489
/// Line prefixes recognised in Julia sources: `function`/`macro` under functions, struct and
/// abstract/primitive type declarations under classes, `const` under variables, and
/// `import`/`using` under imports.
const SP_JULIA: SymbolPatterns = SymbolPatterns {
    functions: &["function ", "macro "],
    classes: &[
        "struct ",
        "mutable struct ",
        "abstract type ",
        "primitive type ",
    ],
    variables: &["const "],
    imports: &["import ", "using "],
};
1501
/// Line prefixes recognised in Dart sources: class-like declarations, variable declarations
/// introduced by `var`/`final`/`const`/`late`, and `import`. The function list is empty, so
/// functions never match.
const SP_DART: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &["class ", "abstract class ", "mixin ", "extension ", "enum "],
    variables: &["var ", "final ", "const ", "late "],
    imports: &["import "],
};
1508
/// Line prefixes recognised in R sources: only `library(` and `source(` calls (under imports)
/// are listed; every other category is empty.
const SP_R: SymbolPatterns = SymbolPatterns {
    functions: &[],
    classes: &[],
    variables: &[],
    imports: &["library(", "source("],
};
1515
/// Line prefixes recognised in SQL sources.
///
/// Prefix matching is case-sensitive, so each statement is spelled out in all-lowercase and
/// all-uppercase; mixed-case spellings are not matched. Tables, views and schemas are filed
/// under classes. The import list is empty.
const SP_SQL: SymbolPatterns = SymbolPatterns {
    functions: &[
        "create function ",
        "create or replace function ",
        "create procedure ",
        "create or replace procedure ",
        "CREATE FUNCTION ",
        "CREATE OR REPLACE FUNCTION ",
        "CREATE PROCEDURE ",
        "CREATE OR REPLACE PROCEDURE ",
    ],
    classes: &[
        "create table ",
        "create view ",
        "create schema ",
        "CREATE TABLE ",
        "CREATE VIEW ",
        "CREATE SCHEMA ",
    ],
    variables: &["declare ", "DECLARE "],
    imports: &[],
};
1538
/// Line prefixes recognised in assembly sources: lines that start with `proc `/`PROC ` under
/// functions and `include `/`INCLUDE `/`%include ` under imports. Classes and variables are
/// not matched.
const SP_ASSEMBLY: SymbolPatterns = SymbolPatterns {
    functions: &["proc ", "PROC "],
    classes: &[],
    variables: &[],
    imports: &["include ", "INCLUDE ", "%include "],
};
1545
/// Line prefixes recognised in Zig sources: `fn` with a few `pub`/`export`/`inline` prefixes
/// under functions and `var`/`pub var` under variables. The class and import lists are empty.
const SP_ZIG: SymbolPatterns = SymbolPatterns {
    functions: &[
        "fn ",
        "pub fn ",
        "export fn ",
        "inline fn ",
        "pub inline fn ",
    ],
    classes: &[],
    variables: &["var ", "pub var "],
    imports: &[],
};
1558
/// Per-language settings that drive the character-level scan in `analyze_generic`.
#[derive(Debug, Clone)]
struct ScanConfig {
    /// Prefixes that start a line comment; once one matches, the rest of the line is not scanned.
    line_comments: &'static [&'static str],
    /// `(open, close)` delimiters of block comments, or `None` when block comments are not
    /// recognised.
    block_comment: Option<(&'static str, &'static str)>,
    /// Treat `'` as opening a string that runs to the next `'` not preceded by a backslash.
    allow_single_quote_strings: bool,
    /// Treat `"` as opening a string that runs to the next `"` not preceded by a backslash.
    allow_double_quote_strings: bool,
    /// Recognise `"""` and `'''` as string delimiters (tested before the one-character quotes).
    allow_triple_quote_strings: bool,
    /// Recognise `@"..."` strings, in which `""` is a literal quote and backslash is not an
    /// escape.
    allow_csharp_verbatim_strings: bool,
    /// Zero-based line indices that are counted as docstring lines without being scanned.
    skip_lines: HashSet<usize>,
    /// Prefix tables passed to `count_symbols` for every classified line that contains code.
    symbol_patterns: SymbolPatterns,
}
1570
/// Switches that adjust how `analyze_generic` counts lines.
#[derive(Debug, Clone, Copy)]
struct IeeeFlags {
    /// When set, a line that holds only code and whose trimmed text starts with `#` also
    /// increments `compiler_directive_lines`.
    has_preprocessor_directives: bool,
    /// When set, an empty line inside an open block comment is flagged as block-comment text
    /// instead of being left to classify as blank.
    blank_in_block_comment_as_comment: bool,
    /// When set, a line ending in `\` (outside any string or block comment) is merged with the
    /// lines that follow it and the merged result is classified once.
    collapse_continuation_lines: bool,
}
1582
/// Kind of string literal the scanner is currently inside; it persists across line breaks
/// until the closing delimiter is seen.
#[derive(Debug, Clone, Copy)]
enum StringState {
    /// Inside a string closed by the given quote character; a backslash skips the next char.
    Single(char),
    /// Inside a string closed by the given multi-character delimiter (`"""` or `'''`).
    Triple(&'static str),
    /// Inside an `@"..."` string: `""` is skipped as a pair and a lone `"` closes it.
    VerbatimDouble,
}
1589
/// What the scanner observed on one line (or on a merged run of continuation lines); consumed
/// by `classify_line`.
#[derive(Debug, Default)]
struct LineFacts {
    /// The line holds code, which includes any part of a string literal.
    has_code: bool,
    /// A line-comment prefix was found on the line.
    has_single_comment: bool,
    /// The line touches a block comment (opens one or lies inside one).
    has_multi_comment: bool,
    /// The line is a docstring; `classify_line` checks this flag before all others.
    has_docstring: bool,
}
1597
1598fn analyze_generic(text: &str, config: ScanConfig, ieee: IeeeFlags) -> RawFileAnalysis {
1599 let normalized = if text.is_empty() {
1600 String::new()
1601 } else {
1602 text.replace("\r\n", "\n").replace('\r', "\n")
1603 };
1604
1605 let lines: Vec<&str> = if normalized.is_empty() {
1606 Vec::new()
1607 } else {
1608 normalized.split_terminator('\n').collect()
1609 };
1610
1611 let mut raw = RawLineCounts::default();
1612 let mut warnings = Vec::new();
1613
1614 let mut in_block_comment = false;
1615 let mut string_state: Option<StringState> = None;
1616 let mut pending_continuation: Option<LineFacts> = None;
1618
1619 for (line_idx, line) in lines.iter().enumerate() {
1620 raw.total_physical_lines += 1;
1621
1622 if config.skip_lines.contains(&line_idx) {
1623 raw.docstring_comment_lines += 1;
1624 continue;
1625 }
1626
1627 let mut facts = LineFacts::default();
1628 let trimmed = line.trim();
1629
1630 if in_block_comment && (ieee.blank_in_block_comment_as_comment || !trimmed.is_empty()) {
1634 facts.has_multi_comment = true;
1635 }
1636
1637 let chars: Vec<char> = line.chars().collect();
1638 let mut i = 0usize;
1639 while i < chars.len() {
1640 if config.skip_lines.contains(&line_idx) {
1641 break;
1642 }
1643
1644 if let Some(state) = string_state {
1645 facts.has_code = true;
1646 match state {
1647 StringState::Single(delim) => {
1648 if chars[i] == '\\' {
1649 i += 2;
1650 continue;
1651 }
1652 if chars[i] == delim {
1653 string_state = None;
1654 }
1655 i += 1;
1656 continue;
1657 }
1658 StringState::Triple(delim) => {
1659 if starts_with(&chars, i, delim) {
1660 string_state = None;
1661 i += delim.len();
1662 } else {
1663 i += 1;
1664 }
1665 continue;
1666 }
1667 StringState::VerbatimDouble => {
1668 if starts_with(&chars, i, "\"\"") {
1669 i += 2;
1670 continue;
1671 }
1672 if chars[i] == '"' {
1673 string_state = None;
1674 }
1675 i += 1;
1676 continue;
1677 }
1678 }
1679 }
1680
1681 if in_block_comment {
1682 facts.has_multi_comment = true;
1683 if let Some((_, close)) = config.block_comment {
1684 if starts_with(&chars, i, close) {
1685 in_block_comment = false;
1686 i += close.len();
1687 } else {
1688 i += 1;
1689 }
1690 continue;
1691 }
1692 }
1693
1694 if chars[i].is_whitespace() {
1695 i += 1;
1696 continue;
1697 }
1698
1699 if config.allow_csharp_verbatim_strings && starts_with(&chars, i, "@\"") {
1700 facts.has_code = true;
1701 string_state = Some(StringState::VerbatimDouble);
1702 i += 2;
1703 continue;
1704 }
1705
1706 if config.allow_triple_quote_strings {
1707 if starts_with(&chars, i, "\"\"\"") {
1708 facts.has_code = true;
1709 string_state = Some(StringState::Triple("\"\"\""));
1710 i += 3;
1711 continue;
1712 }
1713 if starts_with(&chars, i, "'''") {
1714 facts.has_code = true;
1715 string_state = Some(StringState::Triple("'''"));
1716 i += 3;
1717 continue;
1718 }
1719 }
1720
1721 if config.allow_single_quote_strings && chars[i] == '\'' {
1722 facts.has_code = true;
1723 string_state = Some(StringState::Single('\''));
1724 i += 1;
1725 continue;
1726 }
1727
1728 if config.allow_double_quote_strings && chars[i] == '"' {
1729 facts.has_code = true;
1730 string_state = Some(StringState::Single('"'));
1731 i += 1;
1732 continue;
1733 }
1734
1735 if let Some((open, _)) = config.block_comment {
1736 if starts_with(&chars, i, open) {
1737 facts.has_multi_comment = true;
1738 in_block_comment = true;
1739 i += open.len();
1740 continue;
1741 }
1742 }
1743
1744 if let Some(prefix) = config
1745 .line_comments
1746 .iter()
1747 .find(|prefix| starts_with(&chars, i, prefix))
1748 {
1749 let _ = prefix;
1750 facts.has_single_comment = true;
1751 break;
1752 }
1753
1754 facts.has_code = true;
1755 i += 1;
1756 }
1757
1758 if ieee.has_preprocessor_directives
1762 && facts.has_code
1763 && !facts.has_single_comment
1764 && !facts.has_multi_comment
1765 && trimmed.starts_with('#')
1766 {
1767 raw.compiler_directive_lines += 1;
1768 }
1769
1770 let is_continuation = ieee.collapse_continuation_lines
1773 && !in_block_comment
1774 && string_state.is_none()
1775 && trimmed.ends_with('\\');
1776
1777 if is_continuation {
1778 let pending = pending_continuation.get_or_insert_with(LineFacts::default);
1779 pending.has_code |= facts.has_code;
1780 pending.has_single_comment |= facts.has_single_comment;
1781 pending.has_multi_comment |= facts.has_multi_comment;
1782 pending.has_docstring |= facts.has_docstring;
1783 continue; }
1785
1786 let emit = if let Some(pending) = pending_continuation.take() {
1788 LineFacts {
1789 has_code: pending.has_code | facts.has_code,
1790 has_single_comment: pending.has_single_comment | facts.has_single_comment,
1791 has_multi_comment: pending.has_multi_comment | facts.has_multi_comment,
1792 has_docstring: pending.has_docstring | facts.has_docstring,
1793 }
1794 } else {
1795 facts
1796 };
1797
1798 classify_line(&mut raw, &emit, trimmed);
1799
1800 if emit.has_code {
1801 let (f, c, v, i) = count_symbols(&config.symbol_patterns, trimmed);
1802 raw.functions += f;
1803 raw.classes += c;
1804 raw.variables += v;
1805 raw.imports += i;
1806 }
1807 }
1808
1809 if let Some(pending) = pending_continuation.take() {
1811 classify_line(&mut raw, &pending, "");
1812 }
1813
1814 if in_block_comment {
1815 warnings.push("unclosed block comment detected; result is best effort".into());
1816 }
1817 if string_state.is_some() {
1818 warnings.push("unclosed string literal detected; result is best effort".into());
1819 }
1820
1821 RawFileAnalysis {
1822 raw,
1823 parse_mode: if warnings.is_empty() {
1824 ParseMode::Lexical
1825 } else {
1826 ParseMode::LexicalBestEffort
1827 },
1828 warnings,
1829 }
1830}
1831
1832fn classify_line(raw: &mut RawLineCounts, facts: &LineFacts, trimmed: &str) {
1833 if facts.has_docstring {
1834 raw.docstring_comment_lines += 1;
1835 } else if !facts.has_code
1836 && !facts.has_single_comment
1837 && !facts.has_multi_comment
1838 && trimmed.is_empty()
1839 {
1840 raw.blank_only_lines += 1;
1841 } else if facts.has_code && facts.has_single_comment {
1842 raw.mixed_code_single_comment_lines += 1;
1843 } else if facts.has_code && facts.has_multi_comment {
1844 raw.mixed_code_multi_comment_lines += 1;
1845 } else if facts.has_code {
1846 raw.code_only_lines += 1;
1847 } else if facts.has_single_comment {
1848 raw.single_comment_only_lines += 1;
1849 } else if facts.has_multi_comment {
1850 raw.multi_comment_only_lines += 1;
1851 } else if trimmed.is_empty() {
1852 raw.blank_only_lines += 1;
1853 } else {
1854 raw.skipped_unknown_lines += 1;
1855 }
1856}
1857
1858fn count_symbols(patterns: &SymbolPatterns, trimmed: &str) -> (u64, u64, u64, u64) {
1859 let hit = |pats: &[&str]| pats.iter().any(|p| trimmed.starts_with(p)) as u64;
1860 (
1861 hit(patterns.functions),
1862 hit(patterns.classes),
1863 hit(patterns.variables),
1864 hit(patterns.imports),
1865 )
1866}
1867
/// Returns `true` when the characters of `needle` appear in `chars` starting at `index`.
///
/// An `index` beyond the end of `chars` yields `false`; an empty `needle` matches at any index
/// up to and including `chars.len()`. The comparison walks both sequences in step and performs
/// no allocation, which matters because the scanner calls this for every character it visits.
fn starts_with(chars: &[char], index: usize, needle: &str) -> bool {
    match chars.get(index..) {
        Some(tail) => {
            let mut remaining = tail.iter();
            // `next()` returns `None` once `tail` is exhausted, which fails the comparison for
            // a needle that would run past the end of the line.
            needle
                .chars()
                .all(|expected| remaining.next() == Some(&expected))
        }
        None => false,
    }
}
1872
1873fn detect_python_docstring_lines(text: &str) -> HashSet<usize> {
1874 let normalized = if text.is_empty() {
1875 String::new()
1876 } else {
1877 text.replace("\r\n", "\n").replace('\r', "\n")
1878 };
1879
1880 let lines: Vec<&str> = if normalized.is_empty() {
1881 Vec::new()
1882 } else {
1883 normalized.split_terminator('\n').collect()
1884 };
1885
1886 #[derive(Debug, Clone)]
1887 struct PyContext {
1888 indent: usize,
1889 expect_docstring: bool,
1890 }
1891
1892 let mut docstring_lines = HashSet::new();
1893 let mut contexts = vec![PyContext {
1894 indent: 0,
1895 expect_docstring: true,
1896 }];
1897 let mut pending_block_indent: Option<usize> = None;
1898 let mut active_docstring: Option<(&'static str, usize)> = None;
1899
1900 for (idx, line) in lines.iter().enumerate() {
1901 let trimmed = line.trim();
1902 let indent = leading_indent(line);
1903
1904 if let Some((delim, start_line)) = active_docstring {
1905 docstring_lines.insert(idx);
1906 if closes_triple_docstring(trimmed, delim, idx == start_line) {
1907 active_docstring = None;
1908 }
1909 continue;
1910 }
1911
1912 if trimmed.is_empty() || trimmed.starts_with('#') {
1913 continue;
1914 }
1915
1916 while contexts.len() > 1 && indent < contexts.last().map(|c| c.indent).unwrap_or(0) {
1917 contexts.pop();
1918 }
1919
1920 if let Some(base_indent) = pending_block_indent {
1921 if indent > base_indent {
1922 contexts.push(PyContext {
1923 indent,
1924 expect_docstring: true,
1925 });
1926 pending_block_indent = None;
1927 } else if !trimmed.starts_with('@') {
1928 pending_block_indent = None;
1929 }
1930 }
1931
1932 if let Some(ctx) = contexts.last_mut() {
1933 if ctx.expect_docstring {
1934 if let Some(delim) = docstring_delimiter(trimmed) {
1935 docstring_lines.insert(idx);
1936 ctx.expect_docstring = false;
1937 if !closes_triple_docstring(trimmed, delim, true) {
1938 active_docstring = Some((delim, idx));
1939 }
1940 continue;
1941 }
1942 ctx.expect_docstring = false;
1943 }
1944 }
1945
1946 if is_python_block_header(trimmed) {
1947 pending_block_indent = Some(indent);
1948 }
1949 }
1950
1951 if let Some((_, start_line)) = active_docstring {
1952 for idx in start_line..lines.len() {
1953 docstring_lines.insert(idx);
1954 }
1955 }
1956
1957 docstring_lines
1958}
1959
/// Counts the whitespace characters at the start of `line`.
///
/// Every whitespace character counts as one, tabs included; a line made up entirely of
/// whitespace returns its full character count.
fn leading_indent(line: &str) -> usize {
    line.chars()
        .position(|ch| !ch.is_whitespace())
        .unwrap_or_else(|| line.chars().count())
}
1963
/// Returns `true` for a trimmed line that starts with `def `, `async def ` or `class ` and ends
/// with a colon.
///
/// Headers that do not end in `:` on the same line (for example a signature spread over
/// several lines, or one followed by a trailing comment) are not recognised.
fn is_python_block_header(trimmed: &str) -> bool {
    const HEADER_KEYWORDS: [&str; 3] = ["def ", "async def ", "class "];

    if !trimmed.ends_with(':') {
        return false;
    }
    HEADER_KEYWORDS
        .iter()
        .any(|keyword| trimmed.starts_with(*keyword))
}
1970
/// Returns the triple-quote delimiter that opens `trimmed`, if the line can start a docstring.
///
/// An optional string prefix of at most two characters drawn from `r`, `R`, `u`, `U` is
/// skipped before the delimiter is tested. Bytes (`b`) and formatted (`f`) prefixes are
/// rejected: Python never uses bytes literals or f-strings as docstrings, so such lines are
/// ordinary statements.
fn docstring_delimiter(trimmed: &str) -> Option<&'static str> {
    // Longest prefix still accepted, e.g. the legacy `ur` spelling.
    const MAX_PREFIX_LEN: usize = 2;

    let rest = trimmed.trim_start_matches(|c: char| matches!(c, 'r' | 'R' | 'u' | 'U'));
    // The prefix characters are ASCII, so the byte difference equals the character count.
    if trimmed.len() - rest.len() > MAX_PREFIX_LEN {
        return None;
    }

    ["\"\"\"", "'''"]
        .iter()
        .copied()
        .find(|quote| rest.starts_with(*quote))
}
1992
/// Decides whether a docstring delimited by `delim` ends on the line `trimmed`.
///
/// On the line that opened the docstring two non-overlapping occurrences of `delim` are needed
/// (the opener and the closer); on any later line one occurrence is enough. Counting stops as
/// soon as the required number has been seen, and an empty `delim` cannot stall the search.
fn closes_triple_docstring(trimmed: &str, delim: &str, same_line_as_start: bool) -> bool {
    let required = if same_line_as_start { 2 } else { 1 };
    trimmed.matches(delim).take(required).count() == required
}
2007
/// Syntax-tree based line classification, compiled only when the `tree-sitter` feature is on.
///
/// The parsed tree is walked once to flag, for every row of the text, whether it carries code,
/// a comment, or a docstring; the flags are then folded into `RawLineCounts`.
#[cfg(feature = "tree-sitter")]
pub mod ts {
    use std::ops::RangeInclusive;

    use tree_sitter::Node;

    use super::{ParseMode, RawFileAnalysis, RawLineCounts};

    /// Parses `text` with `ts_language` and classifies each of its lines.
    ///
    /// * `comment_node_kinds` - node kinds that are treated as comments.
    /// * `docstring_stmt_kind` - statement kind whose only named child, when it is a `string`
    ///   node, is treated as a docstring; `None` disables docstring detection.
    ///
    /// Returns `None` when the language cannot be loaded or parsing yields no tree. Empty or
    /// whitespace-only lines always count as blank, whatever node covers them.
    fn analyze_lines(
        text: &str,
        ts_language: tree_sitter::Language,
        comment_node_kinds: &[&str],
        docstring_stmt_kind: Option<&str>,
    ) -> Option<RawFileAnalysis> {
        let mut parser = tree_sitter::Parser::new();
        parser.set_language(&ts_language).ok()?;
        let tree = parser.parse(text, None)?;

        let lines: Vec<&str> = text.split_terminator('\n').collect();
        let n = lines.len();

        let mut has_code = vec![false; n];
        let mut has_comment = vec![false; n];
        let mut comment_is_block = vec![false; n];
        let mut has_docstring = vec![false; n];

        let mut ctx = VisitCtx {
            source: text.as_bytes(),
            comment_kinds: comment_node_kinds,
            docstring_stmt_kind,
            has_code: &mut has_code,
            has_comment: &mut has_comment,
            comment_is_block: &mut comment_is_block,
            has_docstring: &mut has_docstring,
        };
        visit(tree.root_node(), &mut ctx);

        let mut raw = RawLineCounts::default();

        for (row, line) in lines.iter().enumerate() {
            raw.total_physical_lines += 1;

            if line.trim().is_empty() {
                raw.blank_only_lines += 1;
            } else if has_docstring[row] && !has_code[row] {
                raw.docstring_comment_lines += 1;
            } else if has_code[row] && has_comment[row] {
                if comment_is_block[row] {
                    raw.mixed_code_multi_comment_lines += 1;
                } else {
                    raw.mixed_code_single_comment_lines += 1;
                }
            } else if has_comment[row] {
                if comment_is_block[row] {
                    raw.multi_comment_only_lines += 1;
                } else {
                    raw.single_comment_only_lines += 1;
                }
            } else {
                // Anything left over is counted as code.
                raw.code_only_lines += 1;
            }
        }

        Some(RawFileAnalysis {
            raw,
            parse_mode: ParseMode::TreeSitter,
            warnings: Vec::new(),
        })
    }

    /// Inputs and per-row output flags shared by the recursive tree walk.
    struct VisitCtx<'a> {
        /// Source bytes, used to read the text of comment nodes.
        source: &'a [u8],
        /// Node kinds that are treated as comments.
        comment_kinds: &'a [&'a str],
        /// Statement kind that may wrap a docstring, if docstrings are detected at all.
        docstring_stmt_kind: Option<&'a str>,
        /// Row carries at least one non-comment leaf node.
        has_code: &'a mut [bool],
        /// Row is touched by a comment node.
        has_comment: &'a mut [bool],
        /// Row is touched by a comment whose text starts with `/*` or `<#`.
        comment_is_block: &'a mut [bool],
        /// Row is touched by a docstring.
        has_docstring: &'a mut [bool],
    }

    /// Rows on which `node` actually occupies text.
    ///
    /// A multi-row node that ends at column 0 covers no character of its final row (this is
    /// the case for a token that consists of a trailing newline), so that row is left out.
    fn rows_touched(node: Node) -> RangeInclusive<usize> {
        let start = node.start_position();
        let end = node.end_position();
        let last_row = if end.column == 0 && end.row > start.row {
            end.row - 1
        } else {
            end.row
        };
        start.row..=last_row
    }

    /// Sets the flag of every row in `rows`, ignoring rows beyond the end of `flags`.
    fn mark_rows(flags: &mut [bool], rows: RangeInclusive<usize>) {
        for row in rows {
            if let Some(flag) = flags.get_mut(row) {
                *flag = true;
            }
        }
    }

    /// Recursively records which rows `node` and its descendants mark as comment, docstring
    /// or code.
    fn visit(node: Node, ctx: &mut VisitCtx<'_>) {
        let kind = node.kind();

        if ctx.comment_kinds.contains(&kind) {
            let first_two = node
                .utf8_text(ctx.source)
                .unwrap_or("")
                .get(..2)
                .unwrap_or("");
            let is_block = first_two == "/*" || first_two == "<#";
            let rows = rows_touched(node);
            mark_rows(&mut *ctx.has_comment, rows.clone());
            if is_block {
                mark_rows(&mut *ctx.comment_is_block, rows);
            }
            return;
        }

        if let Some(stmt_kind) = ctx.docstring_stmt_kind {
            // A statement whose sole named child is a string is treated as a docstring.
            if kind == stmt_kind && node.named_child_count() == 1 {
                if let Some(child) = node.named_child(0) {
                    if child.kind() == "string" {
                        mark_rows(&mut *ctx.has_docstring, rows_touched(child));
                        return;
                    }
                }
            }
        }

        // Leaf nodes that are not extras are the tokens that make up code.
        if node.child_count() == 0 && !node.is_extra() {
            mark_rows(&mut *ctx.has_code, rows_touched(node));
            return;
        }

        // Iterate with a cursor rather than by index; looking children up by index costs more
        // for nodes with long child lists.
        let mut cursor = node.walk();
        for child in node.children(&mut cursor) {
            visit(child, ctx);
        }
    }

    /// Classifies the lines of C source; only `comment` nodes are treated as comments.
    pub fn analyze_c(text: &str) -> Option<RawFileAnalysis> {
        analyze_lines(text, tree_sitter_c::language(), &["comment"], None)
    }

    /// Classifies the lines of Python source; a string that forms a whole
    /// `expression_statement` is counted as a docstring.
    pub fn analyze_python(text: &str) -> Option<RawFileAnalysis> {
        analyze_lines(
            text,
            tree_sitter_python::language(),
            &["comment"],
            Some("expression_statement"),
        )
    }
}
2169
#[cfg(test)]
mod tests {
    use super::*;

    // A module docstring and a function docstring count as docstring lines; the `def` and
    // `return` lines are code only, and the assignment with a trailing `#` comment is mixed.
    #[test]
    fn python_docstrings_are_separated() {
        let input = r####""""module docs""""


def fn_a():
    """function docs"""
    value = 1 # trailing comment
    return value
"####;

        let result = analyze_text(Language::Python, input, AnalysisOptions::default());
        assert_eq!(result.raw.docstring_comment_lines, 2);
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.code_only_lines, 2);
    }

    // Code followed by `//` is a mixed line; a line holding only `/* ... */` is a
    // block-comment-only line.
    #[test]
    fn c_style_mixed_lines_are_captured() {
        let input = "int x = 1; // note\n/* block */\n";
        let result = analyze_text(Language::C, input, AnalysisOptions::default());
        assert_eq!(result.raw.mixed_code_single_comment_lines, 1);
        assert_eq!(result.raw.multi_comment_only_lines, 1);
    }

    // A file without an extension is identified from its shebang line.
    #[test]
    fn detect_language_by_shebang() {
        let language = detect_language(
            Path::new("script"),
            Some("#!/usr/bin/env bash"),
            &BTreeMap::new(),
            true,
        );
        assert_eq!(language, Some(Language::Shell));
    }
}