use once_cell::sync::Lazy;
use regex::Regex;
use scribe_core::{Language, Result, ScribeError};
use scribe_selection::ast_parser::{AstImport, AstLanguage, AstParser};
use serde::{Deserialize, Serialize};
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};

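/// Aggregated analysis results for a single file: imports, documentation
/// markers, complexity heuristics, structural declarations, and raw text
/// statistics.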
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentStats {
    pub imports: ImportInfo,
    pub documentation: DocumentationInfo,
    pub complexity: ComplexityMetrics,
    pub structure: StructureInfo,
    pub text_stats: TextStats,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportInfo {
    pub total_imports: usize,
    pub unique_imports: usize,
    pub import_sources: Vec<ImportSource>,
    pub external_dependencies: HashSet<String>,
    pub internal_dependencies: HashSet<String>,
    pub relative_imports: usize,
    pub absolute_imports: usize,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportSource {
    pub module: String,
    pub alias: Option<String>,
    pub items: Vec<String>,
    pub line_number: usize,
    pub import_type: ImportType,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum ImportType {
    Standard,
    External,
    Internal,
    Relative,
    Dynamic,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentationInfo {
    pub headings: Vec<Heading>,
    pub links: Vec<Link>,
    pub code_blocks: Vec<CodeBlock>,
    pub tables: usize,
    pub lists: usize,
    pub images: usize,
    pub todo_comments: Vec<TodoComment>,
    pub docstrings: Vec<Docstring>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Heading {
    pub level: usize,
    pub text: String,
    pub line_number: usize,
    pub anchor: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Link {
    pub text: String,
    pub url: String,
    pub line_number: usize,
    pub link_type: LinkType,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum LinkType {
    Internal,
    External,
    Relative,
    Anchor,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeBlock {
    pub language: Option<String>,
    pub content: String,
    pub line_number: usize,
    pub line_count: usize,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoComment {
    pub comment_type: TodoType,
    pub text: String,
    pub line_number: usize,
    pub author: Option<String>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TodoType {
    Todo,
    Fixme,
    Note,
    Bug,
    Hack,
    Warning,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Docstring {
    pub content: String,
    pub line_number: usize,
    pub line_count: usize,
    pub style: DocstringStyle,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DocstringStyle {
    Google,
    Numpy,
    Sphinx,
    Rustdoc,
    Javadoc,
    JSDoc,
    Unknown,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityMetrics {
    pub cyclomatic_complexity: usize,
    pub function_count: usize,
    pub class_count: usize,
    pub nesting_depth: usize,
    pub cognitive_complexity: usize,
    pub halstead_metrics: HalsteadMetrics,
}

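/// Token-based Halstead estimate. As computed in `calculate_halstead_metrics`
/// below: `vocabulary = distinct_operators + distinct_operands`,
/// `length = total_operators + total_operands`,
/// `difficulty = (n1 / 2) * (N2 / n2)`, and `effort = difficulty * length`.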
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HalsteadMetrics {
    pub distinct_operators: usize,
    pub distinct_operands: usize,
    pub total_operators: usize,
    pub total_operands: usize,
    pub vocabulary: usize,
    pub length: usize,
    pub difficulty: f64,
    pub effort: f64,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructureInfo {
    pub functions: Vec<FunctionInfo>,
    pub classes: Vec<ClassInfo>,
    pub constants: Vec<ConstantInfo>,
    pub interfaces: Vec<InterfaceInfo>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionInfo {
    pub name: String,
    pub line_number: usize,
    pub line_count: usize,
    pub parameters: Vec<String>,
    pub return_type: Option<String>,
    pub visibility: Visibility,
    pub is_async: bool,
    pub is_generator: bool,
    pub docstring: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassInfo {
    pub name: String,
    pub line_number: usize,
    pub line_count: usize,
    pub parent_classes: Vec<String>,
    pub methods: Vec<FunctionInfo>,
    pub attributes: Vec<String>,
    pub visibility: Visibility,
    pub docstring: Option<String>,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstantInfo {
    pub name: String,
    pub line_number: usize,
    pub value_type: Option<String>,
    pub visibility: Visibility,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InterfaceInfo {
    pub name: String,
    pub line_number: usize,
    pub methods: Vec<String>,
    pub extends: Vec<String>,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Visibility {
    Public,
    Private,
    Protected,
    Package,
    Unknown,
}

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextStats {
    pub line_count: usize,
    pub non_empty_line_count: usize,
    pub comment_line_count: usize,
    pub code_line_count: usize,
    pub blank_line_count: usize,
    pub character_count: usize,
    pub word_count: usize,
    pub comment_density: f64,
}

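/// Heuristic content analyzer that combines line- and regex-based scanning
/// with AST-backed import extraction. The `regex_cache` field is initialized
/// but not consulted by any method in this file.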
pub struct ContentAnalyzer {
    regex_cache: HashMap<String, Regex>,
    ast_parser: AstParser,
}

static HEADING_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(#{1,6})\s+(.+)").unwrap());
static LINK_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
static TODO_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)(?://|#|/\*|\*|<!--)\s*(TODO|FIXME|NOTE|BUG|HACK|WARNING):?\s*(.*)").unwrap()
});
static CODE_BLOCK_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"```(\w+)?\n((?s).*?)```").unwrap());

impl Default for ContentStats {
    fn default() -> Self {
        Self {
            imports: ImportInfo::default(),
            documentation: DocumentationInfo::default(),
            complexity: ComplexityMetrics::default(),
            structure: StructureInfo::default(),
            text_stats: TextStats::default(),
        }
    }
}

impl Default for ImportInfo {
    fn default() -> Self {
        Self {
            total_imports: 0,
            unique_imports: 0,
            import_sources: Vec::new(),
            external_dependencies: HashSet::new(),
            internal_dependencies: HashSet::new(),
            relative_imports: 0,
            absolute_imports: 0,
        }
    }
}

impl Default for DocumentationInfo {
    fn default() -> Self {
        Self {
            headings: Vec::new(),
            links: Vec::new(),
            code_blocks: Vec::new(),
            tables: 0,
            lists: 0,
            images: 0,
            todo_comments: Vec::new(),
            docstrings: Vec::new(),
        }
    }
}

impl Default for ComplexityMetrics {
    fn default() -> Self {
        Self {
            cyclomatic_complexity: 0,
            function_count: 0,
            class_count: 0,
            nesting_depth: 0,
            cognitive_complexity: 0,
            halstead_metrics: HalsteadMetrics::default(),
        }
    }
}

impl Default for HalsteadMetrics {
    fn default() -> Self {
        Self {
            distinct_operators: 0,
            distinct_operands: 0,
            total_operators: 0,
            total_operands: 0,
            vocabulary: 0,
            length: 0,
            difficulty: 0.0,
            effort: 0.0,
        }
    }
}

impl Default for StructureInfo {
    fn default() -> Self {
        Self {
            functions: Vec::new(),
            classes: Vec::new(),
            constants: Vec::new(),
            interfaces: Vec::new(),
        }
    }
}

impl Default for TextStats {
    fn default() -> Self {
        Self {
            line_count: 0,
            non_empty_line_count: 0,
            comment_line_count: 0,
            code_line_count: 0,
            blank_line_count: 0,
            character_count: 0,
            word_count: 0,
            comment_density: 0.0,
        }
    }
}

impl ContentAnalyzer {
    pub fn new() -> Self {
        Self {
            regex_cache: HashMap::new(),
            ast_parser: AstParser::new().expect("Failed to initialize AST parser"),
        }
    }

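    /// Reads the file at `path` and analyzes it, inferring the language from
    /// the file extension.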
    pub async fn analyze_file(&self, path: &Path) -> Result<ContentStats> {
        let content = tokio::fs::read_to_string(path).await.map_err(|e| {
            ScribeError::io(format!("Failed to read file {}: {}", path.display(), e), e)
        })?;

        let language = self.detect_language_from_path(path);
        self.analyze_content(&content, &language).await
    }

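    /// Runs the five sub-analyses (imports, documentation, complexity,
    /// structure, and text statistics) concurrently via `tokio::join!` and
    /// collects the results into a single `ContentStats`.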
    pub async fn analyze_content(
        &self,
        content: &str,
        language: &Language,
    ) -> Result<ContentStats> {
        let mut stats = ContentStats::default();

        let (imports, documentation, complexity, structure, text_stats) = tokio::join!(
            self.analyze_imports_async(content, language),
            self.analyze_documentation_async(content),
            self.analyze_complexity_async(content, language),
            self.analyze_structure_async(content, language),
            self.analyze_text_stats_async(content)
        );

        stats.imports = imports?;
        stats.documentation = documentation?;
        stats.complexity = complexity?;
        stats.structure = structure?;
        stats.text_stats = text_stats?;

        Ok(stats)
    }

    async fn analyze_imports_async(
        &self,
        content: &str,
        language: &Language,
    ) -> Result<ImportInfo> {
        let mut import_info = ImportInfo::default();

        let ast_language = match language {
            Language::Python => Some(AstLanguage::Python),
            Language::JavaScript => Some(AstLanguage::JavaScript),
            Language::TypeScript => Some(AstLanguage::TypeScript),
            Language::Go => Some(AstLanguage::Go),
            Language::Rust => Some(AstLanguage::Rust),
            _ => None,
        };

        if let Some(ast_lang) = ast_language {
            match self.ast_parser.extract_imports(content, ast_lang) {
                Ok(imports) => {
                    for (line_number, import) in imports.into_iter().enumerate() {
                        let import_type = self.classify_import_type(&import.module);

                        let import_source = ImportSource {
                            module: import.module.clone(),
                            alias: import.alias,
                            items: import.items,
                            line_number: line_number + 1,
                            import_type: import_type.clone(),
                        };

                        import_info.import_sources.push(import_source);

                        match import_type {
                            ImportType::External => {
                                import_info.external_dependencies.insert(import.module);
                                import_info.absolute_imports += 1;
                            }
                            ImportType::Internal => {
                                import_info.internal_dependencies.insert(import.module);
                                import_info.absolute_imports += 1;
                            }
                            ImportType::Relative => {
                                import_info.relative_imports += 1;
                            }
                            _ => {
                                import_info.absolute_imports += 1;
                            }
                        }
                    }

                    import_info.total_imports = import_info.import_sources.len();
                    import_info.unique_imports = import_info.external_dependencies.len()
                        + import_info.internal_dependencies.len();
                }
                Err(_) => {
                    // AST parsing failed; fall back to empty import information.
                }
            }
        }

        Ok(import_info)
    }

    async fn analyze_documentation_async(&self, content: &str) -> Result<DocumentationInfo> {
        let mut doc_info = DocumentationInfo::default();
        let mut line_number = 1;

        for line in content.lines() {
            if let Some(captures) = HEADING_REGEX.captures(line) {
                let level = captures.get(1).unwrap().as_str().len();
                let text = captures.get(2).unwrap().as_str().trim().to_string();

                doc_info.headings.push(Heading {
                    level,
                    text: text.clone(),
                    line_number,
                    anchor: Some(self.generate_anchor(&text)),
                });
            }

            for captures in LINK_REGEX.captures_iter(line) {
                let text = captures.get(1).unwrap().as_str().to_string();
                let url = captures.get(2).unwrap().as_str().to_string();

                doc_info.links.push(Link {
                    text,
                    url: url.clone(),
                    line_number,
                    link_type: self.classify_link(&url),
                });
            }

            if let Some(captures) = TODO_REGEX.captures(line) {
                let comment_type = match captures.get(1).unwrap().as_str().to_uppercase().as_str() {
                    "TODO" => TodoType::Todo,
                    "FIXME" => TodoType::Fixme,
                    "NOTE" => TodoType::Note,
                    "BUG" => TodoType::Bug,
                    "HACK" => TodoType::Hack,
                    "WARNING" => TodoType::Warning,
                    _ => TodoType::Todo,
                };

                let text = captures
                    .get(2)
                    .map_or(String::new(), |m| m.as_str().trim().to_string());

                doc_info.todo_comments.push(TodoComment {
                    comment_type,
                    text,
                    line_number,
                    author: None,
                });
            }

            if line.starts_with('|') && line.ends_with('|') {
                doc_info.tables += 1;
            }
            if line.trim_start().starts_with('-')
                || line.trim_start().starts_with('*')
                || line
                    .trim_start()
                    .chars()
                    .next()
                    .map_or(false, |c| c.is_digit(10))
            {
                doc_info.lists += 1;
            }

            line_number += 1;
        }

        for captures in CODE_BLOCK_REGEX.captures_iter(content) {
            let language = captures.get(1).map(|m| m.as_str().to_string());
            let content_str = captures.get(2).unwrap().as_str().to_string();
            let line_count = content_str.lines().count();

            doc_info.code_blocks.push(CodeBlock {
                language,
                content: content_str,
                // Fenced blocks are matched over the whole content, so no line
                // number is tracked for them.
                line_number: 0,
                line_count,
            });
        }

        Ok(doc_info)
    }

    async fn analyze_complexity_async(
        &self,
        content: &str,
        language: &Language,
    ) -> Result<ComplexityMetrics> {
        let mut complexity = ComplexityMetrics::default();

        let lines: Vec<&str> = content.lines().collect();

        for line in &lines {
            let trimmed = line.trim();

            if self.is_function_declaration(trimmed, language) {
                complexity.function_count += 1;
            }

            if self.is_class_declaration(trimmed, language) {
                complexity.class_count += 1;
            }

            if self.is_decision_point(trimmed, language) {
                complexity.cyclomatic_complexity += 1;
            }
        }

        complexity.nesting_depth = self.calculate_max_nesting_depth(content, language);

        complexity.halstead_metrics = self.calculate_halstead_metrics(content, language);

        Ok(complexity)
    }

    async fn analyze_structure_async(
        &self,
        content: &str,
        language: &Language,
    ) -> Result<StructureInfo> {
        let mut structure = StructureInfo::default();

        let mut line_number = 1;

        for line in content.lines() {
            let trimmed = line.trim();

            if let Some(function_info) =
                self.parse_function_declaration(trimmed, line_number, language)
            {
                structure.functions.push(function_info);
            }

            if let Some(class_info) = self.parse_class_declaration(trimmed, line_number, language) {
                structure.classes.push(class_info);
            }

            if let Some(constant_info) =
                self.parse_constant_declaration(trimmed, line_number, language)
            {
                structure.constants.push(constant_info);
            }

            line_number += 1;
        }

        Ok(structure)
    }

    async fn analyze_text_stats_async(&self, content: &str) -> Result<TextStats> {
        let lines: Vec<&str> = content.lines().collect();
        let line_count = lines.len();
        let character_count = content.len();
        let word_count = content.split_whitespace().count();

        let mut non_empty_line_count = 0;
        let mut comment_line_count = 0;
        let mut blank_line_count = 0;

        for line in &lines {
            let trimmed = line.trim();
            if trimmed.is_empty() {
                blank_line_count += 1;
            } else {
                non_empty_line_count += 1;
                if self.is_comment_line(trimmed) {
                    comment_line_count += 1;
                }
            }
        }

        let code_line_count = non_empty_line_count - comment_line_count;
        let comment_density = if code_line_count > 0 {
            comment_line_count as f64 / code_line_count as f64
        } else {
            0.0
        };

        Ok(TextStats {
            line_count,
            non_empty_line_count,
            comment_line_count,
            code_line_count,
            blank_line_count,
            character_count,
            word_count,
            comment_density,
        })
    }

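    /// Classifies an import path: relative prefixes first, then known
    /// standard-library names, then anything containing `/` or `.` as
    /// external, and everything else as internal.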
    fn classify_import_type(&self, module: &str) -> ImportType {
        if module.starts_with('.') || module.starts_with("./") || module.starts_with("../") {
            ImportType::Relative
        } else if self.is_standard_library_module(module) {
            ImportType::Standard
        } else if module.contains('/') || module.contains('.') {
            ImportType::External
        } else {
            ImportType::Internal
        }
    }

    fn is_standard_library_module(&self, module: &str) -> bool {
        match module {
            // Common Python standard-library modules.
            "os" | "sys" | "json" | "re" | "collections" | "itertools" | "functools" => true,
            // Common Node.js built-in modules.
            "fs" | "path" | "http" | "https" | "url" | "crypto" => true,
            _ => false,
        }
    }

    fn generate_anchor(&self, text: &str) -> String {
        text.to_lowercase()
            .chars()
            .map(|c| if c.is_alphanumeric() { c } else { '-' })
            .collect::<String>()
            .split('-')
            .filter(|s| !s.is_empty())
            .collect::<Vec<_>>()
            .join("-")
    }

    fn classify_link(&self, url: &str) -> LinkType {
        if url.starts_with("http://") || url.starts_with("https://") {
            LinkType::External
        } else if url.starts_with("#") {
            LinkType::Anchor
        } else if url.starts_with("./") || url.starts_with("../") {
            LinkType::Relative
        } else {
            LinkType::Internal
        }
    }

    fn detect_language_from_path(&self, path: &Path) -> Language {
        if let Some(extension) = path.extension().and_then(|ext| ext.to_str()) {
            Language::from_extension(extension)
        } else {
            Language::Unknown
        }
    }

    fn is_function_declaration(&self, line: &str, language: &Language) -> bool {
        match language {
            Language::Python => line.starts_with("def ") || line.starts_with("async def "),
            Language::JavaScript | Language::TypeScript => {
                line.contains("function ") || line.contains("=> ") || line.contains("function(")
            }
            Language::Rust => line.starts_with("fn ") || line.starts_with("pub fn "),
            Language::Java => line.contains("public ") && line.contains("(") && line.contains(")"),
            _ => false,
        }
    }

    fn is_class_declaration(&self, line: &str, language: &Language) -> bool {
        match language {
            Language::Python => line.starts_with("class "),
            Language::JavaScript | Language::TypeScript => line.starts_with("class "),
            Language::Java => line.contains("class ") && line.contains("{"),
            Language::Rust => line.starts_with("struct ") || line.starts_with("enum "),
            _ => false,
        }
    }

    fn is_decision_point(&self, line: &str, _language: &Language) -> bool {
        line.contains("if ")
            || line.contains("elif ")
            || line.contains("else ")
            || line.contains("for ")
            || line.contains("while ")
            || line.contains("match ")
            || line.contains("switch ")
            || line.contains("case ")
            || line.contains("catch ")
            || line.contains("&&")
            || line.contains("||")
            || line.contains("?")
    }

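    /// Approximates nesting depth by counting bracket characters per line.
    /// Brackets inside strings or comments are not excluded, so this is a
    /// heuristic rather than an exact measure.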
    fn calculate_max_nesting_depth(&self, content: &str, _language: &Language) -> usize {
        let mut max_depth = 0;
        let mut current_depth = 0;

        for line in content.lines() {
            let trimmed = line.trim();

            let opens = trimmed.matches('{').count()
                + trimmed.matches('(').count()
                + trimmed.matches('[').count();
            let closes = trimmed.matches('}').count()
                + trimmed.matches(')').count()
                + trimmed.matches(']').count();

            current_depth += opens;
            max_depth = max_depth.max(current_depth);
            current_depth = current_depth.saturating_sub(closes);
        }

        max_depth
    }

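    /// Rough Halstead estimate over whitespace-separated tokens: any token
    /// containing one of a small operator set counts as an operator
    /// occurrence, and the remaining tokens are treated as operands.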
    fn calculate_halstead_metrics(&self, content: &str, _language: &Language) -> HalsteadMetrics {
        let words: Vec<&str> = content.split_whitespace().collect();
        let unique_words: HashSet<&str> = words.iter().cloned().collect();

        let operators = [
            "+", "-", "*", "/", "=", "==", "!=", "&&", "||", "!", "<", ">", "<=", ">=",
        ];
        let mut operator_count = 0;
        let mut unique_operators = HashSet::new();

        for word in &words {
            for &op in &operators {
                if word.contains(op) {
                    operator_count += 1;
                    unique_operators.insert(op);
                }
            }
        }

        let distinct_operators = unique_operators.len();
        let distinct_operands = unique_words.len().saturating_sub(distinct_operators);
        let total_operators = operator_count;
        let total_operands = words.len().saturating_sub(operator_count);
        let vocabulary = distinct_operators + distinct_operands;
        let length = total_operators + total_operands;

        let difficulty = if distinct_operands > 0 {
            (distinct_operators as f64 / 2.0) * (total_operands as f64 / distinct_operands as f64)
        } else {
            0.0
        };

        let effort = difficulty * length as f64;

        HalsteadMetrics {
            distinct_operators,
            distinct_operands,
            total_operators,
            total_operands,
            vocabulary,
            length,
            difficulty,
            effort,
        }
    }

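    /// Extracts a function name from a single declaration line. Only Python
    /// (`def`) and Rust (`fn`) names are parsed; other languages return
    /// `None`.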
    fn parse_function_declaration(
        &self,
        line: &str,
        line_number: usize,
        language: &Language,
    ) -> Option<FunctionInfo> {
        if !self.is_function_declaration(line, language) {
            return None;
        }

        let name = match language {
            Language::Python => {
                if let Some(start) = line.find("def ") {
                    let after_def = &line[start + 4..];
                    if let Some(paren_pos) = after_def.find('(') {
                        Some(after_def[..paren_pos].trim().to_string())
                    } else {
                        None
                    }
                } else {
                    None
                }
            }
            Language::Rust => {
                if let Some(start) = line.find("fn ") {
                    let after_fn = &line[start + 3..];
                    if let Some(paren_pos) = after_fn.find('(') {
                        Some(after_fn[..paren_pos].trim().to_string())
                    } else {
                        None
                    }
                } else {
                    None
                }
            }
            _ => None,
        };

        if let Some(function_name) = name {
            Some(FunctionInfo {
                name: function_name,
                line_number,
                line_count: 1,
                parameters: vec![],
                return_type: None,
                visibility: Visibility::Unknown,
                is_async: line.contains("async"),
                is_generator: line.contains("yield") || line.contains("generator"),
                docstring: None,
            })
        } else {
            None
        }
    }

    fn parse_class_declaration(
        &self,
        line: &str,
        line_number: usize,
        language: &Language,
    ) -> Option<ClassInfo> {
        if !self.is_class_declaration(line, language) {
            return None;
        }

        let name = match language {
            Language::Python => {
                if let Some(start) = line.find("class ") {
                    let after_class = &line[start + 6..];
                    if let Some(colon_pos) = after_class.find(':') {
                        Some(
                            after_class[..colon_pos]
                                .trim()
                                .split('(')
                                .next()
                                .unwrap()
                                .trim()
                                .to_string(),
                        )
                    } else {
                        None
                    }
                } else {
                    None
                }
            }
            _ => None,
        };

        if let Some(class_name) = name {
            Some(ClassInfo {
                name: class_name,
                line_number,
                line_count: 1,
                parent_classes: vec![],
                methods: vec![],
                attributes: vec![],
                visibility: Visibility::Unknown,
                docstring: None,
            })
        } else {
            None
        }
    }

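    /// Detects simple constant declarations: lines using a `const`/`final`
    /// keyword, or all-uppercase assignment lines, with the name taken from
    /// the text before `=`.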
    fn parse_constant_declaration(
        &self,
        line: &str,
        line_number: usize,
        _language: &Language,
    ) -> Option<ConstantInfo> {
        if line.contains("const ")
            || line.contains("final ")
            || (line.contains("=") && line.to_uppercase() == line)
        {
            if let Some(equals_pos) = line.find('=') {
                let before_equals = line[..equals_pos].trim();

                let tokens: Vec<&str> = before_equals.split_whitespace().collect();

                if tokens.len() >= 2 {
                    if tokens[0] == "const" || tokens[0] == "final" {
                        let name = tokens[1];
                        let clean_name = name.trim_end_matches(':');
                        return Some(ConstantInfo {
                            name: clean_name.to_string(),
                            line_number,
                            value_type: None,
                            visibility: Visibility::Unknown,
                        });
                    }
                }

                if let Some(name) = tokens.get(1) {
                    let clean_name = name.trim_end_matches(':');
                    return Some(ConstantInfo {
                        name: clean_name.to_string(),
                        line_number,
                        value_type: None,
                        visibility: Visibility::Unknown,
                    });
                }
            }
        }
        None
    }

    fn is_comment_line(&self, line: &str) -> bool {
        let trimmed = line.trim();
        trimmed.starts_with("//")
            || trimmed.starts_with('#')
            || trimmed.starts_with("/*")
            || trimmed.starts_with('*')
            || trimmed.starts_with("<!--")
            || trimmed.starts_with("--")
    }
}

impl Default for ContentAnalyzer {
    fn default() -> Self {
        Self::new()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    #[tokio::test]
    async fn test_content_analyzer_creation() {
        // Constructing the analyzer (and its AST parser) should not panic.
        let _analyzer = ContentAnalyzer::new();
    }

    #[tokio::test]
    async fn test_python_import_analysis() {
        let analyzer = ContentAnalyzer::new();
        let python_code = r#"
import os
import sys as system
from collections import defaultdict, Counter
from .local_module import LocalClass
import third_party.package
        "#;

        let stats = analyzer
            .analyze_content(python_code, &Language::Python)
            .await
            .unwrap();

        assert_eq!(stats.imports.total_imports, 5);

        assert!(!stats.imports.external_dependencies.contains("os"));
        assert!(!stats.imports.external_dependencies.contains("sys"));
        assert!(!stats.imports.external_dependencies.contains("collections"));

        assert!(stats
            .imports
            .external_dependencies
            .contains("third_party.package"));

        assert_eq!(stats.imports.relative_imports, 1);
        assert!(stats.imports.absolute_imports > 0);
    }

    #[tokio::test]
    async fn test_documentation_analysis() {
        let analyzer = ContentAnalyzer::new();
        let markdown_content = r#"
# Main Title

This is a paragraph with [a link](https://example.com).

## Subsection

```python
def example():
    pass
```

- List item 1
- List item 2

| Column 1 | Column 2 |
|----------|----------|
| Data 1 | Data 2 |

<!-- TODO: Add more examples -->
        "#;

        let stats = analyzer
            .analyze_content(markdown_content, &Language::Markdown)
            .await
            .unwrap();

        assert_eq!(stats.documentation.headings.len(), 2);
        assert_eq!(stats.documentation.headings[0].level, 1);
        assert_eq!(stats.documentation.headings[0].text, "Main Title");
        assert_eq!(stats.documentation.links.len(), 1);
        assert_eq!(stats.documentation.code_blocks.len(), 1);
        assert_eq!(stats.documentation.todo_comments.len(), 1);
        assert!(stats.documentation.lists > 0);
    }

    #[tokio::test]
    async fn test_text_statistics() {
        let analyzer = ContentAnalyzer::new();
        let code_content = r#"
// This is a comment
function example() {
    console.log("Hello, world!");
    // Another comment
    return true;
}

// Final comment
        "#;

        let stats = analyzer
            .analyze_content(code_content, &Language::JavaScript)
            .await
            .unwrap();

        assert!(stats.text_stats.line_count > 0);
        assert!(stats.text_stats.comment_line_count >= 3);
        assert!(stats.text_stats.code_line_count > 0);
        assert!(stats.text_stats.comment_density > 0.0);
        assert!(stats.text_stats.word_count > 0);
    }

    #[tokio::test]
    async fn test_complexity_metrics() {
        let analyzer = ContentAnalyzer::new();
        let code_content = r#"
def complex_function(x, y):
    if x > 0:
        if y > 0:
            for i in range(10):
                if i % 2 == 0:
                    print(i)
    else:
        while y < 0:
            y += 1
    return x + y

class ExampleClass:
    def method1(self):
        pass

    def method2(self):
        pass
        "#;

        let stats = analyzer
            .analyze_content(code_content, &Language::Python)
            .await
            .unwrap();

        assert!(stats.complexity.function_count >= 2);
        assert!(stats.complexity.class_count >= 1);
        assert!(stats.complexity.cyclomatic_complexity > 0);
        assert!(stats.complexity.nesting_depth > 0);
    }

    #[tokio::test]
    async fn test_structure_analysis() {
        let analyzer = ContentAnalyzer::new();
        let rust_code = r#"
pub fn public_function(param: i32) -> bool {
    true
}

fn private_function() {
    println!("Hello");
}

pub struct MyStruct {
    field: String,
}

const CONSTANT_VALUE: i32 = 42;
        "#;

        let stats = analyzer
            .analyze_content(rust_code, &Language::Rust)
            .await
            .unwrap();

        assert_eq!(stats.structure.functions.len(), 2);
        assert!(stats
            .structure
            .functions
            .iter()
            .any(|f| f.name == "public_function"));
        assert!(stats
            .structure
            .functions
            .iter()
            .any(|f| f.name == "private_function"));
        assert_eq!(stats.structure.constants.len(), 1);
        assert_eq!(stats.structure.constants[0].name, "CONSTANT_VALUE");
    }

    #[tokio::test]
    async fn test_file_analysis() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.py");

        let content = r#"
"""
This is a module docstring.
"""
import os
from collections import defaultdict

def greet(name: str) -> str:
    """Greet a person by name."""
    return f"Hello, {name}!"

class Person:
    """A simple person class."""
    def __init__(self, name: str):
        self.name = name

    def speak(self):
        return self.greet()
        "#;

        fs::write(&test_file, content).unwrap();

        let analyzer = ContentAnalyzer::new();
        let stats = analyzer.analyze_file(&test_file).await.unwrap();

        assert!(stats.imports.total_imports >= 2);
        assert!(stats.structure.functions.len() >= 2);
        assert!(stats.structure.classes.len() >= 1);
        assert!(stats.text_stats.line_count > 10);
        assert!(stats.complexity.function_count >= 2);
    }

    #[test]
    fn test_import_type_classification() {
        let analyzer = ContentAnalyzer::new();

        assert_eq!(analyzer.classify_import_type("os"), ImportType::Standard);
        assert_eq!(
            analyzer.classify_import_type("./local"),
            ImportType::Relative
        );
        assert_eq!(
            analyzer.classify_import_type("../parent"),
            ImportType::Relative
        );
        assert_eq!(
            analyzer.classify_import_type("third_party.package"),
            ImportType::External
        );
    }

    #[test]
    fn test_link_classification() {
        let analyzer = ContentAnalyzer::new();

        assert_eq!(
            analyzer.classify_link("https://example.com"),
            LinkType::External
        );
        assert_eq!(analyzer.classify_link("#anchor"), LinkType::Anchor);
        assert_eq!(
            analyzer.classify_link("./relative/path"),
            LinkType::Relative
        );
        assert_eq!(analyzer.classify_link("internal-link"), LinkType::Internal);
    }

    #[test]
    fn test_anchor_generation() {
        let analyzer = ContentAnalyzer::new();

        assert_eq!(analyzer.generate_anchor("Main Title"), "main-title");
        assert_eq!(
            analyzer.generate_anchor("Complex Title With Symbols!"),
            "complex-title-with-symbols"
        );
        assert_eq!(
            analyzer.generate_anchor("Numbers 123 and More"),
            "numbers-123-and-more"
        );
    }
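
    #[test]
    fn test_nesting_depth_heuristic() {
        // Exercises the bracket-counting heuristic: every `{`, `(`, or `[` on
        // a line deepens the count, so `fn main() {` alone contributes two
        // levels. Only a lower bound is asserted to stay robust to that
        // counting.
        let analyzer = ContentAnalyzer::new();
        let code = "fn main() {\n    if true {\n        println!(\"hi\");\n    }\n}\n";
        let depth = analyzer.calculate_max_nesting_depth(code, &Language::Rust);
        assert!(depth >= 2);
    }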
}