1use scribe_core::{Result, ScribeError, Language};
10use scribe_selection::ast_parser::{AstParser, AstLanguage, AstImport};
11use std::path::{Path, PathBuf};
12use std::collections::{HashMap, HashSet};
13use std::fs;
14use regex::Regex;
15use serde::{Serialize, Deserialize};
16use once_cell::sync::Lazy;
17
/// Aggregated results of analyzing a single file's content.
///
/// Each field is filled by an independent analysis pass run concurrently by
/// `ContentAnalyzer::analyze_content`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentStats {
    /// Import/dependency information extracted from the file.
    pub imports: ImportInfo,
    /// Documentation features (headings, links, TODOs, code blocks, ...).
    pub documentation: DocumentationInfo,
    /// Heuristic complexity measurements.
    pub complexity: ComplexityMetrics,
    /// Declared functions, classes, constants, and interfaces.
    pub structure: StructureInfo,
    /// Raw line/word/character statistics.
    pub text_stats: TextStats,
}
27
/// Summary of a file's import statements.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportInfo {
    /// Total number of import statements found.
    pub total_imports: usize,
    /// Distinct external + internal modules. Standard-library and relative
    /// imports are not included in this count (see `analyze_imports_async`).
    pub unique_imports: usize,
    /// One entry per import statement, in extraction order.
    pub import_sources: Vec<ImportSource>,
    /// Modules classified as third-party dependencies.
    pub external_dependencies: HashSet<String>,
    /// Modules classified as project-internal dependencies.
    pub internal_dependencies: HashSet<String>,
    /// Imports whose module path starts with '.'.
    pub relative_imports: usize,
    /// All non-relative imports (standard, external, internal, dynamic).
    pub absolute_imports: usize,
}
39
/// A single import statement.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImportSource {
    /// Module path as written in the source.
    pub module: String,
    /// Alias, when the import renames the module (`import x as y`).
    pub alias: Option<String>,
    /// Individual items imported from the module (`from m import a, b`).
    pub items: Vec<String>,
    // NOTE(review): currently set to the import's position in the extracted
    // list, not its source line — see analyze_imports_async.
    pub line_number: usize,
    /// Classification of where the imported module lives.
    pub import_type: ImportType,
}
49
50#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
52pub enum ImportType {
53 Standard, External, Internal, Relative, Dynamic, }
59
/// Documentation features found in a file (Markdown and comment markers).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentationInfo {
    /// Markdown ATX headings (`#` .. `######`).
    pub headings: Vec<Heading>,
    /// Inline Markdown links `[text](url)`.
    pub links: Vec<Link>,
    /// Fenced code blocks (``` ... ```).
    pub code_blocks: Vec<CodeBlock>,
    // NOTE(review): counts `|...|` table *rows*, not whole tables.
    pub tables: usize,
    /// Lines that look like list items (leading '-', '*', or a digit).
    pub lists: usize,
    // NOTE(review): never incremented in this file — TODO confirm whether
    // image detection lives elsewhere.
    pub images: usize,
    /// TODO/FIXME/NOTE/BUG/HACK/WARNING comment markers.
    pub todo_comments: Vec<TodoComment>,
    // NOTE(review): never populated in this file.
    pub docstrings: Vec<Docstring>,
}
72
/// A Markdown heading.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Heading {
    /// Heading depth, 1-6 (number of leading '#').
    pub level: usize,
    /// Heading text with surrounding whitespace trimmed.
    pub text: String,
    /// 1-based source line.
    pub line_number: usize,
    /// Slug generated from `text` (see `generate_anchor`).
    pub anchor: Option<String>,
}
81
/// An inline Markdown link.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Link {
    /// Link label (the `[text]` part).
    pub text: String,
    /// Link target (the `(url)` part).
    pub url: String,
    /// 1-based source line.
    pub line_number: usize,
    /// Classification of the target (see `classify_link`).
    pub link_type: LinkType,
}
90
91#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
93pub enum LinkType {
94 Internal, External, Relative, Anchor, }
99
/// A fenced Markdown code block.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CodeBlock {
    /// Language tag after the opening fence, if any (e.g. ```python).
    pub language: Option<String>,
    /// Raw text between the fences.
    pub content: String,
    // NOTE(review): currently always 0 as populated by
    // analyze_documentation_async — placeholder, not a real location.
    pub line_number: usize,
    /// Number of lines inside the block.
    pub line_count: usize,
}
108
/// A TODO-style marker found inside a comment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TodoComment {
    /// Which marker keyword was matched.
    pub comment_type: TodoType,
    /// Text following the marker, trimmed.
    pub text: String,
    /// 1-based source line.
    pub line_number: usize,
    // NOTE(review): never filled in this file — author attribution is not
    // parsed yet.
    pub author: Option<String>,
}
117
/// The marker keyword of a `TodoComment` (matched case-insensitively).
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum TodoType {
    /// `TODO:` — planned work. Also the fallback for unrecognized markers.
    Todo,
    /// `FIXME:` — known defect.
    Fixme,
    /// `NOTE:` — informational remark.
    Note,
    /// `BUG:` — confirmed bug reference.
    Bug,
    /// `HACK:` — acknowledged workaround.
    Hack,
    /// `WARNING:` — caution marker.
    Warning,
}
128
/// A documentation string attached to a declaration.
// NOTE(review): no code in this file constructs Docstring values yet.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Docstring {
    /// Raw docstring text.
    pub content: String,
    /// 1-based source line where it starts.
    pub line_number: usize,
    /// Number of lines it spans.
    pub line_count: usize,
    /// Detected documentation convention.
    pub style: DocstringStyle,
}
137
/// Documentation convention of a docstring.
// NOTE(review): no detection logic for these styles exists in this file.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum DocstringStyle {
    /// Google-style Python docstrings.
    Google,
    /// NumPy-style Python docstrings.
    Numpy,
    /// Sphinx/reST-style Python docstrings.
    Sphinx,
    /// Rust `///` doc comments.
    Rustdoc,
    /// Java `/** ... */` Javadoc.
    Javadoc,
    /// JavaScript/TypeScript JSDoc.
    JSDoc,
    /// Style could not be determined.
    Unknown,
}
149
/// Heuristic complexity measurements for a file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ComplexityMetrics {
    /// Count of lines containing a decision-point token (see
    /// `is_decision_point`); a rough cyclomatic-complexity proxy.
    pub cyclomatic_complexity: usize,
    /// Lines recognized as function declarations.
    pub function_count: usize,
    /// Lines recognized as class/struct/enum declarations.
    pub class_count: usize,
    /// Deepest bracket nesting seen (see `calculate_max_nesting_depth`).
    pub nesting_depth: usize,
    // NOTE(review): never computed in this file; stays at 0.
    pub cognitive_complexity: usize,
    /// Word-level Halstead approximation.
    pub halstead_metrics: HalsteadMetrics,
}
160
/// Approximate Halstead software-science metrics.
///
/// Computed from whitespace-separated words, not real tokens — see
/// `calculate_halstead_metrics` for the caveats.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct HalsteadMetrics {
    /// n1: distinct operator symbols observed.
    pub distinct_operators: usize,
    /// n2: distinct operands (approximated as unique words minus n1).
    pub distinct_operands: usize,
    /// N1: total operator occurrences.
    pub total_operators: usize,
    /// N2: total operand occurrences.
    pub total_operands: usize,
    /// n = n1 + n2.
    pub vocabulary: usize,
    /// N = N1 + N2.
    pub length: usize,
    /// D = (n1 / 2) * (N2 / n2).
    pub difficulty: f64,
    /// E = D * N.
    pub effort: f64,
}
173
/// Declarations discovered by the line-based structure scan.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct StructureInfo {
    /// Function declarations.
    pub functions: Vec<FunctionInfo>,
    /// Class declarations.
    pub classes: Vec<ClassInfo>,
    /// Constant declarations.
    pub constants: Vec<ConstantInfo>,
    // NOTE(review): never populated in this file.
    pub interfaces: Vec<InterfaceInfo>,
}
182
/// A function declaration extracted from a single source line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FunctionInfo {
    /// Function name.
    pub name: String,
    /// 1-based line of the declaration.
    pub line_number: usize,
    // NOTE(review): hard-coded to 1 by parse_function_declaration — the body
    // length is unknown from a single line.
    pub line_count: usize,
    /// Parameter list (may be empty if not parsed).
    pub parameters: Vec<String>,
    // NOTE(review): never filled by the current parser.
    pub return_type: Option<String>,
    /// Declared visibility; Unknown when not parsed.
    pub visibility: Visibility,
    /// Whether the declaration line contains `async`.
    pub is_async: bool,
    /// Whether the declaration line mentions `yield`/`generator`.
    pub is_generator: bool,
    // NOTE(review): never filled by the current parser.
    pub docstring: Option<String>,
}
196
/// A class declaration extracted from a single source line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassInfo {
    /// Class name.
    pub name: String,
    /// 1-based line of the declaration.
    pub line_number: usize,
    // NOTE(review): hard-coded to 1 by parse_class_declaration.
    pub line_count: usize,
    /// Base classes, when parseable from the declaration line.
    pub parent_classes: Vec<String>,
    // NOTE(review): never filled by the current single-line parser.
    pub methods: Vec<FunctionInfo>,
    // NOTE(review): never filled by the current single-line parser.
    pub attributes: Vec<String>,
    /// Declared visibility; Unknown when not parsed.
    pub visibility: Visibility,
    // NOTE(review): never filled by the current single-line parser.
    pub docstring: Option<String>,
}
209
/// A constant declaration extracted from a single source line.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ConstantInfo {
    /// Constant name (trailing ':' type separator stripped).
    pub name: String,
    /// 1-based line of the declaration.
    pub line_number: usize,
    // NOTE(review): never filled by the current parser.
    pub value_type: Option<String>,
    /// Declared visibility; Unknown when not parsed.
    pub visibility: Visibility,
}
218
/// An interface/trait declaration.
// NOTE(review): no code in this file constructs InterfaceInfo values yet.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct InterfaceInfo {
    /// Interface name.
    pub name: String,
    /// 1-based line of the declaration.
    pub line_number: usize,
    /// Method names declared by the interface.
    pub methods: Vec<String>,
    /// Interfaces this one extends.
    pub extends: Vec<String>,
}
227
/// Declared visibility of a structural element.
// NOTE(review): the parsers in this file currently always emit Unknown.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Visibility {
    /// Publicly visible.
    Public,
    /// Private to the declaring scope.
    Private,
    /// Protected (subclass-visible).
    Protected,
    /// Package/module visible.
    Package,
    /// Not determined.
    Unknown,
}
237
238#[derive(Debug, Clone, Serialize, Deserialize)]
240pub struct TextStats {
241 pub line_count: usize,
242 pub non_empty_line_count: usize,
243 pub comment_line_count: usize,
244 pub code_line_count: usize,
245 pub blank_line_count: usize,
246 pub character_count: usize,
247 pub word_count: usize,
248 pub comment_density: f64, }
250
/// Per-file content analyzer combining regex heuristics with an AST parser.
pub struct ContentAnalyzer {
    // NOTE(review): not read or written anywhere in this file — candidate
    // for removal once confirmed unused by other modules.
    regex_cache: HashMap<String, Regex>,
    // AST-based parser used for import extraction.
    ast_parser: AstParser,
}
256
257
// Lazily compiled, process-wide regexes used by the documentation scan.

/// Markdown ATX heading: 1-6 '#' characters, whitespace, then the text.
static HEADING_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(#{1,6})\s+(.+)").unwrap());
/// Inline Markdown link: `[text](url)`.
static LINK_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").unwrap());
/// TODO-style marker behind a comment leader (`//`, `#`, `/*`, `*`, `<!--`),
/// matched case-insensitively; group 1 = keyword, group 2 = trailing text.
static TODO_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"(?i)(?://|#|/\*|\*|<!--)\s*(TODO|FIXME|NOTE|BUG|HACK|WARNING):?\s*(.*)").unwrap()
});
/// Fenced code block; group 1 = optional language tag, group 2 = body.
// NOTE(review): requires a newline right after the opening fence, so a
// fence with trailing spaces before the newline will not match.
static CODE_BLOCK_REGEX: Lazy<Regex> = Lazy::new(|| {
    Regex::new(r"```(\w+)?\n((?s).*?)```").unwrap()
});
267
268impl Default for ContentStats {
269 fn default() -> Self {
270 Self {
271 imports: ImportInfo::default(),
272 documentation: DocumentationInfo::default(),
273 complexity: ComplexityMetrics::default(),
274 structure: StructureInfo::default(),
275 text_stats: TextStats::default(),
276 }
277 }
278}
279
280impl Default for ImportInfo {
281 fn default() -> Self {
282 Self {
283 total_imports: 0,
284 unique_imports: 0,
285 import_sources: Vec::new(),
286 external_dependencies: HashSet::new(),
287 internal_dependencies: HashSet::new(),
288 relative_imports: 0,
289 absolute_imports: 0,
290 }
291 }
292}
293
294impl Default for DocumentationInfo {
295 fn default() -> Self {
296 Self {
297 headings: Vec::new(),
298 links: Vec::new(),
299 code_blocks: Vec::new(),
300 tables: 0,
301 lists: 0,
302 images: 0,
303 todo_comments: Vec::new(),
304 docstrings: Vec::new(),
305 }
306 }
307}
308
309impl Default for ComplexityMetrics {
310 fn default() -> Self {
311 Self {
312 cyclomatic_complexity: 0,
313 function_count: 0,
314 class_count: 0,
315 nesting_depth: 0,
316 cognitive_complexity: 0,
317 halstead_metrics: HalsteadMetrics::default(),
318 }
319 }
320}
321
322impl Default for HalsteadMetrics {
323 fn default() -> Self {
324 Self {
325 distinct_operators: 0,
326 distinct_operands: 0,
327 total_operators: 0,
328 total_operands: 0,
329 vocabulary: 0,
330 length: 0,
331 difficulty: 0.0,
332 effort: 0.0,
333 }
334 }
335}
336
337impl Default for StructureInfo {
338 fn default() -> Self {
339 Self {
340 functions: Vec::new(),
341 classes: Vec::new(),
342 constants: Vec::new(),
343 interfaces: Vec::new(),
344 }
345 }
346}
347
348impl Default for TextStats {
349 fn default() -> Self {
350 Self {
351 line_count: 0,
352 non_empty_line_count: 0,
353 comment_line_count: 0,
354 code_line_count: 0,
355 blank_line_count: 0,
356 character_count: 0,
357 word_count: 0,
358 comment_density: 0.0,
359 }
360 }
361}
362
363impl ContentAnalyzer {
    /// Creates an analyzer with an empty regex cache and a freshly
    /// initialized AST parser.
    ///
    /// # Panics
    /// Panics if the underlying `AstParser` cannot be constructed.
    pub fn new() -> Self {
        Self {
            regex_cache: HashMap::new(),
            ast_parser: AstParser::new().expect("Failed to initialize AST parser"),
        }
    }
371
    /// Reads the file at `path`, detects its language from the extension,
    /// and runs the full content analysis on it.
    ///
    /// # Errors
    /// Returns a `ScribeError` when the file cannot be read as UTF-8 text.
    pub async fn analyze_file(&self, path: &Path) -> Result<ContentStats> {
        let content = fs::read_to_string(path)
            .map_err(|e| ScribeError::io(format!("Failed to read file {}: {}", path.display(), e), e))?;

        let language = self.detect_language_from_path(path);
        self.analyze_content(&content, &language).await
    }
380
381 pub async fn analyze_content(&self, content: &str, language: &Language) -> Result<ContentStats> {
383 let mut stats = ContentStats::default();
384
385 let (imports, documentation, complexity, structure, text_stats) = tokio::join!(
387 self.analyze_imports_async(content, language),
388 self.analyze_documentation_async(content),
389 self.analyze_complexity_async(content, language),
390 self.analyze_structure_async(content, language),
391 self.analyze_text_stats_async(content)
392 );
393
394 stats.imports = imports?;
395 stats.documentation = documentation?;
396 stats.complexity = complexity?;
397 stats.structure = structure?;
398 stats.text_stats = text_stats?;
399
400 Ok(stats)
401 }
402
    /// Extracts import statistics via the AST parser for supported
    /// languages; unsupported languages (or parse failures) yield an empty
    /// `ImportInfo` rather than an error.
    async fn analyze_imports_async(&self, content: &str, language: &Language) -> Result<ImportInfo> {
        let mut import_info = ImportInfo::default();

        // Languages with AST-based import extraction; `None` = unsupported.
        let ast_language = match language {
            Language::Python => Some(AstLanguage::Python),
            Language::JavaScript => Some(AstLanguage::JavaScript),
            Language::TypeScript => Some(AstLanguage::TypeScript),
            Language::Go => Some(AstLanguage::Go),
            Language::Rust => Some(AstLanguage::Rust),
            _ => None,
        };

        if let Some(ast_lang) = ast_language {
            match self.ast_parser.extract_imports(content, ast_lang) {
                Ok(imports) => {
                    // NOTE(review): `line_number` here is the import's
                    // *position in the extracted list*, not the source line
                    // it appears on — TODO: use the real location if
                    // `AstImport` exposes one.
                    for (line_number, import) in imports.into_iter().enumerate() {
                        let import_type = self.classify_import_type(&import.module);

                        let import_source = ImportSource {
                            module: import.module.clone(),
                            alias: import.alias,
                            items: import.items,
                            line_number: line_number + 1,
                            import_type: import_type.clone(),
                        };

                        import_info.import_sources.push(import_source);

                        // Update the per-category tallies and dependency sets.
                        match import_type {
                            ImportType::External => {
                                import_info.external_dependencies.insert(import.module);
                                import_info.absolute_imports += 1;
                            }
                            ImportType::Internal => {
                                import_info.internal_dependencies.insert(import.module);
                                import_info.absolute_imports += 1;
                            }
                            ImportType::Relative => {
                                import_info.relative_imports += 1;
                            }
                            _ => {
                                import_info.absolute_imports += 1;
                            }
                        }
                    }

                    import_info.total_imports = import_info.import_sources.len();
                    // NOTE(review): standard-library and relative modules are
                    // deliberately excluded from `unique_imports`.
                    import_info.unique_imports = import_info.external_dependencies.len() +
                        import_info.internal_dependencies.len();
                }
                Err(_) => {
                    // Parse failures are intentionally non-fatal: the file
                    // simply reports no imports.
                }
            }
        }

        Ok(import_info)
    }
466
467 async fn analyze_documentation_async(&self, content: &str) -> Result<DocumentationInfo> {
469 let mut doc_info = DocumentationInfo::default();
470 let mut line_number = 1;
471
472 for line in content.lines() {
473 if let Some(captures) = HEADING_REGEX.captures(line) {
475 let level = captures.get(1).unwrap().as_str().len();
476 let text = captures.get(2).unwrap().as_str().trim().to_string();
477
478 doc_info.headings.push(Heading {
479 level,
480 text: text.clone(),
481 line_number,
482 anchor: Some(self.generate_anchor(&text)),
483 });
484 }
485
486 for captures in LINK_REGEX.captures_iter(line) {
488 let text = captures.get(1).unwrap().as_str().to_string();
489 let url = captures.get(2).unwrap().as_str().to_string();
490
491 doc_info.links.push(Link {
492 text,
493 url: url.clone(),
494 line_number,
495 link_type: self.classify_link(&url),
496 });
497 }
498
499 if let Some(captures) = TODO_REGEX.captures(line) {
501 let comment_type = match captures.get(1).unwrap().as_str().to_uppercase().as_str() {
502 "TODO" => TodoType::Todo,
503 "FIXME" => TodoType::Fixme,
504 "NOTE" => TodoType::Note,
505 "BUG" => TodoType::Bug,
506 "HACK" => TodoType::Hack,
507 "WARNING" => TodoType::Warning,
508 _ => TodoType::Todo,
509 };
510
511 let text = captures.get(2).map_or(String::new(), |m| m.as_str().trim().to_string());
512
513 doc_info.todo_comments.push(TodoComment {
514 comment_type,
515 text,
516 line_number,
517 author: None, });
519 }
520
521 if line.starts_with('|') && line.ends_with('|') {
523 doc_info.tables += 1;
524 }
525 if line.trim_start().starts_with('-') || line.trim_start().starts_with('*') ||
526 line.trim_start().chars().next().map_or(false, |c| c.is_digit(10)) {
527 doc_info.lists += 1;
528 }
529
530 line_number += 1;
531 }
532
533 for captures in CODE_BLOCK_REGEX.captures_iter(content) {
535 let language = captures.get(1).map(|m| m.as_str().to_string());
536 let content_str = captures.get(2).unwrap().as_str().to_string();
537 let line_count = content_str.lines().count();
538
539 doc_info.code_blocks.push(CodeBlock {
540 language,
541 content: content_str,
542 line_number: 0, line_count,
544 });
545 }
546
547 Ok(doc_info)
548 }
549
550 async fn analyze_complexity_async(&self, content: &str, language: &Language) -> Result<ComplexityMetrics> {
552 let mut complexity = ComplexityMetrics::default();
553
554 let lines: Vec<&str> = content.lines().collect();
556
557 for line in &lines {
558 let trimmed = line.trim();
559
560 if self.is_function_declaration(trimmed, language) {
562 complexity.function_count += 1;
563 }
564
565 if self.is_class_declaration(trimmed, language) {
567 complexity.class_count += 1;
568 }
569
570 if self.is_decision_point(trimmed, language) {
572 complexity.cyclomatic_complexity += 1;
573 }
574 }
575
576 complexity.nesting_depth = self.calculate_max_nesting_depth(content, language);
578
579 complexity.halstead_metrics = self.calculate_halstead_metrics(content, language);
581
582 Ok(complexity)
583 }
584
585 async fn analyze_structure_async(&self, content: &str, language: &Language) -> Result<StructureInfo> {
587 let mut structure = StructureInfo::default();
588
589 let mut line_number = 1;
592
593 for line in content.lines() {
594 let trimmed = line.trim();
595
596 if let Some(function_info) = self.parse_function_declaration(trimmed, line_number, language) {
597 structure.functions.push(function_info);
598 }
599
600 if let Some(class_info) = self.parse_class_declaration(trimmed, line_number, language) {
601 structure.classes.push(class_info);
602 }
603
604 if let Some(constant_info) = self.parse_constant_declaration(trimmed, line_number, language) {
605 structure.constants.push(constant_info);
606 }
607
608 line_number += 1;
609 }
610
611 Ok(structure)
612 }
613
614 async fn analyze_text_stats_async(&self, content: &str) -> Result<TextStats> {
616 let lines: Vec<&str> = content.lines().collect();
617 let line_count = lines.len();
618 let character_count = content.len();
619 let word_count = content.split_whitespace().count();
620
621 let mut non_empty_line_count = 0;
622 let mut comment_line_count = 0;
623 let mut blank_line_count = 0;
624
625 for line in &lines {
626 let trimmed = line.trim();
627 if trimmed.is_empty() {
628 blank_line_count += 1;
629 } else {
630 non_empty_line_count += 1;
631 if self.is_comment_line(trimmed) {
632 comment_line_count += 1;
633 }
634 }
635 }
636
637 let code_line_count = non_empty_line_count - comment_line_count;
638 let comment_density = if code_line_count > 0 {
639 comment_line_count as f64 / code_line_count as f64
640 } else {
641 0.0
642 };
643
644 Ok(TextStats {
645 line_count,
646 non_empty_line_count,
647 comment_line_count,
648 code_line_count,
649 blank_line_count,
650 character_count,
651 word_count,
652 comment_density,
653 })
654 }
655
656
657
658 fn classify_import_type(&self, module: &str) -> ImportType {
660 if module.starts_with('.') || module.starts_with("./") || module.starts_with("../") {
661 ImportType::Relative
662 } else if self.is_standard_library_module(module) {
663 ImportType::Standard
664 } else if module.contains('/') || module.contains('.') {
665 ImportType::External
666 } else {
667 ImportType::Internal
668 }
669 }
670
671 fn is_standard_library_module(&self, module: &str) -> bool {
673 match module {
675 "os" | "sys" | "json" | "re" | "collections" | "itertools" | "functools" => true,
677 "fs" | "path" | "http" | "https" | "url" | "crypto" => true,
679 _ => false,
680 }
681 }
682
683 fn generate_anchor(&self, text: &str) -> String {
685 text.to_lowercase()
686 .chars()
687 .map(|c| if c.is_alphanumeric() { c } else { '-' })
688 .collect::<String>()
689 .split('-')
690 .filter(|s| !s.is_empty())
691 .collect::<Vec<_>>()
692 .join("-")
693 }
694
695 fn classify_link(&self, url: &str) -> LinkType {
697 if url.starts_with("http://") || url.starts_with("https://") {
698 LinkType::External
699 } else if url.starts_with("#") {
700 LinkType::Anchor
701 } else if url.starts_with("./") || url.starts_with("../") {
702 LinkType::Relative
703 } else {
704 LinkType::Internal
705 }
706 }
707
708 fn detect_language_from_path(&self, path: &Path) -> Language {
710 if let Some(extension) = path.extension().and_then(|ext| ext.to_str()) {
711 Language::from_extension(extension)
712 } else {
713 Language::Unknown
714 }
715 }
716
717 fn is_function_declaration(&self, line: &str, language: &Language) -> bool {
719 match language {
720 Language::Python => line.starts_with("def ") || line.starts_with("async def "),
721 Language::JavaScript | Language::TypeScript => {
722 line.contains("function ") || line.contains("=> ") || line.contains("function(")
723 }
724 Language::Rust => line.starts_with("fn ") || line.starts_with("pub fn "),
725 Language::Java => line.contains("public ") && line.contains("(") && line.contains(")"),
726 _ => false,
727 }
728 }
729
730 fn is_class_declaration(&self, line: &str, language: &Language) -> bool {
732 match language {
733 Language::Python => line.starts_with("class "),
734 Language::JavaScript | Language::TypeScript => line.starts_with("class "),
735 Language::Java => line.contains("class ") && line.contains("{"),
736 Language::Rust => line.starts_with("struct ") || line.starts_with("enum "),
737 _ => false,
738 }
739 }
740
741 fn is_decision_point(&self, line: &str, _language: &Language) -> bool {
743 line.contains("if ") || line.contains("elif ") || line.contains("else ") ||
745 line.contains("for ") || line.contains("while ") || line.contains("match ") ||
746 line.contains("switch ") || line.contains("case ") || line.contains("catch ") ||
747 line.contains("&&") || line.contains("||") || line.contains("?")
748 }
749
750 fn calculate_max_nesting_depth(&self, content: &str, _language: &Language) -> usize {
752 let mut max_depth = 0;
753 let mut current_depth = 0;
754
755 for line in content.lines() {
756 let trimmed = line.trim();
757
758 let opens = trimmed.matches('{').count() +
760 trimmed.matches('(').count() +
761 trimmed.matches('[').count();
762 let closes = trimmed.matches('}').count() +
763 trimmed.matches(')').count() +
764 trimmed.matches(']').count();
765
766 current_depth += opens;
767 max_depth = max_depth.max(current_depth);
768 current_depth = current_depth.saturating_sub(closes);
769 }
770
771 max_depth
772 }
773
    /// Computes approximate Halstead metrics from whitespace-separated words.
    ///
    /// This is a rough lexical approximation, not a tokenizer:
    /// * a word containing several operator substrings is counted once per
    ///   matching operator, and "<="/">=" also match "<"/">"/"=", which
    ///   inflates operator counts;
    /// * operands are estimated as "all words minus operator hits".
    fn calculate_halstead_metrics(&self, content: &str, _language: &Language) -> HalsteadMetrics {
        let words: Vec<&str> = content.split_whitespace().collect();
        let unique_words: HashSet<&str> = words.iter().cloned().collect();

        let operators = ["+", "-", "*", "/", "=", "==", "!=", "&&", "||", "!", "<", ">", "<=", ">="];
        let mut operator_count = 0;
        let mut unique_operators = HashSet::new();

        for word in &words {
            for &op in &operators {
                if word.contains(op) {
                    operator_count += 1;
                    unique_operators.insert(op);
                }
            }
        }

        let distinct_operators = unique_operators.len();
        // Operands approximated as the non-operator vocabulary/occurrences;
        // saturating_sub guards against the overlap inflation noted above.
        let distinct_operands = unique_words.len().saturating_sub(distinct_operators);
        let total_operators = operator_count;
        let total_operands = words.len().saturating_sub(operator_count);
        let vocabulary = distinct_operators + distinct_operands;
        let length = total_operators + total_operands;

        // D = (n1 / 2) * (N2 / n2); zero when there are no operands.
        let difficulty = if distinct_operands > 0 {
            (distinct_operators as f64 / 2.0) * (total_operands as f64 / distinct_operands as f64)
        } else {
            0.0
        };

        // E = D * N.
        let effort = difficulty * length as f64;

        HalsteadMetrics {
            distinct_operators,
            distinct_operands,
            total_operators,
            total_operands,
            vocabulary,
            length,
            difficulty,
            effort,
        }
    }
819
820 fn parse_function_declaration(&self, line: &str, line_number: usize, language: &Language) -> Option<FunctionInfo> {
822 if !self.is_function_declaration(line, language) {
823 return None;
824 }
825
826 let name = match language {
828 Language::Python => {
829 if let Some(start) = line.find("def ") {
830 let after_def = &line[start + 4..];
831 if let Some(paren_pos) = after_def.find('(') {
832 Some(after_def[..paren_pos].trim().to_string())
833 } else {
834 None
835 }
836 } else {
837 None
838 }
839 }
840 Language::Rust => {
841 if let Some(start) = line.find("fn ") {
842 let after_fn = &line[start + 3..];
843 if let Some(paren_pos) = after_fn.find('(') {
844 Some(after_fn[..paren_pos].trim().to_string())
845 } else {
846 None
847 }
848 } else {
849 None
850 }
851 }
852 _ => None,
853 };
854
855 if let Some(function_name) = name {
856 Some(FunctionInfo {
857 name: function_name,
858 line_number,
859 line_count: 1, parameters: vec![], return_type: None, visibility: Visibility::Unknown,
863 is_async: line.contains("async"),
864 is_generator: line.contains("yield") || line.contains("generator"),
865 docstring: None,
866 })
867 } else {
868 None
869 }
870 }
871
872 fn parse_class_declaration(&self, line: &str, line_number: usize, language: &Language) -> Option<ClassInfo> {
874 if !self.is_class_declaration(line, language) {
875 return None;
876 }
877
878 let name = match language {
879 Language::Python => {
880 if let Some(start) = line.find("class ") {
881 let after_class = &line[start + 6..];
882 if let Some(colon_pos) = after_class.find(':') {
883 Some(after_class[..colon_pos].trim().split('(').next().unwrap().trim().to_string())
884 } else {
885 None
886 }
887 } else {
888 None
889 }
890 }
891 _ => None,
892 };
893
894 if let Some(class_name) = name {
895 Some(ClassInfo {
896 name: class_name,
897 line_number,
898 line_count: 1, parent_classes: vec![], methods: vec![], attributes: vec![], visibility: Visibility::Unknown,
903 docstring: None,
904 })
905 } else {
906 None
907 }
908 }
909
910 fn parse_constant_declaration(&self, line: &str, line_number: usize, _language: &Language) -> Option<ConstantInfo> {
912 if line.contains("const ") || line.contains("final ") || (line.contains("=") && line.to_uppercase() == line) {
914 if let Some(equals_pos) = line.find('=') {
915 let before_equals = line[..equals_pos].trim();
916
917 let tokens: Vec<&str> = before_equals.split_whitespace().collect();
919
920 if tokens.len() >= 2 {
921 if tokens[0] == "const" || tokens[0] == "final" {
923 let name = tokens[1];
924 let clean_name = name.trim_end_matches(':');
926 return Some(ConstantInfo {
927 name: clean_name.to_string(),
928 line_number,
929 value_type: None, visibility: Visibility::Unknown,
931 });
932 }
933 }
934
935 if let Some(name) = tokens.get(1) {
937 let clean_name = name.trim_end_matches(':');
938 return Some(ConstantInfo {
939 name: clean_name.to_string(),
940 line_number,
941 value_type: None,
942 visibility: Visibility::Unknown,
943 });
944 }
945 }
946 }
947 None
948 }
949
950 fn is_comment_line(&self, line: &str) -> bool {
952 let trimmed = line.trim();
953 trimmed.starts_with("//") || trimmed.starts_with('#') ||
954 trimmed.starts_with("/*") || trimmed.starts_with('*') ||
955 trimmed.starts_with("<!--") || trimmed.starts_with("--")
956 }
957}
958
impl Default for ContentAnalyzer {
    /// Same as [`ContentAnalyzer::new`]; panics if the AST parser cannot be
    /// initialized.
    fn default() -> Self {
        Self::new()
    }
}
964
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use std::fs;

    /// Construction must succeed without panicking (AstParser init).
    /// Fix: replaced the meaningless `assert!(true)` and unused binding
    /// with a deliberately-unused one — the construction itself is the test.
    #[tokio::test]
    async fn test_content_analyzer_creation() {
        let _analyzer = ContentAnalyzer::new();
    }

    #[tokio::test]
    async fn test_python_import_analysis() {
        let analyzer = ContentAnalyzer::new();
        let python_code = r#"
import os
import sys as system
from collections import defaultdict, Counter
from .local_module import LocalClass
import third_party.package
    "#;

        let stats = analyzer.analyze_content(python_code, &Language::Python).await.unwrap();

        assert_eq!(stats.imports.total_imports, 5);

        // Standard-library modules must not be classified as external.
        assert!(!stats.imports.external_dependencies.contains("os"));
        assert!(!stats.imports.external_dependencies.contains("sys"));
        assert!(!stats.imports.external_dependencies.contains("collections"));

        assert!(stats.imports.external_dependencies.contains("third_party.package"));

        assert_eq!(stats.imports.relative_imports, 1);
        assert!(stats.imports.absolute_imports > 0);
    }

    #[tokio::test]
    async fn test_documentation_analysis() {
        let analyzer = ContentAnalyzer::new();
        let markdown_content = r#"
# Main Title

This is a paragraph with [a link](https://example.com).

## Subsection

```python
def example():
    pass
```

- List item 1
- List item 2

| Column 1 | Column 2 |
|----------|----------|
| Data 1   | Data 2   |

<!-- TODO: Add more examples -->
    "#;

        let stats = analyzer.analyze_content(markdown_content, &Language::Markdown).await.unwrap();

        assert_eq!(stats.documentation.headings.len(), 2);
        assert_eq!(stats.documentation.headings[0].level, 1);
        assert_eq!(stats.documentation.headings[0].text, "Main Title");
        assert_eq!(stats.documentation.links.len(), 1);
        assert_eq!(stats.documentation.code_blocks.len(), 1);
        assert_eq!(stats.documentation.todo_comments.len(), 1);
        assert!(stats.documentation.lists > 0);
    }

    #[tokio::test]
    async fn test_text_statistics() {
        let analyzer = ContentAnalyzer::new();
        let code_content = r#"
// This is a comment
function example() {
    console.log("Hello, world!");
    // Another comment
    return true;
}

// Final comment
    "#;

        let stats = analyzer.analyze_content(code_content, &Language::JavaScript).await.unwrap();

        assert!(stats.text_stats.line_count > 0);
        assert!(stats.text_stats.comment_line_count >= 3);
        assert!(stats.text_stats.code_line_count > 0);
        assert!(stats.text_stats.comment_density > 0.0);
        assert!(stats.text_stats.word_count > 0);
    }

    #[tokio::test]
    async fn test_complexity_metrics() {
        let analyzer = ContentAnalyzer::new();
        let code_content = r#"
def complex_function(x, y):
    if x > 0:
        if y > 0:
            for i in range(10):
                if i % 2 == 0:
                    print(i)
        else:
            while y < 0:
                y += 1
    return x + y

class ExampleClass:
    def method1(self):
        pass

    def method2(self):
        pass
    "#;

        let stats = analyzer.analyze_content(code_content, &Language::Python).await.unwrap();

        assert!(stats.complexity.function_count >= 2);
        assert!(stats.complexity.class_count >= 1);
        assert!(stats.complexity.cyclomatic_complexity > 0);
        assert!(stats.complexity.nesting_depth > 0);
    }

    #[tokio::test]
    async fn test_structure_analysis() {
        let analyzer = ContentAnalyzer::new();
        let rust_code = r#"
pub fn public_function(param: i32) -> bool {
    true
}

fn private_function() {
    println!("Hello");
}

pub struct MyStruct {
    field: String,
}

const CONSTANT_VALUE: i32 = 42;
    "#;

        let stats = analyzer.analyze_content(rust_code, &Language::Rust).await.unwrap();

        assert_eq!(stats.structure.functions.len(), 2);
        assert!(stats.structure.functions.iter().any(|f| f.name == "public_function"));
        assert!(stats.structure.functions.iter().any(|f| f.name == "private_function"));
        assert_eq!(stats.structure.constants.len(), 1);
        assert_eq!(stats.structure.constants[0].name, "CONSTANT_VALUE");
    }

    #[tokio::test]
    async fn test_file_analysis() {
        let temp_dir = TempDir::new().unwrap();
        let test_file = temp_dir.path().join("test.py");

        let content = r#"
"""
This is a module docstring.
"""
import os
from collections import defaultdict

def greet(name: str) -> str:
    """Greet a person by name."""
    return f"Hello, {name}!"

class Person:
    """A simple person class."""
    def __init__(self, name: str):
        self.name = name

    def speak(self):
        return self.greet()
    "#;

        fs::write(&test_file, content).unwrap();

        let analyzer = ContentAnalyzer::new();
        let stats = analyzer.analyze_file(&test_file).await.unwrap();

        assert!(stats.imports.total_imports >= 2);
        assert!(stats.structure.functions.len() >= 2);
        assert!(stats.structure.classes.len() >= 1);
        assert!(stats.text_stats.line_count > 10);
        assert!(stats.complexity.function_count >= 2);
    }

    #[test]
    fn test_import_type_classification() {
        let analyzer = ContentAnalyzer::new();

        assert_eq!(analyzer.classify_import_type("os"), ImportType::Standard);
        assert_eq!(analyzer.classify_import_type("./local"), ImportType::Relative);
        assert_eq!(analyzer.classify_import_type("../parent"), ImportType::Relative);
        assert_eq!(analyzer.classify_import_type("third_party.package"), ImportType::External);
    }

    #[test]
    fn test_link_classification() {
        let analyzer = ContentAnalyzer::new();

        assert_eq!(analyzer.classify_link("https://example.com"), LinkType::External);
        assert_eq!(analyzer.classify_link("#anchor"), LinkType::Anchor);
        assert_eq!(analyzer.classify_link("./relative/path"), LinkType::Relative);
        assert_eq!(analyzer.classify_link("internal-link"), LinkType::Internal);
    }

    #[test]
    fn test_anchor_generation() {
        let analyzer = ContentAnalyzer::new();

        assert_eq!(analyzer.generate_anchor("Main Title"), "main-title");
        assert_eq!(analyzer.generate_anchor("Complex Title With Symbols!"), "complex-title-with-symbols");
        assert_eq!(analyzer.generate_anchor("Numbers 123 and More"), "numbers-123-and-more");
    }
}