1use super::{CommentContext, ContentChunk, ContentType};
7use crate::ast::{Language, NodeId, Span};
8use anyhow::{anyhow, Result};
9use regex::Regex;
10use std::collections::HashMap;
11use std::path::Path;
12use tree_sitter::Tree;
13
/// Entry point for comment extraction: dispatches to a per-language extractor.
pub struct CommentExtractor {
    /// Extractor implementations keyed by the language they are registered for.
    language_extractors: HashMap<Language, Box<dyn LanguageCommentExtractor>>,
}
19
20impl CommentExtractor {
21 pub fn new() -> Self {
23 let mut extractors: HashMap<Language, Box<dyn LanguageCommentExtractor>> = HashMap::new();
24
25 extractors.insert(
27 Language::JavaScript,
28 Box::new(JavaScriptCommentExtractor::new()),
29 );
30 extractors.insert(
31 Language::TypeScript,
32 Box::new(JavaScriptCommentExtractor::new()),
33 );
34 extractors.insert(Language::Python, Box::new(PythonCommentExtractor::new()));
35 extractors.insert(Language::Java, Box::new(JavaCommentExtractor::new()));
36 extractors.insert(Language::Rust, Box::new(RustCommentExtractor::new()));
37 extractors.insert(Language::C, Box::new(CCommentExtractor::new()));
38 extractors.insert(Language::Cpp, Box::new(CCommentExtractor::new()));
39
40 Self {
41 language_extractors: extractors,
42 }
43 }
44
45 pub fn extract_comments(
47 &self,
48 language: Language,
49 tree: &Tree,
50 source: &str,
51 file_path: &Path,
52 ast_nodes: &[NodeId],
53 ) -> Result<Vec<ContentChunk>> {
54 let extractor = self
55 .language_extractors
56 .get(&language)
57 .ok_or_else(|| anyhow!("No comment extractor for language: {:?}", language))?;
58
59 extractor.extract_comments(tree, source, file_path, ast_nodes)
60 }
61
62 pub fn supports_language(&self, language: Language) -> bool {
64 self.language_extractors.contains_key(&language)
65 }
66
67 pub fn supported_languages(&self) -> Vec<Language> {
69 self.language_extractors.keys().copied().collect()
70 }
71}
72
73impl Default for CommentExtractor {
74 fn default() -> Self {
75 Self::new()
76 }
77}
78
/// Behaviour every per-language comment extractor must provide.
///
/// Implementations must be `Send + Sync` so they can be stored behind a shared
/// `Box<dyn LanguageCommentExtractor>`.
pub trait LanguageCommentExtractor: Send + Sync {
    /// Extracts the comments found in `source` as content chunks attributed to `file_path`.
    ///
    /// `tree` and `ast_nodes` are part of the contract, but the implementations in this
    /// file work purely on the source text and ignore both.
    fn extract_comments(
        &self,
        tree: &Tree,
        source: &str,
        file_path: &Path,
        ast_nodes: &[NodeId],
    ) -> Result<Vec<ContentChunk>>;

    /// Returns the comment delimiters this extractor recognises.
    fn comment_patterns(&self) -> &CommentPatterns;
}
93
/// Comment delimiters recognised for one language.
///
/// `Default` produces an empty pattern set; `PartialEq`/`Eq` allow pattern sets to be
/// compared directly (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq, Default)]
pub struct CommentPatterns {
    /// Markers that start a comment running to the end of the line (e.g. `//`, `#`).
    pub single_line: Vec<String>,
    /// `(opening, closing)` delimiter pairs for block comments (e.g. `/*` and `*/`).
    pub block: Vec<(String, String)>,
    /// Markers that introduce documentation comments (e.g. `/**`).
    pub documentation: Vec<String>,
}
104
/// Regex-based comment extractor for JavaScript-style sources.
pub struct JavaScriptCommentExtractor {
    /// Delimiters reported through `comment_patterns`.
    patterns: CommentPatterns,
    /// Matches either a `//` line comment or a `/* ... */` block comment.
    comment_regex: Regex,
}
110
111impl Default for JavaScriptCommentExtractor {
112 fn default() -> Self {
113 Self::new()
114 }
115}
116
117impl JavaScriptCommentExtractor {
118 pub fn new() -> Self {
119 Self {
120 patterns: CommentPatterns {
121 single_line: vec!["//".to_string()],
122 block: vec![("/*".to_string(), "*/".to_string())],
123 documentation: vec!["/**".to_string(), "///".to_string()],
124 },
125 comment_regex: Regex::new(r"(?m)//.*$|/\*[\s\S]*?\*/").unwrap(),
126 }
127 }
128}
129
130impl LanguageCommentExtractor for JavaScriptCommentExtractor {
131 fn extract_comments(
132 &self,
133 _tree: &Tree,
134 source: &str,
135 file_path: &Path,
136 _ast_nodes: &[NodeId],
137 ) -> Result<Vec<ContentChunk>> {
138 let mut chunks = Vec::new();
139 let mut chunk_index = 0;
140
141 for comment_match in self.comment_regex.find_iter(source) {
143 let comment_text = comment_match.as_str();
144 let span = self.calculate_match_span(&comment_match, source);
145
146 let cleaned_text = if comment_text.starts_with("/**") {
148 self.clean_jsdoc_comment(comment_text)
149 } else if comment_text.starts_with("/*") {
150 self.clean_block_comment(comment_text)
151 } else {
152 self.clean_single_line_comment(comment_text)
153 };
154
155 if cleaned_text.trim().is_empty() {
157 continue;
158 }
159
160 let context = if comment_text.starts_with("/**") {
161 CommentContext::Documentation
162 } else if comment_text.starts_with("/*") {
163 CommentContext::Block
164 } else {
165 CommentContext::Inline
166 };
167
168 let content_type = ContentType::Comment {
169 language: Language::JavaScript,
170 context,
171 };
172
173 let chunk = ContentChunk::new(
174 file_path.to_path_buf(),
175 content_type,
176 cleaned_text,
177 span,
178 chunk_index,
179 )
180 .with_metadata(serde_json::json!({
181 "raw_text": comment_text,
182 "language": "javascript"
183 }));
184
185 chunks.push(chunk);
186 chunk_index += 1;
187 }
188
189 Ok(chunks)
190 }
191
192 fn comment_patterns(&self) -> &CommentPatterns {
193 &self.patterns
194 }
195}
196
197impl JavaScriptCommentExtractor {
198 fn clean_jsdoc_comment(&self, comment: &str) -> String {
200 comment
201 .trim_start_matches("/**")
202 .trim_end_matches("*/")
203 .lines()
204 .map(|line| line.trim().trim_start_matches('*').trim())
205 .filter(|line| !line.is_empty())
206 .collect::<Vec<_>>()
207 .join("\n")
208 }
209
210 fn clean_block_comment(&self, comment: &str) -> String {
212 comment
213 .trim_start_matches("/*")
214 .trim_end_matches("*/")
215 .trim()
216 .to_string()
217 }
218
219 fn clean_single_line_comment(&self, comment: &str) -> String {
221 comment.trim_start_matches("//").trim().to_string()
222 }
223
224 fn calculate_match_span(&self, match_obj: ®ex::Match, source: &str) -> Span {
226 let start_byte = match_obj.start();
227 let end_byte = match_obj.end();
228
229 let source_before = &source[..start_byte];
230 let start_line = source_before.chars().filter(|&c| c == '\n').count() + 1;
232 let start_column = source_before.lines().last().map(|l| l.len()).unwrap_or(0) + 1;
233
234 let match_content = match_obj.as_str();
235 let lines_in_match = match_content.chars().filter(|&c| c == '\n').count();
236 let end_line = start_line + lines_in_match;
237 let end_column = if lines_in_match > 0 {
238 match_content.lines().last().map(|l| l.len()).unwrap_or(0) + 1
239 } else {
240 start_column + match_content.len()
241 };
242
243 Span::new(
244 start_byte,
245 end_byte,
246 start_line,
247 end_line,
248 start_column,
249 end_column,
250 )
251 }
252}
253
/// Regex-based extractor for Python `#` comments and triple-quoted docstrings.
pub struct PythonCommentExtractor {
    /// Delimiters reported through `comment_patterns`.
    patterns: CommentPatterns,
    /// Matches a `#` comment up to the end of its line.
    comment_regex: Regex,
    /// Matches a `"""..."""` or `'''...'''` string, possibly spanning several lines.
    docstring_regex: Regex,
}
260
261impl Default for PythonCommentExtractor {
262 fn default() -> Self {
263 Self::new()
264 }
265}
266
267impl PythonCommentExtractor {
268 pub fn new() -> Self {
269 Self {
270 patterns: CommentPatterns {
271 single_line: vec!["#".to_string()],
272 block: vec![
273 ("\"\"\"".to_string(), "\"\"\"".to_string()),
274 ("'''".to_string(), "'''".to_string()),
275 ],
276 documentation: vec!["\"\"\"".to_string(), "'''".to_string()],
277 },
278 comment_regex: Regex::new(r"(?m)#.*$").unwrap(),
279 docstring_regex: Regex::new(r#"("""[\s\S]*?"""|'''[\s\S]*?''')"#).unwrap(),
280 }
281 }
282}
283
284impl LanguageCommentExtractor for PythonCommentExtractor {
285 fn extract_comments(
286 &self,
287 _tree: &Tree,
288 source: &str,
289 file_path: &Path,
290 _ast_nodes: &[NodeId],
291 ) -> Result<Vec<ContentChunk>> {
292 let mut chunks = Vec::new();
293 let mut chunk_index = 0;
294
295 for comment_match in self.comment_regex.find_iter(source) {
297 let comment_text = comment_match.as_str();
298 let cleaned_text = comment_text.trim_start_matches('#').trim().to_string();
299
300 if cleaned_text.is_empty() {
301 continue;
302 }
303
304 let span = self.calculate_match_span(&comment_match, source);
305 let content_type = ContentType::Comment {
306 language: Language::Python,
307 context: CommentContext::Inline,
308 };
309
310 let chunk = ContentChunk::new(
311 file_path.to_path_buf(),
312 content_type,
313 cleaned_text,
314 span,
315 chunk_index,
316 )
317 .with_metadata(serde_json::json!({
318 "raw_text": comment_text,
319 "language": "python"
320 }));
321
322 chunks.push(chunk);
323 chunk_index += 1;
324 }
325
326 for docstring_match in self.docstring_regex.find_iter(source) {
328 let docstring_text = docstring_match.as_str();
329 let cleaned_text = self.clean_docstring(docstring_text);
330
331 if cleaned_text.is_empty() {
332 continue;
333 }
334
335 let span = self.calculate_match_span(&docstring_match, source);
336 let content_type = ContentType::Comment {
337 language: Language::Python,
338 context: CommentContext::Documentation,
339 };
340
341 let chunk = ContentChunk::new(
342 file_path.to_path_buf(),
343 content_type,
344 cleaned_text,
345 span,
346 chunk_index,
347 )
348 .with_metadata(serde_json::json!({
349 "raw_text": docstring_text,
350 "language": "python"
351 }));
352
353 chunks.push(chunk);
354 chunk_index += 1;
355 }
356
357 Ok(chunks)
358 }
359
360 fn comment_patterns(&self) -> &CommentPatterns {
361 &self.patterns
362 }
363}
364
365impl PythonCommentExtractor {
366 fn clean_docstring(&self, docstring: &str) -> String {
368 let cleaned = if docstring.starts_with("\"\"\"") {
369 docstring
370 .trim_start_matches("\"\"\"")
371 .trim_end_matches("\"\"\"")
372 } else {
373 docstring.trim_start_matches("'''").trim_end_matches("'''")
374 };
375
376 cleaned.trim().to_string()
377 }
378
379 fn calculate_match_span(&self, match_obj: ®ex::Match, source: &str) -> Span {
381 let start_byte = match_obj.start();
382 let end_byte = match_obj.end();
383
384 let source_before = &source[..start_byte];
385 let start_line = source_before.chars().filter(|&c| c == '\n').count() + 1;
387 let start_column = source_before.lines().last().map(|l| l.len()).unwrap_or(0) + 1;
388
389 let match_content = match_obj.as_str();
390 let lines_in_match = match_content.chars().filter(|&c| c == '\n').count();
391 let end_line = start_line + lines_in_match;
392 let end_column = if lines_in_match > 0 {
393 match_content.lines().last().map(|l| l.len()).unwrap_or(0) + 1
394 } else {
395 start_column + match_content.len()
396 };
397
398 Span::new(
399 start_byte,
400 end_byte,
401 start_line,
402 end_line,
403 start_column,
404 end_column,
405 )
406 }
407}
408
// Generates a regex-based comment extractor for a language with one line-comment marker
// and one block-comment delimiter pair.
//
// Arguments: the extractor type name, the `Language` variant used to tag chunks, the
// line-comment marker, and the block-comment opening and closing delimiters.
//
// The generated extractor compiles its regex once in `new()` and scans the source in a
// single left-to-right pass, so chunks are produced in source order and a marker that
// appears inside another comment (e.g. `//` in a URL inside `/* ... */`) is not reported
// as a comment of its own.
//
// NOTE(review): classification relies on the line marker and the block opener not being
// prefixes of one another, which holds for every invocation in this file.
macro_rules! simple_comment_extractor {
    ($name:ident, $language:ident, $single_line:expr, $block_start:expr, $block_end:expr) => {
        /// Regex-based comment extractor generated by `simple_comment_extractor!`.
        pub struct $name {
            /// Delimiters reported through `comment_patterns`.
            patterns: CommentPatterns,
            /// Matches either a line comment or a block comment.
            comment_regex: Regex,
        }

        impl Default for $name {
            fn default() -> Self {
                Self::new()
            }
        }

        impl $name {
            /// Builds the extractor and compiles its comment regex once.
            pub fn new() -> Self {
                // `(?m)` lets `$` end a line comment at each line end; the lazy
                // `[\s\S]*?` ends a block comment at the first closing delimiter.
                let pattern = format!(
                    r"(?m){}.*$|{}[\s\S]*?{}",
                    regex::escape($single_line),
                    regex::escape($block_start),
                    regex::escape($block_end)
                );

                Self {
                    patterns: CommentPatterns {
                        single_line: vec![$single_line.to_string()],
                        block: vec![($block_start.to_string(), $block_end.to_string())],
                        documentation: vec![],
                    },
                    comment_regex: Regex::new(&pattern)
                        .expect("escaped comment markers always form a valid pattern"),
                }
            }
        }

        impl LanguageCommentExtractor for $name {
            /// Extracts line comments (inline context) and block comments (block
            /// context) from `source`, in source order.
            ///
            /// Comments whose cleaned text is empty are skipped. `_tree` and
            /// `_ast_nodes` are unused: matching is purely textual, so markers inside
            /// string literals are matched as well.
            fn extract_comments(
                &self,
                _tree: &Tree,
                source: &str,
                file_path: &Path,
                _ast_nodes: &[NodeId],
            ) -> Result<Vec<ContentChunk>> {
                let mut chunks = Vec::new();
                let mut chunk_index = 0;
                let marker: &str = $single_line;

                for comment_match in self.comment_regex.find_iter(source) {
                    let comment_text = comment_match.as_str();

                    let (cleaned_text, context) = if comment_text.starts_with($block_start) {
                        (
                            comment_text
                                .trim_start_matches($block_start)
                                .trim_end_matches($block_end)
                                .trim()
                                .to_string(),
                            CommentContext::Block,
                        )
                    } else {
                        let body = comment_text.trim_start_matches(marker);
                        // Doc-style variants repeat the marker's final character
                        // (`///` for `//`); drop the leftovers so `/// text` yields
                        // `text` rather than `/ text`.
                        let body = match marker.chars().last() {
                            Some(last) => body.trim_start_matches(last),
                            None => body,
                        };
                        (body.trim().to_string(), CommentContext::Inline)
                    };

                    if cleaned_text.is_empty() {
                        continue;
                    }

                    let span = self.calculate_match_span(&comment_match, source);
                    let content_type = ContentType::Comment {
                        language: Language::$language,
                        context,
                    };

                    let chunk = ContentChunk::new(
                        file_path.to_path_buf(),
                        content_type,
                        cleaned_text,
                        span,
                        chunk_index,
                    );

                    chunks.push(chunk);
                    chunk_index += 1;
                }

                Ok(chunks)
            }

            /// Returns the delimiter set this extractor was generated with.
            fn comment_patterns(&self) -> &CommentPatterns {
                &self.patterns
            }
        }

        impl $name {
            /// Converts a regex match into a `Span` with 1-based lines and columns.
            ///
            /// Columns are measured in bytes. `end_column` is one past the last byte
            /// of the match on its final line.
            fn calculate_match_span(&self, match_obj: &regex::Match, source: &str) -> Span {
                let start_byte = match_obj.start();
                let end_byte = match_obj.end();

                let source_before = &source[..start_byte];
                let start_line = source_before.bytes().filter(|&b| b == b'\n').count() + 1;
                // Measure from the byte after the last newline. `lines().last()` would
                // return the *previous* line whenever the match begins at the start of
                // a line.
                let line_start = source_before.rfind('\n').map_or(0, |idx| idx + 1);
                let start_column = start_byte - line_start + 1;

                let match_content = match_obj.as_str();
                let lines_in_match = match_content.bytes().filter(|&b| b == b'\n').count();
                let end_line = start_line + lines_in_match;
                let end_column = match match_content.rfind('\n') {
                    // Multi-line match: the column restarts after the last newline.
                    Some(idx) => match_content.len() - idx,
                    None => start_column + match_content.len(),
                };

                Span::new(
                    start_byte,
                    end_byte,
                    start_line,
                    end_line,
                    start_column,
                    end_column,
                )
            }
        }
    };
}
554
// Concrete extractors for the C-family languages. All three use `//` line comments and
// `/* ... */` block comments.
simple_comment_extractor!(JavaCommentExtractor, Java, "//", "/*", "*/");
simple_comment_extractor!(RustCommentExtractor, Rust, "//", "/*", "*/");
// `CommentExtractor::new` registers this extractor for both `Language::C` and
// `Language::Cpp`; the chunks it produces are tagged `Language::C`.
simple_comment_extractor!(CCommentExtractor, C, "//", "/*", "*/");
559
#[cfg(test)]
mod tests {
    //! Unit tests for the regex patterns, text cleaning and span computation.
    //!
    //! `extract_comments` itself is not called here because it requires a parsed
    //! `tree_sitter::Tree`; the tests exercise the regexes and helpers it is built from.

    use super::*;

    /// The registry reports the built-in languages and rejects unknown ones.
    #[test]
    fn test_comment_extractor_creation() {
        let extractor = CommentExtractor::new();
        assert!(extractor.supports_language(Language::JavaScript));
        assert!(extractor.supports_language(Language::Python));
        assert!(extractor.supports_language(Language::Rust));
        assert!(!extractor.supports_language(Language::Unknown));

        let supported = extractor.supported_languages();
        assert!(supported.contains(&Language::JavaScript));
        assert!(supported.contains(&Language::Python));
    }

    /// The JavaScript extractor advertises `//`, `/* */` and `/**`.
    #[test]
    fn test_javascript_comment_patterns() {
        let extractor = JavaScriptCommentExtractor::new();
        let patterns = extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));
        assert!(patterns.documentation.contains(&"/**".to_string()));
    }

    /// The Python extractor advertises `#` and triple-quoted strings.
    #[test]
    fn test_python_comment_patterns() {
        let extractor = PythonCommentExtractor::new();
        let patterns = extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"#".to_string()));
        assert!(patterns
            .block
            .contains(&("\"\"\"".to_string(), "\"\"\"".to_string())));
        assert!(patterns.documentation.contains(&"\"\"\"".to_string()));
    }

    /// The JavaScript regex finds a line comment and a block comment, in order.
    #[test]
    fn test_comment_pattern_matching() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let source = "// Single line comment\n/* Block comment */";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert_eq!(matches.len(), 2, "Should find 2 comment matches");

        assert_eq!(matches[0].as_str(), "// Single line comment");
        assert_eq!(matches[1].as_str(), "/* Block comment */");
    }

    /// Each JavaScript cleaning helper removes its own delimiters.
    #[test]
    fn test_comment_cleaning() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let jsdoc = "/**\n * This is a JSDoc comment\n * @param value The input value\n */";
        let cleaned = js_extractor.clean_jsdoc_comment(jsdoc);
        assert!(cleaned.contains("This is a JSDoc comment"));
        assert!(cleaned.contains("@param value The input value"));
        assert!(!cleaned.contains("/**"));
        assert!(!cleaned.contains("*/"));

        let block = "/* This is a block comment */";
        let cleaned = js_extractor.clean_block_comment(block);
        assert_eq!(cleaned, "This is a block comment");

        let single = "// This is a single line comment";
        let cleaned = js_extractor.clean_single_line_comment(single);
        assert_eq!(cleaned, "This is a single line comment");
    }

    /// Both docstring quote styles are stripped.
    #[test]
    fn test_python_docstring_cleaning() {
        let py_extractor = PythonCommentExtractor::new();

        let docstring = "\"\"\"This is a docstring\nwith multiple lines\"\"\"";
        let cleaned = py_extractor.clean_docstring(docstring);
        assert!(cleaned.contains("This is a docstring"));
        assert!(!cleaned.contains("\"\"\""));

        let docstring = "'''This is another docstring'''";
        let cleaned = py_extractor.clean_docstring(docstring);
        assert_eq!(cleaned, "This is another docstring");
    }

    /// A single-line comment on line 2 yields a one-line span with 1-based positions.
    #[test]
    fn test_span_calculation() {
        let js_extractor = JavaScriptCommentExtractor::new();
        let source = "const x = 5;\n// This is a comment\nconst y = 10;";

        let comment_match = js_extractor
            .comment_regex
            .find(source)
            .expect("Should find comment in source");
        let span = js_extractor.calculate_match_span(&comment_match, source);

        assert_eq!(span.start_line, 2);
        assert_eq!(span.end_line, 2);
        assert!(span.start_column >= 1);
        assert!(span.end_column > span.start_column);
        assert_eq!(comment_match.as_str(), "// This is a comment");
    }

    /// Nested openers, trailing comments and empty comments are all matched.
    #[test]
    fn test_regex_edge_cases() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let source = "/* outer /* inner */ comment */";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert!(
            !matches.is_empty(),
            "Should handle nested comments gracefully"
        );

        let source = "const x = 5; // Comment at end";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), "// Comment at end");

        let source = "// \n/* */";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert_eq!(matches.len(), 2);
    }

    /// Trailing, standalone and multi-line block comments are all found in one source.
    #[test]
    fn test_comprehensive_regex_edge_cases() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let multiline_source = r#"
const x = 1; // Comment on line 2
// Another comment on line 3
/* Block comment
   spanning multiple
   lines */
const y = 2; // Final comment
"#;

        let matches: Vec<_> = js_extractor
            .comment_regex
            .find_iter(multiline_source)
            .collect();
        assert!(
            matches.len() >= 4,
            "Should find all comment types including multiline block"
        );

        let comment_texts: Vec<&str> = matches.iter().map(|m| m.as_str()).collect();
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("Comment on line 2")));
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("Another comment")));
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("spanning multiple")));
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("Final comment")));
    }

    /// The registry covers at least the five core languages.
    #[test]
    fn test_main_comment_extractor() {
        let extractor = CommentExtractor::new();

        assert!(extractor.supports_language(Language::JavaScript));
        assert!(extractor.supports_language(Language::Python));
        assert!(extractor.supports_language(Language::Rust));
        assert!(extractor.supports_language(Language::Java));
        assert!(extractor.supports_language(Language::C));

        assert!(!extractor.supports_language(Language::Unknown));

        let supported = extractor.supported_languages();
        assert!(supported.len() >= 5);
        assert!(supported.contains(&Language::JavaScript));
        assert!(supported.contains(&Language::Python));
    }

    /// A realistic JavaScript file yields its line, block and JSDoc comments.
    #[test]
    fn test_javascript_comment_extraction() {
        let source = r#"
// This is a single line comment
function test() {
    /* This is a block comment */
    return 42;
}

/**
 * This is a JSDoc comment
 * @param value The input value
 * @returns The result
 */
function documented(value) {
    return value * 2;
}
"#;

        let js_extractor = JavaScriptCommentExtractor::new();
        let patterns = js_extractor.comment_patterns();
        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns.documentation.contains(&"/**".to_string()));

        // Exercise the fixture through the regex the extractor is built on.
        let comment_texts: Vec<&str> = js_extractor
            .comment_regex
            .find_iter(source)
            .map(|m| m.as_str())
            .collect();
        assert_eq!(comment_texts.len(), 3);
        assert_eq!(comment_texts[0], "// This is a single line comment");
        assert_eq!(comment_texts[1], "/* This is a block comment */");
        assert!(comment_texts[2].starts_with("/**"));
        assert!(comment_texts[2].contains("@returns The result"));
    }

    /// A realistic Python file yields its `#` comment and both docstring styles.
    #[test]
    fn test_python_comment_extraction() {
        let source = r#"
# This is a single line comment
def test():
    """
    This is a docstring
    with multiple lines
    """
    return 42

class Example:
    '''
    Another docstring style
    '''
    pass
"#;

        let py_extractor = PythonCommentExtractor::new();
        let patterns = py_extractor.comment_patterns();
        assert!(patterns.single_line.contains(&"#".to_string()));
        assert!(patterns.documentation.contains(&"\"\"\"".to_string()));
        assert!(patterns.documentation.contains(&"'''".to_string()));

        // Exercise the fixture through the regexes the extractor is built on.
        let comments: Vec<&str> = py_extractor
            .comment_regex
            .find_iter(source)
            .map(|m| m.as_str())
            .collect();
        assert_eq!(comments, vec!["# This is a single line comment"]);

        let docstrings: Vec<&str> = py_extractor
            .docstring_regex
            .find_iter(source)
            .map(|m| m.as_str())
            .collect();
        assert_eq!(docstrings.len(), 2);
        assert!(py_extractor
            .clean_docstring(docstrings[0])
            .contains("This is a docstring"));
        assert!(py_extractor
            .clean_docstring(docstrings[1])
            .contains("Another docstring style"));
    }

    /// The generated Rust extractor advertises C-style delimiters and is registered.
    #[test]
    fn test_rust_comment_extraction() {
        let rust_extractor = RustCommentExtractor::new();
        let patterns = rust_extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));

        let main_extractor = CommentExtractor::new();
        assert!(main_extractor.supports_language(Language::Rust));
    }

    /// The generated Java extractor advertises C-style delimiters and is registered.
    #[test]
    fn test_java_comment_extraction() {
        let java_extractor = JavaCommentExtractor::new();
        let patterns = java_extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));

        let main_extractor = CommentExtractor::new();
        assert!(main_extractor.supports_language(Language::Java));
    }

    /// The generated C extractor advertises C-style delimiters and is registered.
    #[test]
    fn test_c_comment_extraction() {
        let c_extractor = CCommentExtractor::new();
        let patterns = c_extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));

        let main_extractor = CommentExtractor::new();
        assert!(main_extractor.supports_language(Language::C));
    }

    /// JSDoc cleaning keeps tags and text but removes delimiters and the `*` gutter.
    #[test]
    fn test_javascript_jsdoc_cleaning() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let complex_jsdoc = r#"/**
         * Complex JSDoc comment
         * @param {string} name - The name parameter
         * @param {number} age - The age parameter
         * @returns {object} The result object
         * @example
         * // Usage example
         * const result = func("John", 25);
         * @see {@link http://example.com}
         */"#;

        let cleaned = js_extractor.clean_jsdoc_comment(complex_jsdoc);
        assert!(cleaned.contains("Complex JSDoc comment"));
        assert!(cleaned.contains("@param {string} name"));
        assert!(cleaned.contains("@returns {object}"));
        assert!(cleaned.contains("@example"));
        assert!(!cleaned.contains("/**"));
        assert!(!cleaned.contains("*/"));
        assert!(!cleaned.contains(" *"));
    }

    /// Multi-line docstrings of both quote styles lose their delimiters.
    #[test]
    fn test_python_docstring_variations() {
        let py_extractor = PythonCommentExtractor::new();

        // Four quotes after `r#`: one opens the raw string, three belong to the
        // docstring. The same applies, mirrored, at the end.
        let triple_quote = r#""""This is a triple quote docstring
        with multiple lines
        and various content""""#;

        let single_quote = r#"'''This is a single quote docstring
        also with multiple lines'''"#;

        assert!(triple_quote.starts_with("\"\"\""));
        assert!(triple_quote.ends_with("\"\"\""));

        let cleaned_triple = py_extractor.clean_docstring(triple_quote);
        let cleaned_single = py_extractor.clean_docstring(single_quote);

        assert!(!cleaned_triple.contains("\"\"\""));
        assert!(!cleaned_single.contains("'''"));
        assert!(cleaned_triple.starts_with("This is a triple quote docstring"));
        assert!(cleaned_triple.ends_with("and various content"));
        assert!(cleaned_single.contains("single quote docstring"));
    }

    /// The prefixes used to tell block comments from inline comments behave as expected.
    #[test]
    fn test_comment_context_detection() {
        let block_comment = "/* This is a block comment */";
        let inline_comment = "// This is an inline comment";

        assert!(block_comment.starts_with("/*"));
        assert!(block_comment.contains("*/"));

        assert!(inline_comment.starts_with("//"));
        assert!(!inline_comment.contains("*/"));
    }

    /// Span positions stay 1-based for Unix, Windows and mixed line endings.
    #[test]
    fn test_span_calculation_edge_cases() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let source_unix = "line1\n// comment\nline3";
        let source_windows = "line1\r\n// comment\r\nline3";
        let source_mixed = "line1\r\n// comment\nline3\r\n";

        for source in [source_unix, source_windows, source_mixed] {
            // Fail loudly rather than silently skipping the assertions.
            let comment_match = js_extractor
                .comment_regex
                .find(source)
                .expect("Should find comment in source");
            let span = js_extractor.calculate_match_span(&comment_match, source);
            assert!(span.start_line >= 1, "Line numbers should be 1-indexed");
            assert!(
                span.end_line >= span.start_line,
                "End line should be >= start line"
            );
            assert!(span.start_column >= 1, "Column numbers should be 1-indexed");
        }
    }
}