1use super::{CommentContext, ContentChunk, ContentType};
7use crate::ast::{Language, NodeId, Span};
8use anyhow::{anyhow, Result};
9use regex::Regex;
10use std::collections::HashMap;
11use std::path::Path;
12use tree_sitter::Tree;
13
/// Entry point for comment extraction.
///
/// Holds one [`LanguageCommentExtractor`] per supported [`Language`] and
/// forwards extraction requests to the matching one.
pub struct CommentExtractor {
    /// Registered extractors, keyed by the language they are looked up under.
    language_extractors: HashMap<Language, Box<dyn LanguageCommentExtractor>>,
}
19
20impl CommentExtractor {
21 pub fn new() -> Self {
23 let mut extractors: HashMap<Language, Box<dyn LanguageCommentExtractor>> = HashMap::new();
24
25 extractors.insert(
27 Language::JavaScript,
28 Box::new(JavaScriptCommentExtractor::new()),
29 );
30 extractors.insert(
31 Language::TypeScript,
32 Box::new(JavaScriptCommentExtractor::new()),
33 );
34 extractors.insert(Language::Python, Box::new(PythonCommentExtractor::new()));
35 extractors.insert(Language::Java, Box::new(JavaCommentExtractor::new()));
36 extractors.insert(Language::Rust, Box::new(RustCommentExtractor::new()));
37 extractors.insert(Language::C, Box::new(CCommentExtractor::new()));
38 extractors.insert(Language::Cpp, Box::new(CCommentExtractor::new()));
39
40 Self {
41 language_extractors: extractors,
42 }
43 }
44
45 pub fn extract_comments(
47 &self,
48 language: Language,
49 tree: &Tree,
50 source: &str,
51 file_path: &Path,
52 ast_nodes: &[NodeId],
53 ) -> Result<Vec<ContentChunk>> {
54 let extractor = self
55 .language_extractors
56 .get(&language)
57 .ok_or_else(|| anyhow!("No comment extractor for language: {:?}", language))?;
58
59 extractor.extract_comments(tree, source, file_path, ast_nodes)
60 }
61
62 pub fn supports_language(&self, language: Language) -> bool {
64 self.language_extractors.contains_key(&language)
65 }
66
67 pub fn supported_languages(&self) -> Vec<Language> {
69 self.language_extractors.keys().copied().collect()
70 }
71}
72
73impl Default for CommentExtractor {
74 fn default() -> Self {
75 Self::new()
76 }
77}
78
/// Strategy for extracting comments from source code of one language.
///
/// Implementors must be `Send + Sync`.
pub trait LanguageCommentExtractor: Send + Sync {
    /// Extracts the comments found in `source` as [`ContentChunk`]s attributed
    /// to `file_path`.
    ///
    /// `tree` and `ast_nodes` are supplied by the caller; the regex-based
    /// implementations in this file do not read them.
    fn extract_comments(
        &self,
        tree: &Tree,
        source: &str,
        file_path: &Path,
        ast_nodes: &[NodeId],
    ) -> Result<Vec<ContentChunk>>;

    /// Returns the comment delimiters this extractor declares for its language.
    fn comment_patterns(&self) -> &CommentPatterns;
}
93
/// Comment delimiters declared by a [`LanguageCommentExtractor`].
#[derive(Debug, Clone)]
pub struct CommentPatterns {
    /// Markers that start a single-line comment (e.g. `//` or `#`).
    pub single_line: Vec<String>,
    /// `(opening, closing)` delimiter pairs for block comments.
    pub block: Vec<(String, String)>,
    /// Markers that start a documentation comment (e.g. `/**`).
    pub documentation: Vec<String>,
}
104
/// Regex-based comment extractor for JavaScript sources.
///
/// `CommentExtractor::new` registers it for both JavaScript and TypeScript.
pub struct JavaScriptCommentExtractor {
    /// Delimiters reported through `comment_patterns`.
    patterns: CommentPatterns,
    /// Matches either a `//` comment up to the end of its line or a
    /// `/* ... */` block (which includes `/** ... */`).
    comment_regex: Regex,
}
110
111impl Default for JavaScriptCommentExtractor {
112 fn default() -> Self {
113 Self::new()
114 }
115}
116
117impl JavaScriptCommentExtractor {
118 pub fn new() -> Self {
120 Self {
121 patterns: CommentPatterns {
122 single_line: vec!["//".to_string()],
123 block: vec![("/*".to_string(), "*/".to_string())],
124 documentation: vec!["/**".to_string(), "///".to_string()],
125 },
126 comment_regex: Regex::new(r"(?m)//.*$|/\*[\s\S]*?\*/").unwrap(),
127 }
128 }
129}
130
131impl LanguageCommentExtractor for JavaScriptCommentExtractor {
132 fn extract_comments(
133 &self,
134 _tree: &Tree,
135 source: &str,
136 file_path: &Path,
137 _ast_nodes: &[NodeId],
138 ) -> Result<Vec<ContentChunk>> {
139 let mut chunks = Vec::new();
140 let mut chunk_index = 0;
141
142 for comment_match in self.comment_regex.find_iter(source) {
144 let comment_text = comment_match.as_str();
145 let span = self.calculate_match_span(&comment_match, source);
146
147 let cleaned_text = if comment_text.starts_with("/**") {
149 self.clean_jsdoc_comment(comment_text)
150 } else if comment_text.starts_with("/*") {
151 self.clean_block_comment(comment_text)
152 } else {
153 self.clean_single_line_comment(comment_text)
154 };
155
156 if cleaned_text.trim().is_empty() {
158 continue;
159 }
160
161 let context = if comment_text.starts_with("/**") {
162 CommentContext::Documentation
163 } else if comment_text.starts_with("/*") {
164 CommentContext::Block
165 } else {
166 CommentContext::Inline
167 };
168
169 let content_type = ContentType::Comment {
170 language: Language::JavaScript,
171 context,
172 };
173
174 let chunk = ContentChunk::new(
175 file_path.to_path_buf(),
176 content_type,
177 cleaned_text,
178 span,
179 chunk_index,
180 )
181 .with_metadata(serde_json::json!({
182 "raw_text": comment_text,
183 "language": "javascript"
184 }));
185
186 chunks.push(chunk);
187 chunk_index += 1;
188 }
189
190 Ok(chunks)
191 }
192
193 fn comment_patterns(&self) -> &CommentPatterns {
194 &self.patterns
195 }
196}
197
198impl JavaScriptCommentExtractor {
199 fn clean_jsdoc_comment(&self, comment: &str) -> String {
201 comment
202 .trim_start_matches("/**")
203 .trim_end_matches("*/")
204 .lines()
205 .map(|line| line.trim().trim_start_matches('*').trim())
206 .filter(|line| !line.is_empty())
207 .collect::<Vec<_>>()
208 .join("\n")
209 }
210
211 fn clean_block_comment(&self, comment: &str) -> String {
213 comment
214 .trim_start_matches("/*")
215 .trim_end_matches("*/")
216 .trim()
217 .to_string()
218 }
219
220 fn clean_single_line_comment(&self, comment: &str) -> String {
222 comment.trim_start_matches("//").trim().to_string()
223 }
224
225 fn calculate_match_span(&self, match_obj: ®ex::Match, source: &str) -> Span {
227 let start_byte = match_obj.start();
228 let end_byte = match_obj.end();
229
230 let source_before = &source[..start_byte];
231 let start_line = source_before.chars().filter(|&c| c == '\n').count() + 1;
233 let start_column = source_before.lines().last().map(|l| l.len()).unwrap_or(0) + 1;
234
235 let match_content = match_obj.as_str();
236 let lines_in_match = match_content.chars().filter(|&c| c == '\n').count();
237 let end_line = start_line + lines_in_match;
238 let end_column = if lines_in_match > 0 {
239 match_content.lines().last().map(|l| l.len()).unwrap_or(0) + 1
240 } else {
241 start_column + match_content.len()
242 };
243
244 Span::new(
245 start_byte,
246 end_byte,
247 start_line,
248 end_line,
249 start_column,
250 end_column,
251 )
252 }
253}
254
/// Regex-based extractor for Python `#` comments and triple-quoted strings.
pub struct PythonCommentExtractor {
    /// Delimiters reported through `comment_patterns`.
    patterns: CommentPatterns,
    /// Matches a `#` comment up to the end of its line.
    comment_regex: Regex,
    /// Matches a `"""..."""` or `'''...'''` string, possibly spanning lines.
    docstring_regex: Regex,
}
261
262impl Default for PythonCommentExtractor {
263 fn default() -> Self {
264 Self::new()
265 }
266}
267
268impl PythonCommentExtractor {
269 pub fn new() -> Self {
271 Self {
272 patterns: CommentPatterns {
273 single_line: vec!["#".to_string()],
274 block: vec![
275 ("\"\"\"".to_string(), "\"\"\"".to_string()),
276 ("'''".to_string(), "'''".to_string()),
277 ],
278 documentation: vec!["\"\"\"".to_string(), "'''".to_string()],
279 },
280 comment_regex: Regex::new(r"(?m)#.*$").unwrap(),
281 docstring_regex: Regex::new(r#"("""[\s\S]*?"""|'''[\s\S]*?''')"#).unwrap(),
282 }
283 }
284}
285
286impl LanguageCommentExtractor for PythonCommentExtractor {
287 fn extract_comments(
288 &self,
289 _tree: &Tree,
290 source: &str,
291 file_path: &Path,
292 _ast_nodes: &[NodeId],
293 ) -> Result<Vec<ContentChunk>> {
294 let mut chunks = Vec::new();
295 let mut chunk_index = 0;
296
297 for comment_match in self.comment_regex.find_iter(source) {
299 let comment_text = comment_match.as_str();
300 let cleaned_text = comment_text.trim_start_matches('#').trim().to_string();
301
302 if cleaned_text.is_empty() {
303 continue;
304 }
305
306 let span = self.calculate_match_span(&comment_match, source);
307 let content_type = ContentType::Comment {
308 language: Language::Python,
309 context: CommentContext::Inline,
310 };
311
312 let chunk = ContentChunk::new(
313 file_path.to_path_buf(),
314 content_type,
315 cleaned_text,
316 span,
317 chunk_index,
318 )
319 .with_metadata(serde_json::json!({
320 "raw_text": comment_text,
321 "language": "python"
322 }));
323
324 chunks.push(chunk);
325 chunk_index += 1;
326 }
327
328 for docstring_match in self.docstring_regex.find_iter(source) {
330 let docstring_text = docstring_match.as_str();
331 let cleaned_text = self.clean_docstring(docstring_text);
332
333 if cleaned_text.is_empty() {
334 continue;
335 }
336
337 let span = self.calculate_match_span(&docstring_match, source);
338 let content_type = ContentType::Comment {
339 language: Language::Python,
340 context: CommentContext::Documentation,
341 };
342
343 let chunk = ContentChunk::new(
344 file_path.to_path_buf(),
345 content_type,
346 cleaned_text,
347 span,
348 chunk_index,
349 )
350 .with_metadata(serde_json::json!({
351 "raw_text": docstring_text,
352 "language": "python"
353 }));
354
355 chunks.push(chunk);
356 chunk_index += 1;
357 }
358
359 Ok(chunks)
360 }
361
362 fn comment_patterns(&self) -> &CommentPatterns {
363 &self.patterns
364 }
365}
366
367impl PythonCommentExtractor {
368 fn clean_docstring(&self, docstring: &str) -> String {
370 let cleaned = if docstring.starts_with("\"\"\"") {
371 docstring
372 .trim_start_matches("\"\"\"")
373 .trim_end_matches("\"\"\"")
374 } else {
375 docstring.trim_start_matches("'''").trim_end_matches("'''")
376 };
377
378 cleaned.trim().to_string()
379 }
380
381 fn calculate_match_span(&self, match_obj: ®ex::Match, source: &str) -> Span {
383 let start_byte = match_obj.start();
384 let end_byte = match_obj.end();
385
386 let source_before = &source[..start_byte];
387 let start_line = source_before.chars().filter(|&c| c == '\n').count() + 1;
389 let start_column = source_before.lines().last().map(|l| l.len()).unwrap_or(0) + 1;
390
391 let match_content = match_obj.as_str();
392 let lines_in_match = match_content.chars().filter(|&c| c == '\n').count();
393 let end_line = start_line + lines_in_match;
394 let end_column = if lines_in_match > 0 {
395 match_content.lines().last().map(|l| l.len()).unwrap_or(0) + 1
396 } else {
397 start_column + match_content.len()
398 };
399
400 Span::new(
401 start_byte,
402 end_byte,
403 start_line,
404 end_line,
405 start_column,
406 end_column,
407 )
408 }
409}
410
/// Generates a regex-based comment extractor for a language that has one
/// single-line marker and one block-comment delimiter pair.
///
/// Arguments: the name of the generated struct, the `Language` variant its
/// chunks are tagged with, the single-line marker, and the block opening and
/// closing delimiters.
macro_rules! simple_comment_extractor {
    ($name:ident, $language:ident, $single_line:expr, $block_start:expr, $block_end:expr) => {
        /// Macro-generated comment extractor (see `simple_comment_extractor!`).
        pub struct $name {
            /// Delimiters reported through `comment_patterns`.
            patterns: CommentPatterns,
            /// Matches either a block comment or a single-line comment.
            /// Compiled once here instead of on every extraction call.
            comment_regex: Regex,
        }

        impl Default for $name {
            fn default() -> Self {
                Self::new()
            }
        }

        impl $name {
            /// Creates the extractor with its delimiter table and compiled
            /// comment regex.
            pub fn new() -> Self {
                // A single alternation guarantees non-overlapping matches: a
                // single-line marker inside a block comment belongs to the
                // block, and a block opener inside a single-line comment
                // belongs to that line.
                let pattern = format!(
                    r"(?m){}[\s\S]*?{}|{}.*$",
                    regex::escape($block_start),
                    regex::escape($block_end),
                    regex::escape($single_line)
                );

                Self {
                    patterns: CommentPatterns {
                        single_line: vec![$single_line.to_string()],
                        block: vec![($block_start.to_string(), $block_end.to_string())],
                        documentation: vec![],
                    },
                    comment_regex: Regex::new(&pattern)
                        .expect("escaped comment delimiters always form a valid regex"),
                }
            }
        }

        impl LanguageCommentExtractor for $name {
            /// Extracts single-line comments (`Inline`) and block comments
            /// (`Block`) from `source`.
            ///
            /// Chunks are returned with all single-line comments first,
            /// followed by all block comments, each group in source order.
            /// Comments whose cleaned text is empty are skipped. `_tree` and
            /// `_ast_nodes` are not used.
            ///
            /// Matching is purely textual, so comment markers inside string
            /// literals are reported as comments as well.
            fn extract_comments(
                &self,
                _tree: &Tree,
                source: &str,
                file_path: &Path,
                _ast_nodes: &[NodeId],
            ) -> Result<Vec<ContentChunk>> {
                // Assumes the single-line marker is not a prefix of the block
                // opener (true for `//` vs `/*`), so the opener identifies
                // block matches unambiguously.
                let (block_matches, line_matches): (Vec<_>, Vec<_>) = self
                    .comment_regex
                    .find_iter(source)
                    .partition(|found| found.as_str().starts_with($block_start));

                let ordered = line_matches
                    .iter()
                    .map(|found| (found, false))
                    .chain(block_matches.iter().map(|found| (found, true)));

                let mut chunks = Vec::new();
                let mut chunk_index = 0;

                for (found, is_block) in ordered {
                    let comment_text = found.as_str();
                    let (cleaned_text, context) = if is_block {
                        (
                            comment_text
                                .trim_start_matches($block_start)
                                .trim_end_matches($block_end)
                                .trim()
                                .to_string(),
                            CommentContext::Block,
                        )
                    } else {
                        (
                            comment_text
                                .trim_start_matches($single_line)
                                // Also drop extra repeats of the marker's last
                                // character so `/// text` yields `text`, not
                                // `/ text`.
                                .trim_start_matches(|c: char| $single_line.ends_with(c))
                                .trim()
                                .to_string(),
                            CommentContext::Inline,
                        )
                    };

                    if cleaned_text.is_empty() {
                        continue;
                    }

                    let span = self.calculate_match_span(found, source);
                    let content_type = ContentType::Comment {
                        language: Language::$language,
                        context,
                    };

                    chunks.push(ContentChunk::new(
                        file_path.to_path_buf(),
                        content_type,
                        cleaned_text,
                        span,
                        chunk_index,
                    ));
                    chunk_index += 1;
                }

                Ok(chunks)
            }

            /// Returns the comment delimiters passed to the macro.
            fn comment_patterns(&self) -> &CommentPatterns {
                &self.patterns
            }
        }

        impl $name {
            /// Computes the [`Span`] of a regex match inside `source`.
            ///
            /// Lines and columns are 1-indexed; columns are counted in bytes.
            /// The end column points one past the last byte of the match.
            fn calculate_match_span(&self, match_obj: &regex::Match, source: &str) -> Span {
                let start_byte = match_obj.start();
                let end_byte = match_obj.end();

                let source_before = &source[..start_byte];
                let start_line = source_before.matches('\n').count() + 1;
                // Measure from the last newline rather than via
                // `lines().last()`: `lines()` yields no empty final line when
                // the prefix ends in '\n', which made a comment at the start
                // of a line report the previous line's length as its column.
                let line_start = source_before.rfind('\n').map_or(0, |newline| newline + 1);
                let start_column = start_byte - line_start + 1;

                let match_content = match_obj.as_str();
                let lines_in_match = match_content.matches('\n').count();
                let end_line = start_line + lines_in_match;
                let end_column = match match_content.rfind('\n') {
                    // Bytes after the last newline, plus one.
                    Some(newline) => match_content.len() - newline,
                    None => start_column + match_content.len(),
                };

                Span::new(
                    start_byte,
                    end_byte,
                    start_line,
                    end_line,
                    start_column,
                    end_column,
                )
            }
        }
    };
}
558
// Java, Rust and C share the `//` and `/* ... */` delimiters, so all three
// extractors are generated from the same macro and differ only in the
// `Language` they tag their chunks with.
simple_comment_extractor!(JavaCommentExtractor, Java, "//", "/*", "*/");
simple_comment_extractor!(RustCommentExtractor, Rust, "//", "/*", "*/");
// `CommentExtractor::new` registers this extractor for `Language::Cpp` too.
simple_comment_extractor!(CCommentExtractor, C, "//", "/*", "*/");
563
#[cfg(test)]
mod tests {
    use super::*;

    /// The registry knows the built-in languages and rejects `Unknown`.
    #[test]
    fn test_comment_extractor_creation() {
        let extractor = CommentExtractor::new();
        assert!(extractor.supports_language(Language::JavaScript));
        assert!(extractor.supports_language(Language::Python));
        assert!(extractor.supports_language(Language::Rust));
        assert!(!extractor.supports_language(Language::Unknown));

        let supported = extractor.supported_languages();
        assert!(supported.contains(&Language::JavaScript));
        assert!(supported.contains(&Language::Python));
    }

    /// The JavaScript delimiter table lists `//`, `/* */` and `/**`.
    #[test]
    fn test_javascript_comment_patterns() {
        let extractor = JavaScriptCommentExtractor::new();
        let patterns = extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));
        assert!(patterns.documentation.contains(&"/**".to_string()));
    }

    /// The Python delimiter table lists `#` and `"""`.
    #[test]
    fn test_python_comment_patterns() {
        let extractor = PythonCommentExtractor::new();
        let patterns = extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"#".to_string()));
        assert!(patterns
            .block
            .contains(&("\"\"\"".to_string(), "\"\"\"".to_string())));
        assert!(patterns.documentation.contains(&"\"\"\"".to_string()));
    }

    /// The JavaScript regex finds a line comment and a block comment.
    #[test]
    fn test_comment_pattern_matching() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let source = "// Single line comment\n/* Block comment */";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert_eq!(matches.len(), 2, "Should find 2 comment matches");

        assert_eq!(matches[0].as_str(), "// Single line comment");
        assert_eq!(matches[1].as_str(), "/* Block comment */");
    }

    /// Each JavaScript cleaning helper removes its own delimiters.
    #[test]
    fn test_comment_cleaning() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let jsdoc = "/**\n * This is a JSDoc comment\n * @param value The input value\n */";
        let cleaned = js_extractor.clean_jsdoc_comment(jsdoc);
        assert!(cleaned.contains("This is a JSDoc comment"));
        assert!(cleaned.contains("@param value The input value"));
        assert!(!cleaned.contains("/**"));
        assert!(!cleaned.contains("*/"));

        let block = "/* This is a block comment */";
        let cleaned = js_extractor.clean_block_comment(block);
        assert_eq!(cleaned, "This is a block comment");

        let single = "// This is a single line comment";
        let cleaned = js_extractor.clean_single_line_comment(single);
        assert_eq!(cleaned, "This is a single line comment");
    }

    /// Both Python docstring quote styles are stripped.
    #[test]
    fn test_python_docstring_cleaning() {
        let py_extractor = PythonCommentExtractor::new();

        let docstring = "\"\"\"This is a docstring\nwith multiple lines\"\"\"";
        let cleaned = py_extractor.clean_docstring(docstring);
        assert!(cleaned.contains("This is a docstring"));
        assert!(!cleaned.contains("\"\"\""));

        let docstring = "'''This is another docstring'''";
        let cleaned = py_extractor.clean_docstring(docstring);
        assert_eq!(cleaned, "This is another docstring");
    }

    /// A single-line comment on line 2 gets a one-line, 1-indexed span.
    #[test]
    fn test_span_calculation() {
        let js_extractor = JavaScriptCommentExtractor::new();
        let source = "const x = 5;\n// This is a comment\nconst y = 10;";

        if let Some(comment_match) = js_extractor.comment_regex.find(source) {
            let span = js_extractor.calculate_match_span(&comment_match, source);

            assert_eq!(span.start_line, 2);
            assert_eq!(span.end_line, 2);
            assert!(span.start_column >= 1);
            assert!(span.end_column > span.start_column);
            assert_eq!(comment_match.as_str(), "// This is a comment");
        } else {
            panic!("Should find comment in source");
        }
    }

    /// Nested openers, trailing comments and empty comments all still match.
    #[test]
    fn test_regex_edge_cases() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let source = "/* outer /* inner */ comment */";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert!(
            !matches.is_empty(),
            "Should handle nested comments gracefully"
        );

        let source = "const x = 5; // Comment at end";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert_eq!(matches.len(), 1);
        assert_eq!(matches[0].as_str(), "// Comment at end");

        let source = "// \n/* */";
        let matches: Vec<_> = js_extractor.comment_regex.find_iter(source).collect();
        assert_eq!(matches.len(), 2);
    }

    /// Mixed trailing, full-line and multi-line block comments are all found.
    #[test]
    fn test_comprehensive_regex_edge_cases() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let multiline_source = r#"
const x = 1; // Comment on line 2
// Another comment on line 3
/* Block comment
   spanning multiple
   lines */
const y = 2; // Final comment
"#;

        let matches: Vec<_> = js_extractor
            .comment_regex
            .find_iter(multiline_source)
            .collect();
        assert!(
            matches.len() >= 4,
            "Should find all comment types including multiline block"
        );

        let comment_texts: Vec<&str> = matches.iter().map(|m| m.as_str()).collect();
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("Comment on line 2")));
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("Another comment")));
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("spanning multiple")));
        assert!(comment_texts
            .iter()
            .any(|&text| text.contains("Final comment")));
    }

    /// The registry covers at least the five core languages.
    #[test]
    fn test_main_comment_extractor() {
        let extractor = CommentExtractor::new();

        assert!(extractor.supports_language(Language::JavaScript));
        assert!(extractor.supports_language(Language::Python));
        assert!(extractor.supports_language(Language::Rust));
        assert!(extractor.supports_language(Language::Java));
        assert!(extractor.supports_language(Language::C));

        assert!(!extractor.supports_language(Language::Unknown));

        let supported = extractor.supported_languages();
        assert!(supported.len() >= 5);
        assert!(supported.contains(&Language::JavaScript));
        assert!(supported.contains(&Language::Python));
    }

    /// Only checks the delimiter table; the sample source is kept as a
    /// fixture but is not run through `extract_comments` here.
    #[test]
    fn test_javascript_comment_extraction() {
        let _extractor = CommentExtractor::new();
        let _file_path = std::path::Path::new("test.js");

        let _source = r#"
// This is a single line comment
function test() {
    /* This is a block comment */
    return 42;
}

/**
 * This is a JSDoc comment
 * @param value The input value
 * @returns The result
 */
function documented(value) {
    return value * 2;
}
"#;

        let js_extractor = JavaScriptCommentExtractor::new();
        let patterns = js_extractor.comment_patterns();
        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns.documentation.contains(&"/**".to_string()));
    }

    /// Only checks the delimiter table; the sample source is kept as a
    /// fixture but is not run through `extract_comments` here.
    #[test]
    fn test_python_comment_extraction() {
        let _extractor = CommentExtractor::new();
        let _file_path = std::path::Path::new("test.py");

        let _source = r#"
# This is a single line comment
def test():
    """
    This is a docstring
    with multiple lines
    """
    return 42

class Example:
    '''
    Another docstring style
    '''
    pass
"#;

        let py_extractor = PythonCommentExtractor::new();
        let patterns = py_extractor.comment_patterns();
        assert!(patterns.single_line.contains(&"#".to_string()));
        assert!(patterns.documentation.contains(&"\"\"\"".to_string()));
        assert!(patterns.documentation.contains(&"'''".to_string()));
    }

    /// The generated Rust extractor exposes `//` and `/* */` and is registered.
    #[test]
    fn test_rust_comment_extraction() {
        let rust_extractor = RustCommentExtractor::new();
        let patterns = rust_extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));

        let main_extractor = CommentExtractor::new();
        assert!(main_extractor.supports_language(Language::Rust));
    }

    /// The generated Java extractor exposes `//` and `/* */` and is registered.
    #[test]
    fn test_java_comment_extraction() {
        let java_extractor = JavaCommentExtractor::new();
        let patterns = java_extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));

        let main_extractor = CommentExtractor::new();
        assert!(main_extractor.supports_language(Language::Java));
    }

    /// The generated C extractor exposes `//` and `/* */` and is registered.
    #[test]
    fn test_c_comment_extraction() {
        let c_extractor = CCommentExtractor::new();
        let patterns = c_extractor.comment_patterns();

        assert!(patterns.single_line.contains(&"//".to_string()));
        assert!(patterns
            .block
            .contains(&("/*".to_string(), "*/".to_string())));

        let main_extractor = CommentExtractor::new();
        assert!(main_extractor.supports_language(Language::C));
    }

    /// A realistic multi-tag JSDoc block loses all of its `*` decoration.
    #[test]
    fn test_javascript_jsdoc_cleaning() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let complex_jsdoc = r#"/**
 * Complex JSDoc comment
 * @param {string} name - The name parameter
 * @param {number} age - The age parameter
 * @returns {object} The result object
 * @example
 * // Usage example
 * const result = func("John", 25);
 * @see {@link http://example.com}
 */"#;

        let cleaned = js_extractor.clean_jsdoc_comment(complex_jsdoc);
        assert!(cleaned.contains("Complex JSDoc comment"));
        assert!(cleaned.contains("@param {string} name"));
        assert!(cleaned.contains("@returns {object}"));
        assert!(cleaned.contains("@example"));
        assert!(!cleaned.contains("/**"));
        assert!(!cleaned.contains("*/"));
        assert!(!cleaned.contains(" *"));
    }

    /// Multi-line docstrings in both quote styles are stripped.
    #[test]
    fn test_python_docstring_variations() {
        let py_extractor = PythonCommentExtractor::new();

        // The raw string needs FOUR quotes after `r#` (one opens the raw
        // string, three belong to the docstring). With only three, the value
        // is delimited by two quotes and the `"""` branch is never exercised.
        let triple_quote = r#""""This is a triple quote docstring
        with multiple lines
        and various content""""#;

        let single_quote = r#"'''This is a single quote docstring
        also with multiple lines'''"#;

        let cleaned_triple = py_extractor.clean_docstring(triple_quote);
        let cleaned_single = py_extractor.clean_docstring(single_quote);

        assert!(!cleaned_triple.contains("\"\"\""));
        assert!(!cleaned_single.contains("'''"));
        assert!(cleaned_triple.contains("triple quote docstring"));
        assert!(cleaned_single.contains("single quote docstring"));
    }

    /// Sanity-checks the delimiter shapes used to tell block from inline
    /// comments.
    #[test]
    fn test_comment_context_detection() {
        let _js_extractor = JavaScriptCommentExtractor::new();

        let block_comment = "/* This is a block comment */";
        let inline_comment = "// This is an inline comment";

        assert!(block_comment.starts_with("/*"));
        assert!(block_comment.contains("*/"));

        assert!(inline_comment.starts_with("//"));
        assert!(!inline_comment.contains("*/"));
    }

    /// Spans stay 1-indexed and ordered for LF, CRLF and mixed line endings.
    #[test]
    fn test_span_calculation_edge_cases() {
        let js_extractor = JavaScriptCommentExtractor::new();

        let source_unix = "line1\n// comment\nline3";
        let source_windows = "line1\r\n// comment\r\nline3";
        let source_mixed = "line1\r\n// comment\nline3\r\n";

        for source in [source_unix, source_windows, source_mixed] {
            if let Some(comment_match) = js_extractor.comment_regex.find(source) {
                let span = js_extractor.calculate_match_span(&comment_match, source);
                assert!(span.start_line >= 1, "Line numbers should be 1-indexed");
                assert!(
                    span.end_line >= span.start_line,
                    "End line should be >= start line"
                );
                assert!(span.start_column >= 1, "Column numbers should be 1-indexed");
            }
        }
    }
}