Skip to main content

infiniloom_engine/analysis/
documentation.rs

1//! Documentation extraction and parsing for all supported languages
2//!
3//! Parses JSDoc, Python docstrings, Rust doc comments, JavaDoc, etc.
4//! into structured documentation format.
5
6use crate::analysis::types::{Documentation, Example, ParamDoc, ReturnDoc, ThrowsDoc};
7use crate::parser::Language;
8use regex::Regex;
9
10/// Extracts and parses documentation from source code
11pub struct DocumentationExtractor {
12    // Precompiled regex patterns
13    jsdoc_param: Regex,
14    jsdoc_returns: Regex,
15    jsdoc_throws: Regex,
16    jsdoc_example: Regex,
17    jsdoc_tag: Regex,
18    python_param: Regex,
19    python_returns: Regex,
20    python_raises: Regex,
21    rust_param: Regex,
22}
23
24impl DocumentationExtractor {
25    /// Create a new documentation extractor
26    pub fn new() -> Self {
27        Self {
28            // JSDoc patterns
29            jsdoc_param: Regex::new(r"@param\s+(?:\{([^}]+)\}\s+)?(\[)?(\w+)\]?\s*(?:-\s*)?(.*)")
30                .unwrap(),
31            jsdoc_returns: Regex::new(r"@returns?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
32            jsdoc_throws: Regex::new(r"@throws?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
33            // Note: Example parsing is done manually in parse_jsdoc via in_example state
34            jsdoc_example: Regex::new(r"@example\s*").unwrap(),
35            jsdoc_tag: Regex::new(r"@(\w+)\s+(.*)").unwrap(),
36
37            // Python docstring patterns (Google/NumPy style)
38            python_param: Regex::new(r"^\s*(\w+)\s*(?:\(([^)]+)\))?\s*:\s*(.*)$").unwrap(),
39            python_returns: Regex::new(r"^\s*(?:(\w+)\s*:\s*)?(.*)$").unwrap(),
40            python_raises: Regex::new(r"^\s*(\w+)\s*:\s*(.*)$").unwrap(),
41
42            // Rust doc patterns
43            rust_param: Regex::new(r"^\s*\*\s+`(\w+)`\s*(?:-\s*)?(.*)$").unwrap(),
44        }
45    }
46
47    /// Extract documentation from a docstring/comment based on language
48    pub fn extract(&self, raw_doc: &str, language: Language) -> Documentation {
49        let raw_doc = raw_doc.trim();
50        if raw_doc.is_empty() {
51            return Documentation::default();
52        }
53
54        match language {
55            Language::JavaScript | Language::TypeScript => self.parse_jsdoc(raw_doc),
56            Language::Python => self.parse_python_docstring(raw_doc),
57            Language::Rust => self.parse_rust_doc(raw_doc),
58            Language::Java | Language::Kotlin => self.parse_javadoc(raw_doc),
59            Language::Go => self.parse_go_doc(raw_doc),
60            Language::Ruby => self.parse_ruby_doc(raw_doc),
61            Language::Php => self.parse_phpdoc(raw_doc),
62            Language::CSharp => self.parse_csharp_doc(raw_doc),
63            Language::Swift => self.parse_swift_doc(raw_doc),
64            Language::Scala => self.parse_scaladoc(raw_doc),
65            Language::Haskell => self.parse_haddock(raw_doc),
66            Language::Elixir => self.parse_exdoc(raw_doc),
67            Language::Clojure => self.parse_clojure_doc(raw_doc),
68            Language::OCaml => self.parse_ocamldoc(raw_doc),
69            Language::Lua => self.parse_luadoc(raw_doc),
70            Language::R => self.parse_roxygen(raw_doc),
71            Language::Cpp | Language::C => self.parse_doxygen(raw_doc),
72            Language::Bash => self.parse_bash_comment(raw_doc),
73            // Handle any language not explicitly matched (e.g., FSharp)
74            _ => self.parse_generic(raw_doc),
75        }
76    }
77
78    /// Parse JSDoc style documentation
79    fn parse_jsdoc(&self, raw: &str) -> Documentation {
80        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
81
82        // Remove comment markers
83        let content = self.strip_comment_markers(raw, "/**", "*/", "*");
84
85        // Split into lines
86        let lines: Vec<&str> = content.lines().collect();
87
88        // First non-tag lines are the description
89        let mut description_lines = Vec::new();
90        let mut in_description = true;
91        let mut current_example = String::new();
92        let mut in_example = false;
93
94        for line in &lines {
95            let line = line.trim();
96
97            if line.starts_with('@') {
98                in_description = false;
99
100                // End any current example
101                if in_example && !line.starts_with("@example") {
102                    if !current_example.is_empty() {
103                        doc.examples.push(Example {
104                            code: current_example.trim().to_owned(),
105                            ..Default::default()
106                        });
107                    }
108                    current_example.clear();
109                    in_example = false;
110                }
111
112                // Parse different tags
113                if let Some(caps) = self.jsdoc_param.captures(line) {
114                    let type_info = caps.get(1).map(|m| m.as_str().to_owned());
115                    let is_optional = caps.get(2).is_some();
116                    let name = caps.get(3).map_or("", |m| m.as_str());
117                    let desc = caps.get(4).map_or("", |m| m.as_str());
118
119                    doc.params.push(ParamDoc {
120                        name: name.to_owned(),
121                        type_info,
122                        description: if desc.is_empty() {
123                            None
124                        } else {
125                            Some(desc.to_owned())
126                        },
127                        is_optional,
128                        default_value: None,
129                    });
130                } else if let Some(caps) = self.jsdoc_returns.captures(line) {
131                    doc.returns = Some(ReturnDoc {
132                        type_info: caps.get(1).map(|m| m.as_str().to_owned()),
133                        description: caps.get(2).map(|m| m.as_str().to_owned()),
134                    });
135                } else if let Some(caps) = self.jsdoc_throws.captures(line) {
136                    doc.throws.push(ThrowsDoc {
137                        exception_type: caps
138                            .get(1)
139                            .map_or_else(|| "Error".to_owned(), |m| m.as_str().to_owned()),
140                        description: caps.get(2).map(|m| m.as_str().to_owned()),
141                    });
142                } else if line.starts_with("@example") {
143                    in_example = true;
144                    // Content after @example on same line
145                    let after_tag = line.strip_prefix("@example").unwrap_or("").trim();
146                    if !after_tag.is_empty() {
147                        current_example.push_str(after_tag);
148                        current_example.push('\n');
149                    }
150                } else if line.starts_with("@deprecated") {
151                    doc.is_deprecated = true;
152                    let msg = line.strip_prefix("@deprecated").unwrap_or("").trim();
153                    if !msg.is_empty() {
154                        doc.deprecation_message = Some(msg.to_owned());
155                    }
156                } else if let Some(caps) = self.jsdoc_tag.captures(line) {
157                    let tag = caps.get(1).map_or("", |m| m.as_str());
158                    let value = caps.get(2).map_or("", |m| m.as_str());
159                    doc.tags
160                        .entry(tag.to_owned())
161                        .or_default()
162                        .push(value.to_owned());
163                }
164            } else if in_example {
165                current_example.push_str(line);
166                current_example.push('\n');
167            } else if in_description {
168                description_lines.push(line);
169            }
170        }
171
172        // Handle last example
173        if !current_example.is_empty() {
174            doc.examples
175                .push(Example { code: current_example.trim().to_owned(), ..Default::default() });
176        }
177
178        // Set description
179        if !description_lines.is_empty() {
180            let full_desc = description_lines.join("\n");
181            let sentences: Vec<&str> = full_desc.split(". ").collect();
182            if !sentences.is_empty() {
183                doc.summary = Some(sentences[0].to_owned());
184            }
185            doc.description = Some(full_desc);
186        }
187
188        doc
189    }
190
191    /// Parse Python docstring (Google/NumPy/Sphinx style)
192    fn parse_python_docstring(&self, raw: &str) -> Documentation {
193        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
194
195        // Remove triple quotes
196        let content = raw
197            .trim_start_matches("\"\"\"")
198            .trim_end_matches("\"\"\"")
199            .trim_start_matches("'''")
200            .trim_end_matches("'''")
201            .trim();
202
203        let lines: Vec<&str> = content.lines().collect();
204
205        #[derive(PartialEq)]
206        enum Section {
207            Description,
208            Args,
209            Returns,
210            Raises,
211            Example,
212            Other,
213        }
214
215        let mut section = Section::Description;
216        let mut description_lines = Vec::new();
217        let mut current_param: Option<ParamDoc> = None;
218        let mut current_example = String::new();
219
220        for line in lines {
221            let trimmed = line.trim();
222
223            // Check for section headers
224            if trimmed == "Args:" || trimmed == "Arguments:" || trimmed == "Parameters:" {
225                section = Section::Args;
226                continue;
227            } else if trimmed == "Returns:" || trimmed == "Return:" {
228                section = Section::Returns;
229                continue;
230            } else if trimmed == "Raises:" || trimmed == "Throws:" || trimmed == "Exceptions:" {
231                section = Section::Raises;
232                continue;
233            } else if trimmed == "Example:" || trimmed == "Examples:" {
234                section = Section::Example;
235                continue;
236            } else if trimmed.ends_with(':') && !trimmed.contains(' ') {
237                section = Section::Other;
238                continue;
239            }
240
241            match section {
242                Section::Description => {
243                    description_lines.push(trimmed);
244                },
245                Section::Args => {
246                    if let Some(caps) = self.python_param.captures(trimmed) {
247                        // Save previous param
248                        if let Some(param) = current_param.take() {
249                            doc.params.push(param);
250                        }
251
252                        let name = caps.get(1).map_or("", |m| m.as_str());
253                        let type_info = caps.get(2).map(|m| m.as_str().to_owned());
254                        let desc = caps.get(3).map(|m| m.as_str());
255
256                        current_param = Some(ParamDoc {
257                            name: name.to_owned(),
258                            type_info,
259                            description: desc.map(String::from),
260                            is_optional: false,
261                            default_value: None,
262                        });
263                    } else if let Some(ref mut param) = current_param {
264                        // Continuation of previous param description
265                        if let Some(ref mut desc) = param.description {
266                            desc.push(' ');
267                            desc.push_str(trimmed);
268                        }
269                    }
270                },
271                Section::Returns => {
272                    if doc.returns.is_none() {
273                        if let Some(caps) = self.python_returns.captures(trimmed) {
274                            doc.returns = Some(ReturnDoc {
275                                type_info: caps.get(1).map(|m| m.as_str().to_owned()),
276                                description: caps.get(2).map(|m| m.as_str().to_owned()),
277                            });
278                        }
279                    } else if let Some(ref mut ret) = doc.returns {
280                        if let Some(ref mut desc) = ret.description {
281                            desc.push(' ');
282                            desc.push_str(trimmed);
283                        }
284                    }
285                },
286                Section::Raises => {
287                    if let Some(caps) = self.python_raises.captures(trimmed) {
288                        doc.throws.push(ThrowsDoc {
289                            exception_type: caps
290                                .get(1)
291                                .map(|m| m.as_str().to_owned())
292                                .unwrap_or_default(),
293                            description: caps.get(2).map(|m| m.as_str().to_owned()),
294                        });
295                    }
296                },
297                Section::Example => {
298                    current_example.push_str(line);
299                    current_example.push('\n');
300                },
301                Section::Other => {},
302            }
303        }
304
305        // Save last param
306        if let Some(param) = current_param {
307            doc.params.push(param);
308        }
309
310        // Save example
311        if !current_example.is_empty() {
312            doc.examples.push(Example {
313                code: current_example.trim().to_owned(),
314                language: Some("python".to_owned()),
315                ..Default::default()
316            });
317        }
318
319        // Set description
320        let desc = description_lines.join(" ");
321        if !desc.is_empty() {
322            let sentences: Vec<&str> = desc.split(". ").collect();
323            if !sentences.is_empty() {
324                doc.summary = Some(sentences[0].to_owned());
325            }
326            doc.description = Some(desc);
327        }
328
329        doc
330    }
331
332    /// Parse Rust doc comments
333    fn parse_rust_doc(&self, raw: &str) -> Documentation {
334        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
335
336        // Remove /// or //! or /** */
337        let content = self.strip_rust_doc_markers(raw);
338
339        let lines: Vec<&str> = content.lines().collect();
340
341        #[derive(PartialEq)]
342        enum Section {
343            Description,
344            Arguments,
345            Returns,
346            Errors,
347            Panics,
348            Examples,
349            Safety,
350        }
351
352        let mut section = Section::Description;
353        let mut description_lines = Vec::new();
354        let mut current_example = String::new();
355
356        for line in lines {
357            let trimmed = line.trim();
358
359            // Check for section headers (# Headers in Rust docs)
360            if trimmed.starts_with("# ") {
361                let header = trimmed[2..].to_lowercase();
362                section = match header.as_str() {
363                    "arguments" | "parameters" => Section::Arguments,
364                    "returns" => Section::Returns,
365                    "errors" => Section::Errors,
366                    "panics" => Section::Panics,
367                    "examples" | "example" => Section::Examples,
368                    "safety" => Section::Safety,
369                    _ => Section::Description,
370                };
371                continue;
372            }
373
374            match section {
375                Section::Description => {
376                    description_lines.push(trimmed);
377                },
378                Section::Arguments => {
379                    if let Some(caps) = self.rust_param.captures(trimmed) {
380                        doc.params.push(ParamDoc {
381                            name: caps
382                                .get(1)
383                                .map(|m| m.as_str().to_owned())
384                                .unwrap_or_default(),
385                            description: caps.get(2).map(|m| m.as_str().to_owned()),
386                            ..Default::default()
387                        });
388                    }
389                },
390                Section::Returns => {
391                    if doc.returns.is_none() {
392                        doc.returns = Some(ReturnDoc {
393                            description: Some(trimmed.to_owned()),
394                            ..Default::default()
395                        });
396                    }
397                },
398                Section::Errors => {
399                    if !trimmed.is_empty() {
400                        doc.throws.push(ThrowsDoc {
401                            exception_type: "Error".to_owned(),
402                            description: Some(trimmed.to_owned()),
403                        });
404                    }
405                },
406                Section::Panics => {
407                    doc.tags
408                        .entry("panics".to_owned())
409                        .or_default()
410                        .push(trimmed.to_owned());
411                },
412                Section::Examples => {
413                    current_example.push_str(line);
414                    current_example.push('\n');
415                },
416                Section::Safety => {
417                    doc.tags
418                        .entry("safety".to_owned())
419                        .or_default()
420                        .push(trimmed.to_owned());
421                },
422            }
423        }
424
425        // Save example
426        if !current_example.is_empty() {
427            // Extract code blocks (```rust ... ```)
428            let code_block_re = Regex::new(r"```(?:rust)?\n([\s\S]*?)```").unwrap();
429            for caps in code_block_re.captures_iter(&current_example) {
430                if let Some(code) = caps.get(1) {
431                    doc.examples.push(Example {
432                        code: code.as_str().trim().to_owned(),
433                        language: Some("rust".to_owned()),
434                        ..Default::default()
435                    });
436                }
437            }
438        }
439
440        // Set description
441        let desc = description_lines.join(" ");
442        if !desc.is_empty() {
443            let sentences: Vec<&str> = desc.split(". ").collect();
444            if !sentences.is_empty() {
445                doc.summary = Some(sentences[0].to_owned());
446            }
447            doc.description = Some(desc);
448        }
449
450        doc
451    }
452
453    /// Parse JavaDoc style documentation
454    fn parse_javadoc(&self, raw: &str) -> Documentation {
455        // JavaDoc is similar to JSDoc
456        self.parse_jsdoc(raw)
457    }
458
459    /// Parse Go doc comments
460    fn parse_go_doc(&self, raw: &str) -> Documentation {
461        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
462
463        // Go uses simple // comments
464        let content: String = raw
465            .lines()
466            .map(|l| l.trim_start_matches("//").trim())
467            .collect::<Vec<_>>()
468            .join(" ");
469
470        // First sentence is summary
471        let sentences: Vec<&str> = content.split(". ").collect();
472        if !sentences.is_empty() {
473            doc.summary = Some(sentences[0].to_owned());
474        }
475        doc.description = Some(content);
476
477        // Check for Deprecated
478        if raw.to_lowercase().contains("deprecated") {
479            doc.is_deprecated = true;
480        }
481
482        doc
483    }
484
485    /// Parse Ruby RDoc/YARD
486    fn parse_ruby_doc(&self, raw: &str) -> Documentation {
487        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
488
489        let content = self.strip_comment_markers(raw, "=begin", "=end", "#");
490
491        // YARD style @param, @return, @raise
492        let param_re = Regex::new(r"@param\s+\[([^\]]+)\]\s+(\w+)\s+(.*)").unwrap();
493        let return_re = Regex::new(r"@return\s+\[([^\]]+)\]\s+(.*)").unwrap();
494        let raise_re = Regex::new(r"@raise\s+\[([^\]]+)\]\s+(.*)").unwrap();
495
496        for line in content.lines() {
497            let line = line.trim();
498
499            if let Some(caps) = param_re.captures(line) {
500                doc.params.push(ParamDoc {
501                    name: caps
502                        .get(2)
503                        .map(|m| m.as_str().to_owned())
504                        .unwrap_or_default(),
505                    type_info: caps.get(1).map(|m| m.as_str().to_owned()),
506                    description: caps.get(3).map(|m| m.as_str().to_owned()),
507                    ..Default::default()
508                });
509            } else if let Some(caps) = return_re.captures(line) {
510                doc.returns = Some(ReturnDoc {
511                    type_info: caps.get(1).map(|m| m.as_str().to_owned()),
512                    description: caps.get(2).map(|m| m.as_str().to_owned()),
513                });
514            } else if let Some(caps) = raise_re.captures(line) {
515                doc.throws.push(ThrowsDoc {
516                    exception_type: caps
517                        .get(1)
518                        .map(|m| m.as_str().to_owned())
519                        .unwrap_or_default(),
520                    description: caps.get(2).map(|m| m.as_str().to_owned()),
521                });
522            } else if !line.starts_with('@') && doc.description.is_none() {
523                doc.description = Some(line.to_owned());
524                doc.summary = Some(line.to_owned());
525            }
526        }
527
528        doc
529    }
530
531    /// Parse PHPDoc
532    fn parse_phpdoc(&self, raw: &str) -> Documentation {
533        // PHPDoc is similar to JSDoc
534        self.parse_jsdoc(raw)
535    }
536
537    /// Parse C# XML documentation
538    fn parse_csharp_doc(&self, raw: &str) -> Documentation {
539        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
540
541        // C# uses XML documentation
542        let summary_re = Regex::new(r"<summary>([\s\S]*?)</summary>").unwrap();
543        let param_re = Regex::new(r#"<param name="(\w+)">([\s\S]*?)</param>"#).unwrap();
544        let returns_re = Regex::new(r"<returns>([\s\S]*?)</returns>").unwrap();
545        let exception_re =
546            Regex::new(r#"<exception cref="([^"]+)">([\s\S]*?)</exception>"#).unwrap();
547
548        if let Some(caps) = summary_re.captures(raw) {
549            let summary = caps.get(1).map(|m| m.as_str().trim().to_owned());
550            doc.summary = summary.clone();
551            doc.description = summary;
552        }
553
554        for caps in param_re.captures_iter(raw) {
555            doc.params.push(ParamDoc {
556                name: caps
557                    .get(1)
558                    .map(|m| m.as_str().to_owned())
559                    .unwrap_or_default(),
560                description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
561                ..Default::default()
562            });
563        }
564
565        if let Some(caps) = returns_re.captures(raw) {
566            doc.returns = Some(ReturnDoc {
567                description: caps.get(1).map(|m| m.as_str().trim().to_owned()),
568                ..Default::default()
569            });
570        }
571
572        for caps in exception_re.captures_iter(raw) {
573            doc.throws.push(ThrowsDoc {
574                exception_type: caps
575                    .get(1)
576                    .map(|m| m.as_str().to_owned())
577                    .unwrap_or_default(),
578                description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
579            });
580        }
581
582        doc
583    }
584
585    /// Parse Swift documentation comments
586    fn parse_swift_doc(&self, raw: &str) -> Documentation {
587        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
588
589        // Swift uses /// or /** */ with - Parameter:, - Returns:, - Throws:
590        let content = self.strip_comment_markers(raw, "/**", "*/", "///");
591
592        let param_re = Regex::new(r"-\s*Parameter\s+(\w+):\s*(.*)").unwrap();
593        let returns_re = Regex::new(r"-\s*Returns:\s*(.*)").unwrap();
594        let throws_re = Regex::new(r"-\s*Throws:\s*(.*)").unwrap();
595
596        let mut description_lines = Vec::new();
597
598        for line in content.lines() {
599            let line = line.trim();
600
601            if let Some(caps) = param_re.captures(line) {
602                doc.params.push(ParamDoc {
603                    name: caps
604                        .get(1)
605                        .map(|m| m.as_str().to_owned())
606                        .unwrap_or_default(),
607                    description: caps.get(2).map(|m| m.as_str().to_owned()),
608                    ..Default::default()
609                });
610            } else if let Some(caps) = returns_re.captures(line) {
611                doc.returns = Some(ReturnDoc {
612                    description: caps.get(1).map(|m| m.as_str().to_owned()),
613                    ..Default::default()
614                });
615            } else if let Some(caps) = throws_re.captures(line) {
616                doc.throws.push(ThrowsDoc {
617                    exception_type: "Error".to_owned(),
618                    description: caps.get(1).map(|m| m.as_str().to_owned()),
619                });
620            } else if !line.starts_with('-') && !line.is_empty() {
621                description_lines.push(line);
622            }
623        }
624
625        if !description_lines.is_empty() {
626            let desc = description_lines.join(" ");
627            doc.summary = Some(description_lines[0].to_owned());
628            doc.description = Some(desc);
629        }
630
631        doc
632    }
633
634    /// Parse ScalaDoc
635    fn parse_scaladoc(&self, raw: &str) -> Documentation {
636        // ScalaDoc is similar to JavaDoc
637        self.parse_javadoc(raw)
638    }
639
640    /// Parse Haddock (Haskell)
641    fn parse_haddock(&self, raw: &str) -> Documentation {
642        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
643
644        // Haddock uses -- | or {- | -}
645        let content = raw
646            .lines()
647            .map(|l| {
648                l.trim_start_matches("--")
649                    .trim_start_matches('|')
650                    .trim_start_matches('^')
651                    .trim()
652            })
653            .collect::<Vec<_>>()
654            .join(" ");
655
656        doc.description = Some(content.clone());
657        let sentences: Vec<&str> = content.split(". ").collect();
658        if !sentences.is_empty() {
659            doc.summary = Some(sentences[0].to_owned());
660        }
661
662        doc
663    }
664
665    /// Parse ExDoc (Elixir)
666    fn parse_exdoc(&self, raw: &str) -> Documentation {
667        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
668
669        // ExDoc uses @doc """ ... """ or @moduledoc
670        let content = raw
671            .trim_start_matches("@doc")
672            .trim_start_matches("@moduledoc")
673            .trim()
674            .trim_start_matches("\"\"\"")
675            .trim_end_matches("\"\"\"")
676            .trim();
677
678        // Parse markdown-style documentation
679        let lines: Vec<&str> = content.lines().collect();
680        let mut description_lines = Vec::new();
681
682        for line in lines {
683            let trimmed = line.trim();
684
685            // Check for ## Parameters, ## Returns, etc.
686            if trimmed.starts_with("##") {
687                // Section header
688                continue;
689            }
690
691            if trimmed.starts_with('*') || trimmed.starts_with('-') {
692                // List item - could be a parameter
693                let item = trimmed.trim_start_matches(['*', '-']).trim();
694                if item.contains(':') {
695                    let parts: Vec<&str> = item.splitn(2, ':').collect();
696                    if parts.len() == 2 {
697                        doc.params.push(ParamDoc {
698                            name: parts[0].trim().to_owned(),
699                            description: Some(parts[1].trim().to_owned()),
700                            ..Default::default()
701                        });
702                    }
703                }
704            } else if !trimmed.is_empty() {
705                description_lines.push(trimmed);
706            }
707        }
708
709        if !description_lines.is_empty() {
710            doc.summary = Some(description_lines[0].to_owned());
711            doc.description = Some(description_lines.join(" "));
712        }
713
714        doc
715    }
716
717    /// Parse Clojure docstring
718    fn parse_clojure_doc(&self, raw: &str) -> Documentation {
719        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
720
721        // Clojure docstrings are simple strings
722        let content = raw.trim_matches('"');
723
724        doc.description = Some(content.to_owned());
725        let sentences: Vec<&str> = content.split(". ").collect();
726        if !sentences.is_empty() {
727            doc.summary = Some(sentences[0].to_owned());
728        }
729
730        doc
731    }
732
733    /// Parse OCamldoc
734    fn parse_ocamldoc(&self, raw: &str) -> Documentation {
735        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
736
737        // OCamldoc uses (** ... *)
738        let content = raw.trim_start_matches("(**").trim_end_matches("*)").trim();
739
740        // Parse @param, @return, @raise
741        let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
742        let return_re = Regex::new(r"@return\s+(.*)").unwrap();
743        let raise_re = Regex::new(r"@raise\s+(\w+)\s+(.*)").unwrap();
744
745        let mut description_lines = Vec::new();
746
747        for line in content.lines() {
748            let line = line.trim();
749
750            if let Some(caps) = param_re.captures(line) {
751                doc.params.push(ParamDoc {
752                    name: caps
753                        .get(1)
754                        .map(|m| m.as_str().to_owned())
755                        .unwrap_or_default(),
756                    description: caps.get(2).map(|m| m.as_str().to_owned()),
757                    ..Default::default()
758                });
759            } else if let Some(caps) = return_re.captures(line) {
760                doc.returns = Some(ReturnDoc {
761                    description: caps.get(1).map(|m| m.as_str().to_owned()),
762                    ..Default::default()
763                });
764            } else if let Some(caps) = raise_re.captures(line) {
765                doc.throws.push(ThrowsDoc {
766                    exception_type: caps
767                        .get(1)
768                        .map(|m| m.as_str().to_owned())
769                        .unwrap_or_default(),
770                    description: caps.get(2).map(|m| m.as_str().to_owned()),
771                });
772            } else if !line.starts_with('@') {
773                description_lines.push(line);
774            }
775        }
776
777        if !description_lines.is_empty() {
778            doc.summary = Some(description_lines[0].to_owned());
779            doc.description = Some(description_lines.join(" "));
780        }
781
782        doc
783    }
784
785    /// Parse LuaDoc
786    fn parse_luadoc(&self, raw: &str) -> Documentation {
787        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
788
789        // LuaDoc uses --- or --[[ ]]
790        let content: String = raw
791            .lines()
792            .map(|l| l.trim_start_matches("---").trim_start_matches("--").trim())
793            .collect::<Vec<_>>()
794            .join("\n");
795
796        // Parse @param, @return
797        let param_re = Regex::new(r"@param\s+(\w+)\s+(\w+)\s*(.*)").unwrap();
798        let return_re = Regex::new(r"@return\s+(\w+)\s*(.*)").unwrap();
799
800        let mut description_lines = Vec::new();
801
802        for line in content.lines() {
803            let line = line.trim();
804
805            if let Some(caps) = param_re.captures(line) {
806                doc.params.push(ParamDoc {
807                    name: caps
808                        .get(1)
809                        .map(|m| m.as_str().to_owned())
810                        .unwrap_or_default(),
811                    type_info: caps.get(2).map(|m| m.as_str().to_owned()),
812                    description: caps.get(3).map(|m| m.as_str().to_owned()),
813                    ..Default::default()
814                });
815            } else if let Some(caps) = return_re.captures(line) {
816                doc.returns = Some(ReturnDoc {
817                    type_info: caps.get(1).map(|m| m.as_str().to_owned()),
818                    description: caps.get(2).map(|m| m.as_str().to_owned()),
819                });
820            } else if !line.starts_with('@') {
821                description_lines.push(line);
822            }
823        }
824
825        if !description_lines.is_empty() {
826            doc.summary = Some(description_lines[0].to_owned());
827            doc.description = Some(description_lines.join(" "));
828        }
829
830        doc
831    }
832
833    /// Parse Roxygen2 (R)
834    fn parse_roxygen(&self, raw: &str) -> Documentation {
835        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
836
837        // Roxygen uses #' @param, #' @return, etc.
838        let content: String = raw
839            .lines()
840            .map(|l| l.trim_start_matches("#'").trim())
841            .collect::<Vec<_>>()
842            .join("\n");
843
844        let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
845        let return_re = Regex::new(r"@return\s+(.*)").unwrap();
846
847        let mut description_lines = Vec::new();
848
849        for line in content.lines() {
850            let line = line.trim();
851
852            if let Some(caps) = param_re.captures(line) {
853                doc.params.push(ParamDoc {
854                    name: caps
855                        .get(1)
856                        .map(|m| m.as_str().to_owned())
857                        .unwrap_or_default(),
858                    description: caps.get(2).map(|m| m.as_str().to_owned()),
859                    ..Default::default()
860                });
861            } else if let Some(caps) = return_re.captures(line) {
862                doc.returns = Some(ReturnDoc {
863                    description: caps.get(1).map(|m| m.as_str().to_owned()),
864                    ..Default::default()
865                });
866            } else if !line.starts_with('@') {
867                description_lines.push(line);
868            }
869        }
870
871        if !description_lines.is_empty() {
872            doc.summary = Some(description_lines[0].to_owned());
873            doc.description = Some(description_lines.join(" "));
874        }
875
876        doc
877    }
878
879    /// Parse Doxygen (C/C++)
880    fn parse_doxygen(&self, raw: &str) -> Documentation {
881        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
882
883        // Doxygen uses /** */, //!, \param, \return, etc.
884        let content = self.strip_comment_markers(raw, "/**", "*/", "*");
885
886        let param_re = Regex::new(r"[@\\]param(?:\[(?:in|out|in,out)\])?\s+(\w+)\s+(.*)").unwrap();
887        let return_re = Regex::new(r"[@\\]returns?\s+(.*)").unwrap();
888        let throws_re = Regex::new(r"[@\\](?:throws?|exception)\s+(\w+)\s*(.*)").unwrap();
889        let brief_re = Regex::new(r"[@\\]brief\s+(.*)").unwrap();
890
891        let mut description_lines = Vec::new();
892
893        for line in content.lines() {
894            let line = line.trim();
895
896            if let Some(caps) = brief_re.captures(line) {
897                doc.summary = caps.get(1).map(|m| m.as_str().to_owned());
898            } else if let Some(caps) = param_re.captures(line) {
899                doc.params.push(ParamDoc {
900                    name: caps
901                        .get(1)
902                        .map(|m| m.as_str().to_owned())
903                        .unwrap_or_default(),
904                    description: caps.get(2).map(|m| m.as_str().to_owned()),
905                    ..Default::default()
906                });
907            } else if let Some(caps) = return_re.captures(line) {
908                doc.returns = Some(ReturnDoc {
909                    description: caps.get(1).map(|m| m.as_str().to_owned()),
910                    ..Default::default()
911                });
912            } else if let Some(caps) = throws_re.captures(line) {
913                doc.throws.push(ThrowsDoc {
914                    exception_type: caps
915                        .get(1)
916                        .map(|m| m.as_str().to_owned())
917                        .unwrap_or_default(),
918                    description: caps.get(2).map(|m| m.as_str().to_owned()),
919                });
920            } else if !line.starts_with('@') && !line.starts_with('\\') {
921                description_lines.push(line);
922            }
923        }
924
925        if doc.summary.is_none() && !description_lines.is_empty() {
926            doc.summary = Some(description_lines[0].to_owned());
927        }
928        if !description_lines.is_empty() {
929            doc.description = Some(description_lines.join(" "));
930        }
931
932        doc
933    }
934
935    /// Parse bash script comments
936    fn parse_bash_comment(&self, raw: &str) -> Documentation {
937        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
938
939        let content: String = raw
940            .lines()
941            .map(|l| l.trim_start_matches('#').trim())
942            .filter(|l| !l.is_empty())
943            .collect::<Vec<_>>()
944            .join(" ");
945
946        doc.description = Some(content.clone());
947        let sentences: Vec<&str> = content.split(". ").collect();
948        if !sentences.is_empty() {
949            doc.summary = Some(sentences[0].to_owned());
950        }
951
952        doc
953    }
954
955    /// Parse generic comment (fallback)
956    fn parse_generic(&self, raw: &str) -> Documentation {
957        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
958
959        // Strip common comment markers
960        let content: String = raw
961            .lines()
962            .map(|l| {
963                l.trim()
964                    .trim_start_matches("//")
965                    .trim_start_matches("/*")
966                    .trim_end_matches("*/")
967                    .trim_start_matches('#')
968                    .trim_start_matches("--")
969                    .trim_start_matches(";;")
970                    .trim()
971            })
972            .filter(|l| !l.is_empty())
973            .collect::<Vec<_>>()
974            .join(" ");
975
976        doc.description = Some(content.clone());
977        let sentences: Vec<&str> = content.split(". ").collect();
978        if !sentences.is_empty() {
979            doc.summary = Some(sentences[0].to_owned());
980        }
981
982        doc
983    }
984
985    // Helper methods
986
987    fn strip_comment_markers(&self, raw: &str, start: &str, end: &str, line: &str) -> String {
988        let mut content = raw
989            .trim()
990            .trim_start_matches(start)
991            .trim_end_matches(end)
992            .to_owned();
993
994        // Remove line prefixes
995        content = content
996            .lines()
997            .map(|l| {
998                let trimmed = l.trim();
999                if trimmed.starts_with(line) {
1000                    trimmed[line.len()..].trim_start()
1001                } else {
1002                    trimmed
1003                }
1004            })
1005            .collect::<Vec<_>>()
1006            .join("\n");
1007
1008        content
1009    }
1010
1011    fn strip_rust_doc_markers(&self, raw: &str) -> String {
1012        raw.lines()
1013            .map(|l| {
1014                let trimmed = l.trim();
1015                if trimmed.starts_with("///") {
1016                    trimmed[3..].trim_start()
1017                } else if trimmed.starts_with("//!") {
1018                    trimmed[3..].trim_start()
1019                } else if trimmed.starts_with("/**") {
1020                    trimmed[3..].trim_start()
1021                } else if trimmed.starts_with('*') {
1022                    trimmed[1..].trim_start()
1023                } else if trimmed == "*/" {
1024                    ""
1025                } else {
1026                    trimmed
1027                }
1028            })
1029            .collect::<Vec<_>>()
1030            .join("\n")
1031    }
1032}
1033
1034impl Default for DocumentationExtractor {
1035    fn default() -> Self {
1036        Self::new()
1037    }
1038}
1039
1040#[cfg(test)]
1041mod tests {
1042    use super::*;
1043
1044    // ---------------------------------------------------------------
1045    // Helper
1046    // ---------------------------------------------------------------
1047
1048    fn ext() -> DocumentationExtractor {
1049        DocumentationExtractor::new()
1050    }
1051
1052    // ---------------------------------------------------------------
1053    // Empty / whitespace edge cases
1054    // ---------------------------------------------------------------
1055
1056    #[test]
1057    fn test_empty_string_returns_default() {
1058        let doc = ext().extract("", Language::JavaScript);
1059        assert!(doc.summary.is_none());
1060        assert!(doc.description.is_none());
1061        assert!(doc.params.is_empty());
1062        assert!(doc.returns.is_none());
1063        assert!(doc.throws.is_empty());
1064        assert!(doc.examples.is_empty());
1065        assert!(!doc.is_deprecated);
1066        assert!(doc.raw.is_none());
1067    }
1068
1069    #[test]
1070    fn test_whitespace_only_returns_default() {
1071        let doc = ext().extract("   \n\t  \n  ", Language::Python);
1072        assert!(doc.summary.is_none());
1073        assert!(doc.raw.is_none());
1074    }
1075
1076    // ---------------------------------------------------------------
1077    // JSDoc / JavaScript / TypeScript
1078    // ---------------------------------------------------------------
1079
1080    #[test]
1081    fn test_jsdoc_parsing() {
1082        let jsdoc = r#"/**
1083         * Calculate the sum of two numbers.
1084         *
1085         * @param {number} a - The first number
1086         * @param {number} b - The second number
1087         * @returns {number} The sum of a and b
1088         * @throws {Error} If inputs are not numbers
1089         * @example
1090         * add(1, 2) // returns 3
1091         */
1092        "#;
1093
1094        let doc = ext().extract(jsdoc, Language::JavaScript);
1095
1096        assert!(doc.summary.is_some());
1097        assert!(doc.summary.unwrap().contains("Calculate"));
1098        assert_eq!(doc.params.len(), 2);
1099        assert_eq!(doc.params[0].name, "a");
1100        assert!(doc.params[0].type_info.as_ref().unwrap().contains("number"));
1101        assert!(doc.returns.is_some());
1102        assert_eq!(doc.throws.len(), 1);
1103        assert_eq!(doc.examples.len(), 1);
1104    }
1105
1106    #[test]
1107    fn test_jsdoc_optional_param() {
1108        let jsdoc = "/**\n * @param {string} [name] - Optional name\n */";
1109        let doc = ext().extract(jsdoc, Language::JavaScript);
1110
1111        assert_eq!(doc.params.len(), 1);
1112        assert_eq!(doc.params[0].name, "name");
1113        assert!(doc.params[0].is_optional);
1114        assert_eq!(doc.params[0].type_info.as_deref(), Some("string"));
1115    }
1116
1117    #[test]
1118    fn test_jsdoc_param_no_type() {
1119        let jsdoc = "/**\n * @param x - The value\n */";
1120        let doc = ext().extract(jsdoc, Language::JavaScript);
1121
1122        assert_eq!(doc.params.len(), 1);
1123        assert_eq!(doc.params[0].name, "x");
1124        assert!(doc.params[0].type_info.is_none());
1125        assert_eq!(doc.params[0].description.as_deref(), Some("The value"));
1126    }
1127
1128    #[test]
1129    fn test_jsdoc_param_no_description() {
1130        let jsdoc = "/**\n * @param {number} x\n */";
1131        let doc = ext().extract(jsdoc, Language::JavaScript);
1132
1133        assert_eq!(doc.params.len(), 1);
1134        assert_eq!(doc.params[0].name, "x");
1135        // Empty description gets stored as None
1136        assert!(doc.params[0].description.is_none());
1137    }
1138
1139    #[test]
1140    fn test_jsdoc_multiple_throws() {
1141        let jsdoc = "/**\n * Do stuff.\n * @throws {TypeError} Bad type\n * @throws {RangeError} Out of range\n */";
1142        let doc = ext().extract(jsdoc, Language::JavaScript);
1143
1144        assert_eq!(doc.throws.len(), 2);
1145        assert_eq!(doc.throws[0].exception_type, "TypeError");
1146        assert_eq!(doc.throws[1].exception_type, "RangeError");
1147    }
1148
1149    #[test]
1150    fn test_jsdoc_returns_without_type() {
1151        let jsdoc = "/**\n * @returns The result\n */";
1152        let doc = ext().extract(jsdoc, Language::JavaScript);
1153
1154        assert!(doc.returns.is_some());
1155        let ret = doc.returns.unwrap();
1156        assert!(ret.type_info.is_none());
1157        assert_eq!(ret.description.as_deref(), Some("The result"));
1158    }
1159
1160    #[test]
1161    fn test_jsdoc_multiple_examples() {
1162        // Two @example tags: the second starts with @example so the condition
1163        // `!line.starts_with("@example")` is false, meaning the first example
1164        // is not saved separately. Both lines end up in one combined example.
1165        let jsdoc = "/**\n * Math helper.\n * @example\n * add(1,2)\n * @example\n * add(3,4)\n */";
1166        let doc = ext().extract(jsdoc, Language::JavaScript);
1167
1168        assert_eq!(doc.examples.len(), 1);
1169        assert!(doc.examples[0].code.contains("add(1,2)"));
1170        assert!(doc.examples[0].code.contains("add(3,4)"));
1171    }
1172
1173    #[test]
1174    fn test_jsdoc_deprecated_without_message() {
1175        let jsdoc = "/**\n * Old.\n * @deprecated\n */";
1176        let doc = ext().extract(jsdoc, Language::JavaScript);
1177
1178        assert!(doc.is_deprecated);
1179        // @deprecated with nothing after it: deprecation_message should be None
1180        assert!(doc.deprecation_message.is_none());
1181    }
1182
1183    #[test]
1184    fn test_jsdoc_deprecated_with_message() {
1185        let jsdoc = "/**\n * Old.\n * @deprecated Use bar instead\n */";
1186        let doc = ext().extract(jsdoc, Language::JavaScript);
1187
1188        assert!(doc.is_deprecated);
1189        assert_eq!(doc.deprecation_message.as_deref(), Some("Use bar instead"));
1190    }
1191
1192    #[test]
1193    fn test_jsdoc_custom_tags() {
1194        let jsdoc = "/**\n * My func.\n * @since 2.0\n * @see otherFunc\n */";
1195        let doc = ext().extract(jsdoc, Language::JavaScript);
1196
1197        assert!(doc.tags.contains_key("since"));
1198        assert!(doc.tags.contains_key("see"));
1199        assert_eq!(doc.tags["since"][0], "2.0");
1200    }
1201
1202    #[test]
1203    fn test_jsdoc_multiline_description() {
1204        let jsdoc = "/**\n * First sentence. Second sentence.\n * Third sentence.\n */";
1205        let doc = ext().extract(jsdoc, Language::JavaScript);
1206
1207        // Summary is first part split by ". "
1208        let summary = doc.summary.unwrap();
1209        assert!(summary.contains("First sentence"));
1210        let desc = doc.description.unwrap();
1211        assert!(desc.contains("Third sentence."));
1212    }
1213
1214    #[test]
1215    fn test_jsdoc_typescript_dispatch() {
1216        let jsdoc = "/**\n * A TS function.\n * @param {string} s - input\n */";
1217        let doc = ext().extract(jsdoc, Language::TypeScript);
1218
1219        assert!(doc.summary.unwrap().contains("TS function"));
1220        assert_eq!(doc.params.len(), 1);
1221    }
1222
1223    #[test]
1224    fn test_jsdoc_example_with_inline_content() {
1225        let jsdoc = "/**\n * Func.\n * @example const x = foo();\n */";
1226        let doc = ext().extract(jsdoc, Language::JavaScript);
1227
1228        assert_eq!(doc.examples.len(), 1);
1229        assert!(doc.examples[0].code.contains("const x = foo();"));
1230    }
1231
1232    // ---------------------------------------------------------------
1233    // Python docstrings
1234    // ---------------------------------------------------------------
1235
1236    #[test]
1237    fn test_python_docstring_parsing() {
1238        let docstring = r#""""
1239        Calculate the sum of two numbers.
1240
1241        Args:
1242            a (int): The first number
1243            b (int): The second number
1244
1245        Returns:
1246            int: The sum of a and b
1247
1248        Raises:
1249            ValueError: If inputs are not integers
1250        """"#;
1251
1252        let doc = ext().extract(docstring, Language::Python);
1253
1254        assert!(doc.summary.is_some());
1255        assert!(doc.summary.unwrap().contains("Calculate"));
1256        assert_eq!(doc.params.len(), 2);
1257        assert_eq!(doc.params[0].name, "a");
1258        assert!(doc.returns.is_some());
1259        assert_eq!(doc.throws.len(), 1);
1260    }
1261
1262    #[test]
1263    fn test_python_single_quote_docstring() {
1264        let docstring = "'''Sum two numbers.\n\nArgs:\n    x (float): first\n'''";
1265        let doc = ext().extract(docstring, Language::Python);
1266
1267        assert!(doc.summary.unwrap().contains("Sum two numbers"));
1268        assert_eq!(doc.params.len(), 1);
1269        assert_eq!(doc.params[0].name, "x");
1270        assert_eq!(doc.params[0].type_info.as_deref(), Some("float"));
1271    }
1272
1273    #[test]
1274    fn test_python_parameters_header() {
1275        let docstring = "\"\"\"Do stuff.\n\nParameters:\n    n (int): count\n\"\"\"";
1276        let doc = ext().extract(docstring, Language::Python);
1277
1278        assert_eq!(doc.params.len(), 1);
1279        assert_eq!(doc.params[0].name, "n");
1280    }
1281
1282    #[test]
1283    fn test_python_arguments_header() {
1284        let docstring = "\"\"\"Do stuff.\n\nArguments:\n    n (int): count\n\"\"\"";
1285        let doc = ext().extract(docstring, Language::Python);
1286
1287        assert_eq!(doc.params.len(), 1);
1288        assert_eq!(doc.params[0].name, "n");
1289    }
1290
1291    #[test]
1292    fn test_python_multiple_raises() {
1293        let docstring =
1294            "\"\"\"Do stuff.\n\nRaises:\n    ValueError: bad\n    TypeError: wrong type\n\"\"\"";
1295        let doc = ext().extract(docstring, Language::Python);
1296
1297        assert_eq!(doc.throws.len(), 2);
1298        assert_eq!(doc.throws[0].exception_type, "ValueError");
1299        assert_eq!(doc.throws[1].exception_type, "TypeError");
1300    }
1301
1302    #[test]
1303    fn test_python_throws_header() {
1304        let docstring = "\"\"\"Do stuff.\n\nThrows:\n    IOError: disk full\n\"\"\"";
1305        let doc = ext().extract(docstring, Language::Python);
1306
1307        assert_eq!(doc.throws.len(), 1);
1308        assert_eq!(doc.throws[0].exception_type, "IOError");
1309    }
1310
1311    #[test]
1312    fn test_python_exceptions_header() {
1313        let docstring = "\"\"\"Do stuff.\n\nExceptions:\n    OSError: not found\n\"\"\"";
1314        let doc = ext().extract(docstring, Language::Python);
1315
1316        assert_eq!(doc.throws.len(), 1);
1317        assert_eq!(doc.throws[0].exception_type, "OSError");
1318    }
1319
1320    #[test]
1321    fn test_python_example_section() {
1322        let docstring = "\"\"\"Do stuff.\n\nExample:\n    >>> foo(1)\n    42\n\"\"\"";
1323        let doc = ext().extract(docstring, Language::Python);
1324
1325        assert_eq!(doc.examples.len(), 1);
1326        assert!(doc.examples[0].code.contains("foo(1)"));
1327        assert_eq!(doc.examples[0].language.as_deref(), Some("python"));
1328    }
1329
1330    #[test]
1331    fn test_python_examples_plural_header() {
1332        let docstring = "\"\"\"Do stuff.\n\nExamples:\n    >>> bar()\n\"\"\"";
1333        let doc = ext().extract(docstring, Language::Python);
1334
1335        assert_eq!(doc.examples.len(), 1);
1336    }
1337
1338    #[test]
1339    fn test_python_return_singular_header() {
1340        let docstring = "\"\"\"Do stuff.\n\nReturn:\n    int: the result\n\"\"\"";
1341        let doc = ext().extract(docstring, Language::Python);
1342
1343        assert!(doc.returns.is_some());
1344    }
1345
1346    #[test]
1347    fn test_python_param_no_type() {
1348        let docstring = "\"\"\"Do stuff.\n\nArgs:\n    name: the name value\n\"\"\"";
1349        let doc = ext().extract(docstring, Language::Python);
1350
1351        assert_eq!(doc.params.len(), 1);
1352        assert_eq!(doc.params[0].name, "name");
1353        assert!(doc.params[0].type_info.is_none());
1354    }
1355
1356    #[test]
1357    fn test_python_multiline_param_description() {
1358        let docstring =
1359            "\"\"\"Do stuff.\n\nArgs:\n    x (int): First line\n        continued here\n\"\"\"";
1360        let doc = ext().extract(docstring, Language::Python);
1361
1362        assert_eq!(doc.params.len(), 1);
1363        let desc = doc.params[0].description.as_ref().unwrap();
1364        assert!(desc.contains("First line"));
1365        assert!(desc.contains("continued here"));
1366    }
1367
1368    #[test]
1369    fn test_python_multiline_returns_description() {
1370        let docstring =
1371            "\"\"\"Do stuff.\n\nReturns:\n    int: First line\n        more info\n\"\"\"";
1372        let doc = ext().extract(docstring, Language::Python);
1373
1374        let ret = doc.returns.unwrap();
1375        let desc = ret.description.unwrap();
1376        assert!(desc.contains("First line"));
1377        assert!(desc.contains("more info"));
1378    }
1379
1380    #[test]
1381    fn test_python_description_only() {
1382        let docstring = "\"\"\"A simple description with no sections.\"\"\"";
1383        let doc = ext().extract(docstring, Language::Python);
1384
1385        assert!(doc.summary.unwrap().contains("simple description"));
1386        assert!(doc.params.is_empty());
1387        assert!(doc.returns.is_none());
1388    }
1389
1390    #[test]
1391    fn test_python_other_section_ignored() {
1392        let docstring = "\"\"\"Do stuff.\n\nNotes:\n    Some note here.\n\"\"\"";
1393        let doc = ext().extract(docstring, Language::Python);
1394
1395        // "Notes:" triggers Section::Other, content ignored
1396        assert!(doc.params.is_empty());
1397        assert!(doc.returns.is_none());
1398    }
1399
1400    // ---------------------------------------------------------------
1401    // Rust doc comments
1402    // ---------------------------------------------------------------
1403
1404    #[test]
1405    fn test_rust_doc_parsing() {
1406        let rust_doc = "/// Calculate the sum of two numbers.\n///\n/// # Arguments\n///\n/// * `a` - The first number\n/// * `b` - The second number\n///\n/// # Returns\n///\n/// The sum of a and b";
1407
1408        let doc = ext().extract(rust_doc, Language::Rust);
1409
1410        assert!(doc.summary.is_some());
1411        assert!(doc.summary.unwrap().contains("Calculate"));
1412        assert_eq!(doc.params.len(), 2);
1413        assert_eq!(doc.params[0].name, "a");
1414        assert_eq!(doc.params[1].name, "b");
1415        assert!(doc.returns.is_some());
1416    }
1417
1418    #[test]
1419    fn test_rust_inner_doc_comment() {
1420        let doc_str = "//! Module level documentation.\n//! Second line.";
1421        let doc = ext().extract(doc_str, Language::Rust);
1422
1423        assert!(doc.summary.unwrap().contains("Module level documentation"));
1424    }
1425
1426    #[test]
1427    fn test_rust_block_doc_comment() {
1428        let doc_str = "/** Block doc comment.\n * More details here.\n */";
1429        let doc = ext().extract(doc_str, Language::Rust);
1430
1431        assert!(doc.description.unwrap().contains("Block doc comment"));
1432    }
1433
1434    #[test]
1435    fn test_rust_errors_section() {
1436        let doc_str = "/// Do something.\n///\n/// # Errors\n///\n/// Returns Err if file not found.\n/// Also returns Err on permission denied.";
1437        let doc = ext().extract(doc_str, Language::Rust);
1438
1439        assert_eq!(doc.throws.len(), 2);
1440        assert_eq!(doc.throws[0].exception_type, "Error");
1441        assert!(doc.throws[0]
1442            .description
1443            .as_ref()
1444            .unwrap()
1445            .contains("file not found"));
1446    }
1447
1448    #[test]
1449    fn test_rust_panics_section() {
1450        // The empty line after # Panics produces an empty entry, then the real content
1451        let doc_str =
1452            "/// Do something.\n///\n/// # Panics\n///\n/// Panics if index is out of bounds.";
1453        let doc = ext().extract(doc_str, Language::Rust);
1454
1455        assert!(doc.tags.contains_key("panics"));
1456        let panics_entries = &doc.tags["panics"];
1457        assert!(panics_entries.iter().any(|e| e.contains("out of bounds")));
1458    }
1459
1460    #[test]
1461    fn test_rust_safety_section() {
1462        // The empty line after # Safety produces an empty entry, then the real content
1463        let doc_str =
1464            "/// Unsafe op.\n///\n/// # Safety\n///\n/// Caller must ensure pointer is valid.";
1465        let doc = ext().extract(doc_str, Language::Rust);
1466
1467        assert!(doc.tags.contains_key("safety"));
1468        let safety_entries = &doc.tags["safety"];
1469        assert!(safety_entries
1470            .iter()
1471            .any(|e| e.contains("pointer is valid")));
1472    }
1473
1474    #[test]
1475    fn test_rust_examples_with_code_block() {
1476        let doc_str =
1477            "/// A function.\n///\n/// # Examples\n///\n/// ```rust\n/// let x = foo();\n/// ```";
1478        let doc = ext().extract(doc_str, Language::Rust);
1479
1480        assert_eq!(doc.examples.len(), 1);
1481        assert!(doc.examples[0].code.contains("let x = foo();"));
1482        assert_eq!(doc.examples[0].language.as_deref(), Some("rust"));
1483    }
1484
1485    #[test]
1486    fn test_rust_examples_code_block_no_lang() {
1487        let doc_str = "/// A function.\n///\n/// # Examples\n///\n/// ```\n/// foo();\n/// ```";
1488        let doc = ext().extract(doc_str, Language::Rust);
1489
1490        assert_eq!(doc.examples.len(), 1);
1491        assert!(doc.examples[0].code.contains("foo();"));
1492    }
1493
1494    #[test]
1495    fn test_rust_example_singular_header() {
1496        let doc_str = "/// A function.\n///\n/// # Example\n///\n/// ```\n/// bar();\n/// ```";
1497        let doc = ext().extract(doc_str, Language::Rust);
1498
1499        assert_eq!(doc.examples.len(), 1);
1500    }
1501
1502    #[test]
1503    fn test_rust_parameters_header() {
1504        let doc_str = "/// Do it.\n///\n/// # Parameters\n///\n/// * `x` - The x value";
1505        let doc = ext().extract(doc_str, Language::Rust);
1506
1507        assert_eq!(doc.params.len(), 1);
1508        assert_eq!(doc.params[0].name, "x");
1509    }
1510
1511    #[test]
1512    fn test_rust_unknown_header_falls_back_to_description() {
1513        let doc_str = "/// Do it.\n///\n/// # Implementation Details\n///\n/// Uses a hash map.";
1514        let doc = ext().extract(doc_str, Language::Rust);
1515
1516        // Unknown headers fall back to Section::Description
1517        let desc = doc.description.unwrap();
1518        assert!(desc.contains("Uses a hash map"));
1519    }
1520
1521    #[test]
1522    fn test_rust_errors_empty_lines_skipped() {
1523        let doc_str = "/// Do it.\n///\n/// # Errors\n///\n/// \n/// Real error here.";
1524        let doc = ext().extract(doc_str, Language::Rust);
1525
1526        // Empty line should not produce a ThrowsDoc entry
1527        assert_eq!(doc.throws.len(), 1);
1528        assert!(doc.throws[0]
1529            .description
1530            .as_ref()
1531            .unwrap()
1532            .contains("Real error"));
1533    }
1534
1535    // ---------------------------------------------------------------
1536    // JavaDoc / Kotlin (delegates to JSDoc parser)
1537    // ---------------------------------------------------------------
1538
1539    #[test]
1540    fn test_javadoc_parsing() {
1541        let javadoc = "/**\n * Process the data.\n *\n * @param input the input data\n * @return the processed result\n * @throws IOException if reading fails\n */";
1542        let doc = ext().extract(javadoc, Language::Java);
1543
1544        assert!(doc.summary.unwrap().contains("Process the data"));
1545        assert_eq!(doc.params.len(), 1);
1546        assert_eq!(doc.params[0].name, "input");
1547        assert!(doc.returns.is_some());
1548        assert_eq!(doc.throws.len(), 1);
1549    }
1550
1551    #[test]
1552    fn test_kotlin_delegates_to_javadoc() {
1553        let kdoc = "/**\n * Kotlin function.\n * @param name the name\n */";
1554        let doc = ext().extract(kdoc, Language::Kotlin);
1555
1556        assert!(doc.summary.unwrap().contains("Kotlin function"));
1557        assert_eq!(doc.params.len(), 1);
1558    }
1559
1560    // ---------------------------------------------------------------
1561    // Go doc comments
1562    // ---------------------------------------------------------------
1563
1564    #[test]
1565    fn test_go_doc_basic() {
1566        let go_doc = "// Calculate returns the sum of a and b. It panics on overflow.";
1567        let doc = ext().extract(go_doc, Language::Go);
1568
1569        assert_eq!(doc.summary.as_deref(), Some("Calculate returns the sum of a and b"));
1570        let desc = doc.description.unwrap();
1571        assert!(desc.contains("panics on overflow"));
1572    }
1573
1574    #[test]
1575    fn test_go_doc_multiline() {
1576        let go_doc = "// First line.\n// Second line.\n// Third line.";
1577        let doc = ext().extract(go_doc, Language::Go);
1578
1579        let desc = doc.description.unwrap();
1580        assert!(desc.contains("First line."));
1581        assert!(desc.contains("Third line."));
1582    }
1583
1584    #[test]
1585    fn test_go_doc_deprecated() {
1586        let go_doc = "// Deprecated: Use NewFunc instead.\n// This function is old.";
1587        let doc = ext().extract(go_doc, Language::Go);
1588
1589        assert!(doc.is_deprecated);
1590    }
1591
1592    #[test]
1593    fn test_go_doc_not_deprecated() {
1594        let go_doc = "// Process handles the request.";
1595        let doc = ext().extract(go_doc, Language::Go);
1596
1597        assert!(!doc.is_deprecated);
1598    }
1599
1600    // ---------------------------------------------------------------
1601    // Ruby YARD
1602    // ---------------------------------------------------------------
1603
1604    #[test]
1605    fn test_ruby_yard_doc() {
1606        let yard = "# Calculate the sum.\n# @param [Integer] a the first number\n# @param [Integer] b the second number\n# @return [Integer] the sum\n# @raise [ArgumentError] if inputs are invalid";
1607        let doc = ext().extract(yard, Language::Ruby);
1608
1609        assert!(doc.summary.unwrap().contains("Calculate the sum"));
1610        assert_eq!(doc.params.len(), 2);
1611        assert_eq!(doc.params[0].name, "a");
1612        assert_eq!(doc.params[0].type_info.as_deref(), Some("Integer"));
1613        assert!(doc.returns.is_some());
1614        assert_eq!(doc.returns.unwrap().type_info.as_deref(), Some("Integer"));
1615        assert_eq!(doc.throws.len(), 1);
1616        assert_eq!(doc.throws[0].exception_type, "ArgumentError");
1617    }
1618
1619    #[test]
1620    fn test_ruby_description_only() {
1621        let yard = "# A simple helper method.";
1622        let doc = ext().extract(yard, Language::Ruby);
1623
1624        assert!(doc.summary.unwrap().contains("simple helper"));
1625        assert!(doc.params.is_empty());
1626    }
1627
1628    // ---------------------------------------------------------------
1629    // PHP (delegates to JSDoc parser)
1630    // ---------------------------------------------------------------
1631
1632    #[test]
1633    fn test_phpdoc_parsing() {
1634        let phpdoc =
1635            "/**\n * Send an email.\n * @param string $to Recipient address\n * @return bool\n */";
1636        let doc = ext().extract(phpdoc, Language::Php);
1637
1638        assert!(doc.summary.unwrap().contains("Send an email"));
1639        assert_eq!(doc.params.len(), 1);
1640        assert!(doc.returns.is_some());
1641    }
1642
1643    // ---------------------------------------------------------------
1644    // C# XML documentation
1645    // ---------------------------------------------------------------
1646
1647    #[test]
1648    fn test_csharp_xml_doc() {
1649        let csharp_doc = "/// <summary>\n/// Calculates the area.\n/// </summary>\n/// <param name=\"width\">The width</param>\n/// <param name=\"height\">The height</param>\n/// <returns>The area value</returns>\n/// <exception cref=\"ArgumentException\">If negative</exception>";
1650        let doc = ext().extract(csharp_doc, Language::CSharp);
1651
1652        assert!(doc.summary.unwrap().contains("Calculates the area"));
1653        assert_eq!(doc.params.len(), 2);
1654        assert_eq!(doc.params[0].name, "width");
1655        assert_eq!(doc.params[1].name, "height");
1656        assert!(doc.returns.is_some());
1657        assert!(doc
1658            .returns
1659            .unwrap()
1660            .description
1661            .unwrap()
1662            .contains("area value"));
1663        assert_eq!(doc.throws.len(), 1);
1664        assert_eq!(doc.throws[0].exception_type, "ArgumentException");
1665    }
1666
1667    #[test]
1668    fn test_csharp_summary_only() {
1669        let csharp_doc = "/// <summary>Simple summary.</summary>";
1670        let doc = ext().extract(csharp_doc, Language::CSharp);
1671
1672        assert_eq!(doc.summary.as_deref(), Some("Simple summary."));
1673        assert!(doc.params.is_empty());
1674    }
1675
1676    // ---------------------------------------------------------------
1677    // Swift documentation
1678    // ---------------------------------------------------------------
1679
1680    #[test]
1681    fn test_swift_doc() {
1682        let swift_doc = "/// Calculates the distance.\n///\n/// - Parameter from: The start point\n/// - Parameter to: The end point\n/// - Returns: The distance\n/// - Throws: An error if coordinates are invalid";
1683        let doc = ext().extract(swift_doc, Language::Swift);
1684
1685        assert!(doc.summary.unwrap().contains("Calculates the distance"));
1686        assert_eq!(doc.params.len(), 2);
1687        assert_eq!(doc.params[0].name, "from");
1688        assert_eq!(doc.params[1].name, "to");
1689        assert!(doc.returns.is_some());
1690        assert_eq!(doc.throws.len(), 1);
1691        assert_eq!(doc.throws[0].exception_type, "Error");
1692    }
1693
1694    #[test]
1695    fn test_swift_description_only() {
1696        let swift_doc = "/// A simple utility function.";
1697        let doc = ext().extract(swift_doc, Language::Swift);
1698
1699        assert!(doc.summary.unwrap().contains("simple utility"));
1700    }
1701
1702    // ---------------------------------------------------------------
1703    // Scala (delegates to JavaDoc)
1704    // ---------------------------------------------------------------
1705
1706    #[test]
1707    fn test_scaladoc_delegates() {
1708        let scaladoc = "/**\n * Scala function.\n * @param x the input\n * @return the output\n */";
1709        let doc = ext().extract(scaladoc, Language::Scala);
1710
1711        assert!(doc.summary.unwrap().contains("Scala function"));
1712        assert_eq!(doc.params.len(), 1);
1713        assert!(doc.returns.is_some());
1714    }
1715
1716    // ---------------------------------------------------------------
1717    // Haskell Haddock
1718    // ---------------------------------------------------------------
1719
1720    #[test]
1721    fn test_haddock_basic() {
1722        let haddock = "-- | Compute the factorial. It uses recursion.";
1723        let doc = ext().extract(haddock, Language::Haskell);
1724
1725        assert!(doc.summary.unwrap().contains("Compute the factorial"));
1726        assert!(doc.description.unwrap().contains("recursion"));
1727    }
1728
1729    #[test]
1730    fn test_haddock_multiline() {
1731        let haddock = "-- | First line.\n-- Second line.";
1732        let doc = ext().extract(haddock, Language::Haskell);
1733
1734        let desc = doc.description.unwrap();
1735        assert!(desc.contains("First line."));
1736        assert!(desc.contains("Second line."));
1737    }
1738
1739    #[test]
1740    fn test_haddock_caret_prefix() {
1741        let haddock = "-- ^ Argument documentation.";
1742        let doc = ext().extract(haddock, Language::Haskell);
1743
1744        assert!(doc.description.unwrap().contains("Argument documentation"));
1745    }
1746
1747    // ---------------------------------------------------------------
1748    // Elixir ExDoc
1749    // ---------------------------------------------------------------
1750
1751    #[test]
1752    fn test_exdoc_basic() {
1753        let exdoc = "@doc \"\"\"\nFetches a user by ID.\n\n* id: The user identifier\n\"\"\"";
1754        let doc = ext().extract(exdoc, Language::Elixir);
1755
1756        assert!(doc.summary.unwrap().contains("Fetches a user by ID"));
1757        assert_eq!(doc.params.len(), 1);
1758        assert_eq!(doc.params[0].name, "id");
1759    }
1760
1761    #[test]
1762    fn test_exdoc_moduledoc() {
1763        let exdoc = "@moduledoc \"\"\"\nThis module handles authentication.\n\"\"\"";
1764        let doc = ext().extract(exdoc, Language::Elixir);
1765
1766        assert!(doc.summary.unwrap().contains("authentication"));
1767    }
1768
1769    #[test]
1770    fn test_exdoc_dash_list_params() {
1771        let exdoc = "@doc \"\"\"\nDo stuff.\n\n- name: The name\n- age: The age\n\"\"\"";
1772        let doc = ext().extract(exdoc, Language::Elixir);
1773
1774        assert_eq!(doc.params.len(), 2);
1775        assert_eq!(doc.params[0].name, "name");
1776        assert_eq!(doc.params[1].name, "age");
1777    }
1778
1779    // ---------------------------------------------------------------
1780    // Clojure docstring
1781    // ---------------------------------------------------------------
1782
1783    #[test]
1784    fn test_clojure_doc_basic() {
1785        let clj = "\"Adds two numbers together. Returns their sum.\"";
1786        let doc = ext().extract(clj, Language::Clojure);
1787
1788        assert!(doc.summary.unwrap().contains("Adds two numbers together"));
1789        assert!(doc.description.unwrap().contains("Returns their sum"));
1790    }
1791
1792    #[test]
1793    fn test_clojure_doc_no_period() {
1794        let clj = "\"Simple function without a period\"";
1795        let doc = ext().extract(clj, Language::Clojure);
1796
1797        assert_eq!(doc.summary.as_deref(), Some("Simple function without a period"));
1798    }
1799
1800    // ---------------------------------------------------------------
1801    // OCaml OCamldoc
1802    // ---------------------------------------------------------------
1803
1804    #[test]
1805    fn test_ocamldoc_basic() {
1806        let ocaml = "(** Compute the length.\n@param lst the input list\n@return the number of elements\n@raise Invalid_argument if list is circular\n*)";
1807        let doc = ext().extract(ocaml, Language::OCaml);
1808
1809        assert!(doc.summary.unwrap().contains("Compute the length"));
1810        assert_eq!(doc.params.len(), 1);
1811        assert_eq!(doc.params[0].name, "lst");
1812        assert!(doc.returns.is_some());
1813        assert_eq!(doc.throws.len(), 1);
1814        assert_eq!(doc.throws[0].exception_type, "Invalid_argument");
1815    }
1816
1817    // ---------------------------------------------------------------
1818    // Lua LuaDoc
1819    // ---------------------------------------------------------------
1820
1821    #[test]
1822    fn test_luadoc_basic() {
1823        let lua = "--- Process the data.\n--- @param input string The input data\n--- @return boolean True on success";
1824        let doc = ext().extract(lua, Language::Lua);
1825
1826        assert!(doc.summary.unwrap().contains("Process the data"));
1827        assert_eq!(doc.params.len(), 1);
1828        assert_eq!(doc.params[0].name, "input");
1829        assert_eq!(doc.params[0].type_info.as_deref(), Some("string"));
1830        assert!(doc.returns.is_some());
1831        assert_eq!(doc.returns.unwrap().type_info.as_deref(), Some("boolean"));
1832    }
1833
1834    // ---------------------------------------------------------------
1835    // R Roxygen2
1836    // ---------------------------------------------------------------
1837
1838    #[test]
1839    fn test_roxygen_basic() {
1840        let rox =
1841            "#' Calculate the mean.\n#' @param x A numeric vector\n#' @return The arithmetic mean";
1842        let doc = ext().extract(rox, Language::R);
1843
1844        assert!(doc.summary.unwrap().contains("Calculate the mean"));
1845        assert_eq!(doc.params.len(), 1);
1846        assert_eq!(doc.params[0].name, "x");
1847        assert!(doc.returns.is_some());
1848    }
1849
1850    // ---------------------------------------------------------------
1851    // Doxygen (C / C++)
1852    // ---------------------------------------------------------------
1853
1854    #[test]
1855    fn test_doxygen_basic() {
1856        let dox = "/**\n * @brief Calculate the sum.\n * @param a First operand\n * @param b Second operand\n * @return The sum\n * @throws std::overflow_error On overflow\n */";
1857        let doc = ext().extract(dox, Language::Cpp);
1858
1859        assert_eq!(doc.summary.as_deref(), Some("Calculate the sum."));
1860        assert_eq!(doc.params.len(), 2);
1861        assert_eq!(doc.params[0].name, "a");
1862        assert_eq!(doc.params[1].name, "b");
1863        assert!(doc.returns.is_some());
1864        assert_eq!(doc.throws.len(), 1);
1865    }
1866
1867    #[test]
1868    fn test_doxygen_c_dispatch() {
1869        let dox = "/**\n * @brief A C function.\n * @param x input\n */";
1870        let doc = ext().extract(dox, Language::C);
1871
1872        assert_eq!(doc.summary.as_deref(), Some("A C function."));
1873        assert_eq!(doc.params.len(), 1);
1874    }
1875
1876    #[test]
1877    fn test_doxygen_backslash_syntax() {
1878        let dox = "/**\n * \\brief Backslash style.\n * \\param n count\n * \\return the result\n * \\throws bad_alloc on memory failure\n */";
1879        let doc = ext().extract(dox, Language::Cpp);
1880
1881        assert_eq!(doc.summary.as_deref(), Some("Backslash style."));
1882        assert_eq!(doc.params.len(), 1);
1883        assert_eq!(doc.params[0].name, "n");
1884        assert!(doc.returns.is_some());
1885        assert_eq!(doc.throws.len(), 1);
1886    }
1887
1888    #[test]
1889    fn test_doxygen_param_direction() {
1890        let dox =
1891            "/**\n * @param[in] x input\n * @param[out] y output\n * @param[in,out] z both\n */";
1892        let doc = ext().extract(dox, Language::Cpp);
1893
1894        assert_eq!(doc.params.len(), 3);
1895        assert_eq!(doc.params[0].name, "x");
1896        assert_eq!(doc.params[1].name, "y");
1897        assert_eq!(doc.params[2].name, "z");
1898    }
1899
1900    #[test]
1901    fn test_doxygen_no_brief_uses_first_line() {
1902        let dox = "/**\n * First line as description.\n * @param x input\n */";
1903        let doc = ext().extract(dox, Language::Cpp);
1904
1905        // Without @brief, the first non-empty description line becomes summary
1906        let summary = doc.summary.unwrap();
1907        // The first line after stripping may be empty (from the newline after /**),
1908        // so the summary may be empty or the actual first line
1909        assert!(summary.is_empty() || summary.contains("First line as description"));
1910    }
1911
1912    // ---------------------------------------------------------------
1913    // Bash comments
1914    // ---------------------------------------------------------------
1915
1916    #[test]
1917    fn test_bash_comment_basic() {
1918        let bash = "# Deploy the application. Restarts the service.";
1919        let doc = ext().extract(bash, Language::Bash);
1920
1921        assert!(doc.summary.unwrap().contains("Deploy the application"));
1922    }
1923
1924    #[test]
1925    fn test_bash_multiline_comment() {
1926        let bash = "# First line.\n# Second line.\n# Third line.";
1927        let doc = ext().extract(bash, Language::Bash);
1928
1929        let desc = doc.description.unwrap();
1930        assert!(desc.contains("First line."));
1931        assert!(desc.contains("Third line."));
1932    }
1933
1934    #[test]
1935    fn test_bash_empty_comment_lines_filtered() {
1936        let bash = "# Content here.\n#\n# More content.";
1937        let doc = ext().extract(bash, Language::Bash);
1938
1939        let desc = doc.description.unwrap();
1940        assert!(desc.contains("Content here."));
1941        assert!(desc.contains("More content."));
1942    }
1943
1944    // ---------------------------------------------------------------
1945    // Generic / fallback parser
1946    // ---------------------------------------------------------------
1947
1948    #[test]
1949    fn test_generic_fallback() {
1950        // FSharp is not explicitly matched, so it goes to generic
1951        let comment = "// A generic comment.";
1952        let doc = ext().extract(comment, Language::FSharp);
1953
1954        assert!(doc.summary.unwrap().contains("generic comment"));
1955    }
1956
1957    #[test]
1958    fn test_generic_strips_various_markers() {
1959        let comment = "/* Block comment content */";
1960        let doc = ext().extract(comment, Language::FSharp);
1961
1962        assert!(doc.description.unwrap().contains("Block comment content"));
1963    }
1964
1965    #[test]
1966    fn test_generic_hash_comment() {
1967        let comment = "# Hash comment content";
1968        let doc = ext().extract(comment, Language::FSharp);
1969
1970        assert!(doc.description.unwrap().contains("Hash comment content"));
1971    }
1972
1973    #[test]
1974    fn test_generic_double_dash() {
1975        let comment = "-- SQL style comment";
1976        let doc = ext().extract(comment, Language::FSharp);
1977
1978        assert!(doc.description.unwrap().contains("SQL style comment"));
1979    }
1980
1981    #[test]
1982    fn test_generic_semicolon_comment() {
1983        let comment = ";; Lisp-style comment";
1984        let doc = ext().extract(comment, Language::FSharp);
1985
1986        assert!(doc.description.unwrap().contains("Lisp-style comment"));
1987    }
1988
1989    // ---------------------------------------------------------------
1990    // Default trait impl
1991    // ---------------------------------------------------------------
1992
1993    #[test]
1994    fn test_default_creates_extractor() {
1995        let ext: DocumentationExtractor = Default::default();
1996        let doc = ext.extract("/// Hello.", Language::Rust);
1997        assert!(doc.summary.is_some());
1998    }
1999
2000    // ---------------------------------------------------------------
2001    // Raw field preservation
2002    // ---------------------------------------------------------------
2003
2004    #[test]
2005    fn test_raw_field_preserved() {
2006        let input = "/// Some doc.";
2007        let doc = ext().extract(input, Language::Rust);
2008        assert_eq!(doc.raw.as_deref(), Some("/// Some doc."));
2009    }
2010
2011    #[test]
2012    fn test_raw_field_preserved_python() {
2013        let input = "\"\"\"Some doc.\"\"\"";
2014        let doc = ext().extract(input, Language::Python);
2015        assert_eq!(doc.raw.as_deref(), Some("\"\"\"Some doc.\"\"\""));
2016    }
2017
2018    // ---------------------------------------------------------------
2019    // Special characters and code blocks in docs
2020    // ---------------------------------------------------------------
2021
2022    #[test]
2023    fn test_jsdoc_special_characters() {
2024        let jsdoc =
2025            "/**\n * Process <T> & handle \"quotes\".\n * @param {Array<string>} items - The items\n */";
2026        let doc = ext().extract(jsdoc, Language::JavaScript);
2027
2028        assert!(doc.description.unwrap().contains("<T>"));
2029        assert_eq!(doc.params.len(), 1);
2030        assert_eq!(doc.params[0].type_info.as_deref(), Some("Array<string>"));
2031    }
2032
2033    #[test]
2034    fn test_python_docstring_with_code_block() {
2035        let docstring =
2036            "\"\"\"Process data.\n\nExample:\n    ```python\n    result = process(data)\n    ```\n\"\"\"";
2037        let doc = ext().extract(docstring, Language::Python);
2038
2039        assert_eq!(doc.examples.len(), 1);
2040        assert!(doc.examples[0].code.contains("process(data)"));
2041    }
2042
2043    #[test]
2044    fn test_jsdoc_with_unicode() {
2045        let jsdoc = "/**\n * Calculate \u{03C0} (pi) approximation.\n * @param {number} n - Number of iterations\n */";
2046        let doc = ext().extract(jsdoc, Language::JavaScript);
2047
2048        assert!(doc.description.unwrap().contains('\u{03C0}'));
2049        assert_eq!(doc.params.len(), 1);
2050    }
2051
2052    // ---------------------------------------------------------------
2053    // strip_comment_markers helper
2054    // ---------------------------------------------------------------
2055
2056    #[test]
2057    fn test_strip_comment_markers_basic() {
2058        let e = ext();
2059        let result = e.strip_comment_markers("/** line1\n * line2\n */", "/**", "*/", "*");
2060        assert!(result.contains("line1"));
2061        assert!(result.contains("line2"));
2062        // Should not contain the * prefix
2063        assert!(!result.contains("* line2"));
2064    }
2065
2066    #[test]
2067    fn test_strip_comment_markers_no_prefix_match() {
2068        let e = ext();
2069        let result =
2070            e.strip_comment_markers("/** no prefix lines\nplain line\n */", "/**", "*/", "*");
2071        assert!(result.contains("plain line"));
2072    }
2073
2074    // ---------------------------------------------------------------
2075    // strip_rust_doc_markers helper
2076    // ---------------------------------------------------------------
2077
2078    #[test]
2079    fn test_strip_rust_doc_markers_triple_slash() {
2080        let e = ext();
2081        let result = e.strip_rust_doc_markers("/// Hello\n/// World");
2082        assert!(result.contains("Hello"));
2083        assert!(result.contains("World"));
2084    }
2085
2086    #[test]
2087    fn test_strip_rust_doc_markers_inner() {
2088        let e = ext();
2089        let result = e.strip_rust_doc_markers("//! Module doc\n//! More");
2090        assert!(result.contains("Module doc"));
2091        assert!(result.contains("More"));
2092    }
2093
2094    #[test]
2095    fn test_strip_rust_doc_markers_block_style() {
2096        let e = ext();
2097        let result = e.strip_rust_doc_markers("/** Block\n * content\n */");
2098        assert!(result.contains("Block"));
2099        assert!(result.contains("content"));
2100    }
2101
2102    #[test]
2103    fn test_strip_rust_doc_markers_closing_only() {
2104        let e = ext();
2105        let result = e.strip_rust_doc_markers("*/");
2106        // `*/` starts with '*', so it matches the `starts_with('*')` branch
2107        // and returns `"/".trim_start()` = "/"
2108        // The `trimmed == "*/"` branch is unreachable due to ordering
2109        assert_eq!(result, "/");
2110    }
2111
2112    #[test]
2113    fn test_strip_rust_doc_markers_plain_line() {
2114        let e = ext();
2115        let result = e.strip_rust_doc_markers("plain text without markers");
2116        assert!(result.contains("plain text without markers"));
2117    }
2118
2119    // ---------------------------------------------------------------
2120    // Deprecated detection (existing test preserved)
2121    // ---------------------------------------------------------------
2122
2123    #[test]
2124    fn test_deprecated_detection() {
2125        let jsdoc = r#"/**
2126         * Old function.
2127         * @deprecated Use newFunction instead
2128         */
2129        "#;
2130
2131        let doc = ext().extract(jsdoc, Language::JavaScript);
2132
2133        assert!(doc.is_deprecated);
2134        assert!(doc.deprecation_message.is_some());
2135    }
2136}