infiniloom_engine/analysis/
documentation.rs

1//! Documentation extraction and parsing for all supported languages
2//!
3//! Parses JSDoc, Python docstrings, Rust doc comments, JavaDoc, etc.
4//! into structured documentation format.
5
6use crate::analysis::types::{Documentation, Example, ParamDoc, ReturnDoc, ThrowsDoc};
7use crate::parser::Language;
8use regex::Regex;
9
/// Extracts and parses documentation from source code.
///
/// Holds the regular expressions that the JSDoc, Python and Rust parsers
/// share, so they are compiled when the extractor is built rather than on
/// each parse.
pub struct DocumentationExtractor {
    // Precompiled regex patterns
    /// JSDoc `@param` line: optional `{type}`, optional `[` marker, name, description.
    jsdoc_param: Regex,
    /// JSDoc `@return` / `@returns` line: optional `{type}` and description.
    jsdoc_returns: Regex,
    /// JSDoc `@throw` / `@throws` line: optional `{type}` and description.
    jsdoc_throws: Regex,
    /// Matches the `@example` tag itself.
    jsdoc_example: Regex,
    /// Any other `@tag value` line: tag name and value.
    jsdoc_tag: Regex,
    /// Python argument entry: `name (type): description`, with the type optional.
    python_param: Regex,
    /// Python return entry: optional `type:` prefix followed by the description.
    python_returns: Regex,
    /// Python raises entry: `ExceptionType: description`.
    python_raises: Regex,
    /// Rust argument bullet: a list item whose name is wrapped in backticks.
    rust_param: Regex,
}
23
24impl DocumentationExtractor {
25    /// Create a new documentation extractor
26    pub fn new() -> Self {
27        Self {
28            // JSDoc patterns
29            jsdoc_param: Regex::new(r"@param\s+(?:\{([^}]+)\}\s+)?(\[)?(\w+)\]?\s*(?:-\s*)?(.*)")
30                .unwrap(),
31            jsdoc_returns: Regex::new(r"@returns?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
32            jsdoc_throws: Regex::new(r"@throws?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
33            // Note: Example parsing is done manually in parse_jsdoc via in_example state
34            jsdoc_example: Regex::new(r"@example\s*").unwrap(),
35            jsdoc_tag: Regex::new(r"@(\w+)\s+(.*)").unwrap(),
36
37            // Python docstring patterns (Google/NumPy style)
38            python_param: Regex::new(r"^\s*(\w+)\s*(?:\(([^)]+)\))?\s*:\s*(.*)$").unwrap(),
39            python_returns: Regex::new(r"^\s*(?:(\w+)\s*:\s*)?(.*)$").unwrap(),
40            python_raises: Regex::new(r"^\s*(\w+)\s*:\s*(.*)$").unwrap(),
41
42            // Rust doc patterns
43            rust_param: Regex::new(r"^\s*\*\s+`(\w+)`\s*(?:-\s*)?(.*)$").unwrap(),
44        }
45    }
46
47    /// Extract documentation from a docstring/comment based on language
48    pub fn extract(&self, raw_doc: &str, language: Language) -> Documentation {
49        let raw_doc = raw_doc.trim();
50        if raw_doc.is_empty() {
51            return Documentation::default();
52        }
53
54        match language {
55            Language::JavaScript | Language::TypeScript => self.parse_jsdoc(raw_doc),
56            Language::Python => self.parse_python_docstring(raw_doc),
57            Language::Rust => self.parse_rust_doc(raw_doc),
58            Language::Java | Language::Kotlin => self.parse_javadoc(raw_doc),
59            Language::Go => self.parse_go_doc(raw_doc),
60            Language::Ruby => self.parse_ruby_doc(raw_doc),
61            Language::Php => self.parse_phpdoc(raw_doc),
62            Language::CSharp => self.parse_csharp_doc(raw_doc),
63            Language::Swift => self.parse_swift_doc(raw_doc),
64            Language::Scala => self.parse_scaladoc(raw_doc),
65            Language::Haskell => self.parse_haddock(raw_doc),
66            Language::Elixir => self.parse_exdoc(raw_doc),
67            Language::Clojure => self.parse_clojure_doc(raw_doc),
68            Language::OCaml => self.parse_ocamldoc(raw_doc),
69            Language::Lua => self.parse_luadoc(raw_doc),
70            Language::R => self.parse_roxygen(raw_doc),
71            Language::Cpp | Language::C => self.parse_doxygen(raw_doc),
72            Language::Bash => self.parse_bash_comment(raw_doc),
73            // Handle any language not explicitly matched (e.g., FSharp)
74            _ => self.parse_generic(raw_doc),
75        }
76    }
77
78    /// Parse JSDoc style documentation
79    fn parse_jsdoc(&self, raw: &str) -> Documentation {
80        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
81
82        // Remove comment markers
83        let content = self.strip_comment_markers(raw, "/**", "*/", "*");
84
85        // Split into lines
86        let lines: Vec<&str> = content.lines().collect();
87
88        // First non-tag lines are the description
89        let mut description_lines = Vec::new();
90        let mut in_description = true;
91        let mut current_example = String::new();
92        let mut in_example = false;
93
94        for line in &lines {
95            let line = line.trim();
96
97            if line.starts_with('@') {
98                in_description = false;
99
100                // End any current example
101                if in_example && !line.starts_with("@example") {
102                    if !current_example.is_empty() {
103                        doc.examples.push(Example {
104                            code: current_example.trim().to_owned(),
105                            ..Default::default()
106                        });
107                    }
108                    current_example.clear();
109                    in_example = false;
110                }
111
112                // Parse different tags
113                if let Some(caps) = self.jsdoc_param.captures(line) {
114                    let type_info = caps.get(1).map(|m| m.as_str().to_owned());
115                    let is_optional = caps.get(2).is_some();
116                    let name = caps.get(3).map_or("", |m| m.as_str());
117                    let desc = caps.get(4).map_or("", |m| m.as_str());
118
119                    doc.params.push(ParamDoc {
120                        name: name.to_owned(),
121                        type_info,
122                        description: if desc.is_empty() {
123                            None
124                        } else {
125                            Some(desc.to_owned())
126                        },
127                        is_optional,
128                        default_value: None,
129                    });
130                } else if let Some(caps) = self.jsdoc_returns.captures(line) {
131                    doc.returns = Some(ReturnDoc {
132                        type_info: caps.get(1).map(|m| m.as_str().to_owned()),
133                        description: caps.get(2).map(|m| m.as_str().to_owned()),
134                    });
135                } else if let Some(caps) = self.jsdoc_throws.captures(line) {
136                    doc.throws.push(ThrowsDoc {
137                        exception_type: caps
138                            .get(1)
139                            .map_or_else(|| "Error".to_owned(), |m| m.as_str().to_owned()),
140                        description: caps.get(2).map(|m| m.as_str().to_owned()),
141                    });
142                } else if line.starts_with("@example") {
143                    in_example = true;
144                    // Content after @example on same line
145                    let after_tag = line.strip_prefix("@example").unwrap_or("").trim();
146                    if !after_tag.is_empty() {
147                        current_example.push_str(after_tag);
148                        current_example.push('\n');
149                    }
150                } else if line.starts_with("@deprecated") {
151                    doc.is_deprecated = true;
152                    let msg = line.strip_prefix("@deprecated").unwrap_or("").trim();
153                    if !msg.is_empty() {
154                        doc.deprecation_message = Some(msg.to_owned());
155                    }
156                } else if let Some(caps) = self.jsdoc_tag.captures(line) {
157                    let tag = caps.get(1).map_or("", |m| m.as_str());
158                    let value = caps.get(2).map_or("", |m| m.as_str());
159                    doc.tags
160                        .entry(tag.to_owned())
161                        .or_default()
162                        .push(value.to_owned());
163                }
164            } else if in_example {
165                current_example.push_str(line);
166                current_example.push('\n');
167            } else if in_description {
168                description_lines.push(line);
169            }
170        }
171
172        // Handle last example
173        if !current_example.is_empty() {
174            doc.examples
175                .push(Example { code: current_example.trim().to_owned(), ..Default::default() });
176        }
177
178        // Set description
179        if !description_lines.is_empty() {
180            let full_desc = description_lines.join("\n");
181            let sentences: Vec<&str> = full_desc.split(". ").collect();
182            if !sentences.is_empty() {
183                doc.summary = Some(sentences[0].to_owned());
184            }
185            doc.description = Some(full_desc);
186        }
187
188        doc
189    }
190
191    /// Parse Python docstring (Google/NumPy/Sphinx style)
192    fn parse_python_docstring(&self, raw: &str) -> Documentation {
193        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
194
195        // Remove triple quotes
196        let content = raw
197            .trim_start_matches("\"\"\"")
198            .trim_end_matches("\"\"\"")
199            .trim_start_matches("'''")
200            .trim_end_matches("'''")
201            .trim();
202
203        let lines: Vec<&str> = content.lines().collect();
204
205        #[derive(PartialEq)]
206        enum Section {
207            Description,
208            Args,
209            Returns,
210            Raises,
211            Example,
212            Other,
213        }
214
215        let mut section = Section::Description;
216        let mut description_lines = Vec::new();
217        let mut current_param: Option<ParamDoc> = None;
218        let mut current_example = String::new();
219
220        for line in lines {
221            let trimmed = line.trim();
222
223            // Check for section headers
224            if trimmed == "Args:" || trimmed == "Arguments:" || trimmed == "Parameters:" {
225                section = Section::Args;
226                continue;
227            } else if trimmed == "Returns:" || trimmed == "Return:" {
228                section = Section::Returns;
229                continue;
230            } else if trimmed == "Raises:" || trimmed == "Throws:" || trimmed == "Exceptions:" {
231                section = Section::Raises;
232                continue;
233            } else if trimmed == "Example:" || trimmed == "Examples:" {
234                section = Section::Example;
235                continue;
236            } else if trimmed.ends_with(':') && !trimmed.contains(' ') {
237                section = Section::Other;
238                continue;
239            }
240
241            match section {
242                Section::Description => {
243                    description_lines.push(trimmed);
244                },
245                Section::Args => {
246                    if let Some(caps) = self.python_param.captures(trimmed) {
247                        // Save previous param
248                        if let Some(param) = current_param.take() {
249                            doc.params.push(param);
250                        }
251
252                        let name = caps.get(1).map_or("", |m| m.as_str());
253                        let type_info = caps.get(2).map(|m| m.as_str().to_owned());
254                        let desc = caps.get(3).map(|m| m.as_str());
255
256                        current_param = Some(ParamDoc {
257                            name: name.to_owned(),
258                            type_info,
259                            description: desc.map(String::from),
260                            is_optional: false,
261                            default_value: None,
262                        });
263                    } else if let Some(ref mut param) = current_param {
264                        // Continuation of previous param description
265                        if let Some(ref mut desc) = param.description {
266                            desc.push(' ');
267                            desc.push_str(trimmed);
268                        }
269                    }
270                },
271                Section::Returns => {
272                    if doc.returns.is_none() {
273                        if let Some(caps) = self.python_returns.captures(trimmed) {
274                            doc.returns = Some(ReturnDoc {
275                                type_info: caps.get(1).map(|m| m.as_str().to_owned()),
276                                description: caps.get(2).map(|m| m.as_str().to_owned()),
277                            });
278                        }
279                    } else if let Some(ref mut ret) = doc.returns {
280                        if let Some(ref mut desc) = ret.description {
281                            desc.push(' ');
282                            desc.push_str(trimmed);
283                        }
284                    }
285                },
286                Section::Raises => {
287                    if let Some(caps) = self.python_raises.captures(trimmed) {
288                        doc.throws.push(ThrowsDoc {
289                            exception_type: caps
290                                .get(1)
291                                .map(|m| m.as_str().to_owned())
292                                .unwrap_or_default(),
293                            description: caps.get(2).map(|m| m.as_str().to_owned()),
294                        });
295                    }
296                },
297                Section::Example => {
298                    current_example.push_str(line);
299                    current_example.push('\n');
300                },
301                Section::Other => {},
302            }
303        }
304
305        // Save last param
306        if let Some(param) = current_param {
307            doc.params.push(param);
308        }
309
310        // Save example
311        if !current_example.is_empty() {
312            doc.examples.push(Example {
313                code: current_example.trim().to_owned(),
314                language: Some("python".to_owned()),
315                ..Default::default()
316            });
317        }
318
319        // Set description
320        let desc = description_lines.join(" ");
321        if !desc.is_empty() {
322            let sentences: Vec<&str> = desc.split(". ").collect();
323            if !sentences.is_empty() {
324                doc.summary = Some(sentences[0].to_owned());
325            }
326            doc.description = Some(desc);
327        }
328
329        doc
330    }
331
332    /// Parse Rust doc comments
333    fn parse_rust_doc(&self, raw: &str) -> Documentation {
334        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
335
336        // Remove /// or //! or /** */
337        let content = self.strip_rust_doc_markers(raw);
338
339        let lines: Vec<&str> = content.lines().collect();
340
341        #[derive(PartialEq)]
342        enum Section {
343            Description,
344            Arguments,
345            Returns,
346            Errors,
347            Panics,
348            Examples,
349            Safety,
350        }
351
352        let mut section = Section::Description;
353        let mut description_lines = Vec::new();
354        let mut current_example = String::new();
355
356        for line in lines {
357            let trimmed = line.trim();
358
359            // Check for section headers (# Headers in Rust docs)
360            if trimmed.starts_with("# ") {
361                let header = trimmed[2..].to_lowercase();
362                section = match header.as_str() {
363                    "arguments" | "parameters" => Section::Arguments,
364                    "returns" => Section::Returns,
365                    "errors" => Section::Errors,
366                    "panics" => Section::Panics,
367                    "examples" | "example" => Section::Examples,
368                    "safety" => Section::Safety,
369                    _ => Section::Description,
370                };
371                continue;
372            }
373
374            match section {
375                Section::Description => {
376                    description_lines.push(trimmed);
377                },
378                Section::Arguments => {
379                    if let Some(caps) = self.rust_param.captures(trimmed) {
380                        doc.params.push(ParamDoc {
381                            name: caps
382                                .get(1)
383                                .map(|m| m.as_str().to_owned())
384                                .unwrap_or_default(),
385                            description: caps.get(2).map(|m| m.as_str().to_owned()),
386                            ..Default::default()
387                        });
388                    }
389                },
390                Section::Returns => {
391                    if doc.returns.is_none() {
392                        doc.returns = Some(ReturnDoc {
393                            description: Some(trimmed.to_owned()),
394                            ..Default::default()
395                        });
396                    }
397                },
398                Section::Errors => {
399                    if !trimmed.is_empty() {
400                        doc.throws.push(ThrowsDoc {
401                            exception_type: "Error".to_owned(),
402                            description: Some(trimmed.to_owned()),
403                        });
404                    }
405                },
406                Section::Panics => {
407                    doc.tags
408                        .entry("panics".to_owned())
409                        .or_default()
410                        .push(trimmed.to_owned());
411                },
412                Section::Examples => {
413                    current_example.push_str(line);
414                    current_example.push('\n');
415                },
416                Section::Safety => {
417                    doc.tags
418                        .entry("safety".to_owned())
419                        .or_default()
420                        .push(trimmed.to_owned());
421                },
422            }
423        }
424
425        // Save example
426        if !current_example.is_empty() {
427            // Extract code blocks (```rust ... ```)
428            let code_block_re = Regex::new(r"```(?:rust)?\n([\s\S]*?)```").unwrap();
429            for caps in code_block_re.captures_iter(&current_example) {
430                if let Some(code) = caps.get(1) {
431                    doc.examples.push(Example {
432                        code: code.as_str().trim().to_owned(),
433                        language: Some("rust".to_owned()),
434                        ..Default::default()
435                    });
436                }
437            }
438        }
439
440        // Set description
441        let desc = description_lines.join(" ");
442        if !desc.is_empty() {
443            let sentences: Vec<&str> = desc.split(". ").collect();
444            if !sentences.is_empty() {
445                doc.summary = Some(sentences[0].to_owned());
446            }
447            doc.description = Some(desc);
448        }
449
450        doc
451    }
452
453    /// Parse JavaDoc style documentation
454    fn parse_javadoc(&self, raw: &str) -> Documentation {
455        // JavaDoc is similar to JSDoc
456        self.parse_jsdoc(raw)
457    }
458
459    /// Parse Go doc comments
460    fn parse_go_doc(&self, raw: &str) -> Documentation {
461        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
462
463        // Go uses simple // comments
464        let content: String = raw
465            .lines()
466            .map(|l| l.trim_start_matches("//").trim())
467            .collect::<Vec<_>>()
468            .join(" ");
469
470        // First sentence is summary
471        let sentences: Vec<&str> = content.split(". ").collect();
472        if !sentences.is_empty() {
473            doc.summary = Some(sentences[0].to_owned());
474        }
475        doc.description = Some(content);
476
477        // Check for Deprecated
478        if raw.to_lowercase().contains("deprecated") {
479            doc.is_deprecated = true;
480        }
481
482        doc
483    }
484
485    /// Parse Ruby RDoc/YARD
486    fn parse_ruby_doc(&self, raw: &str) -> Documentation {
487        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
488
489        let content = self.strip_comment_markers(raw, "=begin", "=end", "#");
490
491        // YARD style @param, @return, @raise
492        let param_re = Regex::new(r"@param\s+\[([^\]]+)\]\s+(\w+)\s+(.*)").unwrap();
493        let return_re = Regex::new(r"@return\s+\[([^\]]+)\]\s+(.*)").unwrap();
494        let raise_re = Regex::new(r"@raise\s+\[([^\]]+)\]\s+(.*)").unwrap();
495
496        for line in content.lines() {
497            let line = line.trim();
498
499            if let Some(caps) = param_re.captures(line) {
500                doc.params.push(ParamDoc {
501                    name: caps
502                        .get(2)
503                        .map(|m| m.as_str().to_owned())
504                        .unwrap_or_default(),
505                    type_info: caps.get(1).map(|m| m.as_str().to_owned()),
506                    description: caps.get(3).map(|m| m.as_str().to_owned()),
507                    ..Default::default()
508                });
509            } else if let Some(caps) = return_re.captures(line) {
510                doc.returns = Some(ReturnDoc {
511                    type_info: caps.get(1).map(|m| m.as_str().to_owned()),
512                    description: caps.get(2).map(|m| m.as_str().to_owned()),
513                });
514            } else if let Some(caps) = raise_re.captures(line) {
515                doc.throws.push(ThrowsDoc {
516                    exception_type: caps
517                        .get(1)
518                        .map(|m| m.as_str().to_owned())
519                        .unwrap_or_default(),
520                    description: caps.get(2).map(|m| m.as_str().to_owned()),
521                });
522            } else if !line.starts_with('@') && doc.description.is_none() {
523                doc.description = Some(line.to_owned());
524                doc.summary = Some(line.to_owned());
525            }
526        }
527
528        doc
529    }
530
531    /// Parse PHPDoc
532    fn parse_phpdoc(&self, raw: &str) -> Documentation {
533        // PHPDoc is similar to JSDoc
534        self.parse_jsdoc(raw)
535    }
536
537    /// Parse C# XML documentation
538    fn parse_csharp_doc(&self, raw: &str) -> Documentation {
539        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
540
541        // C# uses XML documentation
542        let summary_re = Regex::new(r"<summary>([\s\S]*?)</summary>").unwrap();
543        let param_re = Regex::new(r#"<param name="(\w+)">([\s\S]*?)</param>"#).unwrap();
544        let returns_re = Regex::new(r"<returns>([\s\S]*?)</returns>").unwrap();
545        let exception_re =
546            Regex::new(r#"<exception cref="([^"]+)">([\s\S]*?)</exception>"#).unwrap();
547
548        if let Some(caps) = summary_re.captures(raw) {
549            let summary = caps.get(1).map(|m| m.as_str().trim().to_owned());
550            doc.summary = summary.clone();
551            doc.description = summary;
552        }
553
554        for caps in param_re.captures_iter(raw) {
555            doc.params.push(ParamDoc {
556                name: caps
557                    .get(1)
558                    .map(|m| m.as_str().to_owned())
559                    .unwrap_or_default(),
560                description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
561                ..Default::default()
562            });
563        }
564
565        if let Some(caps) = returns_re.captures(raw) {
566            doc.returns = Some(ReturnDoc {
567                description: caps.get(1).map(|m| m.as_str().trim().to_owned()),
568                ..Default::default()
569            });
570        }
571
572        for caps in exception_re.captures_iter(raw) {
573            doc.throws.push(ThrowsDoc {
574                exception_type: caps
575                    .get(1)
576                    .map(|m| m.as_str().to_owned())
577                    .unwrap_or_default(),
578                description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
579            });
580        }
581
582        doc
583    }
584
585    /// Parse Swift documentation comments
586    fn parse_swift_doc(&self, raw: &str) -> Documentation {
587        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
588
589        // Swift uses /// or /** */ with - Parameter:, - Returns:, - Throws:
590        let content = self.strip_comment_markers(raw, "/**", "*/", "///");
591
592        let param_re = Regex::new(r"-\s*Parameter\s+(\w+):\s*(.*)").unwrap();
593        let returns_re = Regex::new(r"-\s*Returns:\s*(.*)").unwrap();
594        let throws_re = Regex::new(r"-\s*Throws:\s*(.*)").unwrap();
595
596        let mut description_lines = Vec::new();
597
598        for line in content.lines() {
599            let line = line.trim();
600
601            if let Some(caps) = param_re.captures(line) {
602                doc.params.push(ParamDoc {
603                    name: caps
604                        .get(1)
605                        .map(|m| m.as_str().to_owned())
606                        .unwrap_or_default(),
607                    description: caps.get(2).map(|m| m.as_str().to_owned()),
608                    ..Default::default()
609                });
610            } else if let Some(caps) = returns_re.captures(line) {
611                doc.returns = Some(ReturnDoc {
612                    description: caps.get(1).map(|m| m.as_str().to_owned()),
613                    ..Default::default()
614                });
615            } else if let Some(caps) = throws_re.captures(line) {
616                doc.throws.push(ThrowsDoc {
617                    exception_type: "Error".to_owned(),
618                    description: caps.get(1).map(|m| m.as_str().to_owned()),
619                });
620            } else if !line.starts_with('-') && !line.is_empty() {
621                description_lines.push(line);
622            }
623        }
624
625        if !description_lines.is_empty() {
626            let desc = description_lines.join(" ");
627            doc.summary = Some(description_lines[0].to_owned());
628            doc.description = Some(desc);
629        }
630
631        doc
632    }
633
634    /// Parse ScalaDoc
635    fn parse_scaladoc(&self, raw: &str) -> Documentation {
636        // ScalaDoc is similar to JavaDoc
637        self.parse_javadoc(raw)
638    }
639
640    /// Parse Haddock (Haskell)
641    fn parse_haddock(&self, raw: &str) -> Documentation {
642        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
643
644        // Haddock uses -- | or {- | -}
645        let content = raw
646            .lines()
647            .map(|l| {
648                l.trim_start_matches("--")
649                    .trim_start_matches('|')
650                    .trim_start_matches('^')
651                    .trim()
652            })
653            .collect::<Vec<_>>()
654            .join(" ");
655
656        doc.description = Some(content.clone());
657        let sentences: Vec<&str> = content.split(". ").collect();
658        if !sentences.is_empty() {
659            doc.summary = Some(sentences[0].to_owned());
660        }
661
662        doc
663    }
664
665    /// Parse ExDoc (Elixir)
666    fn parse_exdoc(&self, raw: &str) -> Documentation {
667        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
668
669        // ExDoc uses @doc """ ... """ or @moduledoc
670        let content = raw
671            .trim_start_matches("@doc")
672            .trim_start_matches("@moduledoc")
673            .trim()
674            .trim_start_matches("\"\"\"")
675            .trim_end_matches("\"\"\"")
676            .trim();
677
678        // Parse markdown-style documentation
679        let lines: Vec<&str> = content.lines().collect();
680        let mut description_lines = Vec::new();
681
682        for line in lines {
683            let trimmed = line.trim();
684
685            // Check for ## Parameters, ## Returns, etc.
686            if trimmed.starts_with("##") {
687                // Section header
688                continue;
689            }
690
691            if trimmed.starts_with('*') || trimmed.starts_with('-') {
692                // List item - could be a parameter
693                let item = trimmed.trim_start_matches(['*', '-']).trim();
694                if item.contains(':') {
695                    let parts: Vec<&str> = item.splitn(2, ':').collect();
696                    if parts.len() == 2 {
697                        doc.params.push(ParamDoc {
698                            name: parts[0].trim().to_owned(),
699                            description: Some(parts[1].trim().to_owned()),
700                            ..Default::default()
701                        });
702                    }
703                }
704            } else if !trimmed.is_empty() {
705                description_lines.push(trimmed);
706            }
707        }
708
709        if !description_lines.is_empty() {
710            doc.summary = Some(description_lines[0].to_owned());
711            doc.description = Some(description_lines.join(" "));
712        }
713
714        doc
715    }
716
717    /// Parse Clojure docstring
718    fn parse_clojure_doc(&self, raw: &str) -> Documentation {
719        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
720
721        // Clojure docstrings are simple strings
722        let content = raw.trim_matches('"');
723
724        doc.description = Some(content.to_owned());
725        let sentences: Vec<&str> = content.split(". ").collect();
726        if !sentences.is_empty() {
727            doc.summary = Some(sentences[0].to_owned());
728        }
729
730        doc
731    }
732
733    /// Parse OCamldoc
734    fn parse_ocamldoc(&self, raw: &str) -> Documentation {
735        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
736
737        // OCamldoc uses (** ... *)
738        let content = raw.trim_start_matches("(**").trim_end_matches("*)").trim();
739
740        // Parse @param, @return, @raise
741        let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
742        let return_re = Regex::new(r"@return\s+(.*)").unwrap();
743        let raise_re = Regex::new(r"@raise\s+(\w+)\s+(.*)").unwrap();
744
745        let mut description_lines = Vec::new();
746
747        for line in content.lines() {
748            let line = line.trim();
749
750            if let Some(caps) = param_re.captures(line) {
751                doc.params.push(ParamDoc {
752                    name: caps
753                        .get(1)
754                        .map(|m| m.as_str().to_owned())
755                        .unwrap_or_default(),
756                    description: caps.get(2).map(|m| m.as_str().to_owned()),
757                    ..Default::default()
758                });
759            } else if let Some(caps) = return_re.captures(line) {
760                doc.returns = Some(ReturnDoc {
761                    description: caps.get(1).map(|m| m.as_str().to_owned()),
762                    ..Default::default()
763                });
764            } else if let Some(caps) = raise_re.captures(line) {
765                doc.throws.push(ThrowsDoc {
766                    exception_type: caps
767                        .get(1)
768                        .map(|m| m.as_str().to_owned())
769                        .unwrap_or_default(),
770                    description: caps.get(2).map(|m| m.as_str().to_owned()),
771                });
772            } else if !line.starts_with('@') {
773                description_lines.push(line);
774            }
775        }
776
777        if !description_lines.is_empty() {
778            doc.summary = Some(description_lines[0].to_owned());
779            doc.description = Some(description_lines.join(" "));
780        }
781
782        doc
783    }
784
785    /// Parse LuaDoc
786    fn parse_luadoc(&self, raw: &str) -> Documentation {
787        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
788
789        // LuaDoc uses --- or --[[ ]]
790        let content: String = raw
791            .lines()
792            .map(|l| l.trim_start_matches("---").trim_start_matches("--").trim())
793            .collect::<Vec<_>>()
794            .join("\n");
795
796        // Parse @param, @return
797        let param_re = Regex::new(r"@param\s+(\w+)\s+(\w+)\s*(.*)").unwrap();
798        let return_re = Regex::new(r"@return\s+(\w+)\s*(.*)").unwrap();
799
800        let mut description_lines = Vec::new();
801
802        for line in content.lines() {
803            let line = line.trim();
804
805            if let Some(caps) = param_re.captures(line) {
806                doc.params.push(ParamDoc {
807                    name: caps
808                        .get(1)
809                        .map(|m| m.as_str().to_owned())
810                        .unwrap_or_default(),
811                    type_info: caps.get(2).map(|m| m.as_str().to_owned()),
812                    description: caps.get(3).map(|m| m.as_str().to_owned()),
813                    ..Default::default()
814                });
815            } else if let Some(caps) = return_re.captures(line) {
816                doc.returns = Some(ReturnDoc {
817                    type_info: caps.get(1).map(|m| m.as_str().to_owned()),
818                    description: caps.get(2).map(|m| m.as_str().to_owned()),
819                });
820            } else if !line.starts_with('@') {
821                description_lines.push(line);
822            }
823        }
824
825        if !description_lines.is_empty() {
826            doc.summary = Some(description_lines[0].to_owned());
827            doc.description = Some(description_lines.join(" "));
828        }
829
830        doc
831    }
832
833    /// Parse Roxygen2 (R)
834    fn parse_roxygen(&self, raw: &str) -> Documentation {
835        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
836
837        // Roxygen uses #' @param, #' @return, etc.
838        let content: String = raw
839            .lines()
840            .map(|l| l.trim_start_matches("#'").trim())
841            .collect::<Vec<_>>()
842            .join("\n");
843
844        let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
845        let return_re = Regex::new(r"@return\s+(.*)").unwrap();
846
847        let mut description_lines = Vec::new();
848
849        for line in content.lines() {
850            let line = line.trim();
851
852            if let Some(caps) = param_re.captures(line) {
853                doc.params.push(ParamDoc {
854                    name: caps
855                        .get(1)
856                        .map(|m| m.as_str().to_owned())
857                        .unwrap_or_default(),
858                    description: caps.get(2).map(|m| m.as_str().to_owned()),
859                    ..Default::default()
860                });
861            } else if let Some(caps) = return_re.captures(line) {
862                doc.returns = Some(ReturnDoc {
863                    description: caps.get(1).map(|m| m.as_str().to_owned()),
864                    ..Default::default()
865                });
866            } else if !line.starts_with('@') {
867                description_lines.push(line);
868            }
869        }
870
871        if !description_lines.is_empty() {
872            doc.summary = Some(description_lines[0].to_owned());
873            doc.description = Some(description_lines.join(" "));
874        }
875
876        doc
877    }
878
879    /// Parse Doxygen (C/C++)
880    fn parse_doxygen(&self, raw: &str) -> Documentation {
881        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
882
883        // Doxygen uses /** */, //!, \param, \return, etc.
884        let content = self.strip_comment_markers(raw, "/**", "*/", "*");
885
886        let param_re = Regex::new(r"[@\\]param(?:\[(?:in|out|in,out)\])?\s+(\w+)\s+(.*)").unwrap();
887        let return_re = Regex::new(r"[@\\]returns?\s+(.*)").unwrap();
888        let throws_re = Regex::new(r"[@\\](?:throws?|exception)\s+(\w+)\s*(.*)").unwrap();
889        let brief_re = Regex::new(r"[@\\]brief\s+(.*)").unwrap();
890
891        let mut description_lines = Vec::new();
892
893        for line in content.lines() {
894            let line = line.trim();
895
896            if let Some(caps) = brief_re.captures(line) {
897                doc.summary = caps.get(1).map(|m| m.as_str().to_owned());
898            } else if let Some(caps) = param_re.captures(line) {
899                doc.params.push(ParamDoc {
900                    name: caps
901                        .get(1)
902                        .map(|m| m.as_str().to_owned())
903                        .unwrap_or_default(),
904                    description: caps.get(2).map(|m| m.as_str().to_owned()),
905                    ..Default::default()
906                });
907            } else if let Some(caps) = return_re.captures(line) {
908                doc.returns = Some(ReturnDoc {
909                    description: caps.get(1).map(|m| m.as_str().to_owned()),
910                    ..Default::default()
911                });
912            } else if let Some(caps) = throws_re.captures(line) {
913                doc.throws.push(ThrowsDoc {
914                    exception_type: caps
915                        .get(1)
916                        .map(|m| m.as_str().to_owned())
917                        .unwrap_or_default(),
918                    description: caps.get(2).map(|m| m.as_str().to_owned()),
919                });
920            } else if !line.starts_with('@') && !line.starts_with('\\') {
921                description_lines.push(line);
922            }
923        }
924
925        if doc.summary.is_none() && !description_lines.is_empty() {
926            doc.summary = Some(description_lines[0].to_owned());
927        }
928        if !description_lines.is_empty() {
929            doc.description = Some(description_lines.join(" "));
930        }
931
932        doc
933    }
934
935    /// Parse bash script comments
936    fn parse_bash_comment(&self, raw: &str) -> Documentation {
937        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
938
939        let content: String = raw
940            .lines()
941            .map(|l| l.trim_start_matches('#').trim())
942            .filter(|l| !l.is_empty())
943            .collect::<Vec<_>>()
944            .join(" ");
945
946        doc.description = Some(content.clone());
947        let sentences: Vec<&str> = content.split(". ").collect();
948        if !sentences.is_empty() {
949            doc.summary = Some(sentences[0].to_owned());
950        }
951
952        doc
953    }
954
955    /// Parse generic comment (fallback)
956    fn parse_generic(&self, raw: &str) -> Documentation {
957        let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
958
959        // Strip common comment markers
960        let content: String = raw
961            .lines()
962            .map(|l| {
963                l.trim()
964                    .trim_start_matches("//")
965                    .trim_start_matches("/*")
966                    .trim_end_matches("*/")
967                    .trim_start_matches('#')
968                    .trim_start_matches("--")
969                    .trim_start_matches(";;")
970                    .trim()
971            })
972            .filter(|l| !l.is_empty())
973            .collect::<Vec<_>>()
974            .join(" ");
975
976        doc.description = Some(content.clone());
977        let sentences: Vec<&str> = content.split(". ").collect();
978        if !sentences.is_empty() {
979            doc.summary = Some(sentences[0].to_owned());
980        }
981
982        doc
983    }
984
985    // Helper methods
986
987    fn strip_comment_markers(&self, raw: &str, start: &str, end: &str, line: &str) -> String {
988        let mut content = raw
989            .trim()
990            .trim_start_matches(start)
991            .trim_end_matches(end)
992            .to_owned();
993
994        // Remove line prefixes
995        content = content
996            .lines()
997            .map(|l| {
998                let trimmed = l.trim();
999                if trimmed.starts_with(line) {
1000                    trimmed[line.len()..].trim_start()
1001                } else {
1002                    trimmed
1003                }
1004            })
1005            .collect::<Vec<_>>()
1006            .join("\n");
1007
1008        content
1009    }
1010
1011    fn strip_rust_doc_markers(&self, raw: &str) -> String {
1012        raw.lines()
1013            .map(|l| {
1014                let trimmed = l.trim();
1015                if trimmed.starts_with("///") {
1016                    trimmed[3..].trim_start()
1017                } else if trimmed.starts_with("//!") {
1018                    trimmed[3..].trim_start()
1019                } else if trimmed.starts_with("/**") {
1020                    trimmed[3..].trim_start()
1021                } else if trimmed.starts_with('*') {
1022                    trimmed[1..].trim_start()
1023                } else if trimmed == "*/" {
1024                    ""
1025                } else {
1026                    trimmed
1027                }
1028            })
1029            .collect::<Vec<_>>()
1030            .join("\n")
1031    }
1032}
1033
1034impl Default for DocumentationExtractor {
1035    fn default() -> Self {
1036        Self::new()
1037    }
1038}
1039
#[cfg(test)]
mod tests {
    use super::*;

    /// A JSDoc block with params, a return, a throws clause and an example.
    #[test]
    fn test_jsdoc_parsing() {
        let source = r#"/**
         * Calculate the sum of two numbers.
         *
         * @param {number} a - The first number
         * @param {number} b - The second number
         * @returns {number} The sum of a and b
         * @throws {Error} If inputs are not numbers
         * @example
         * add(1, 2) // returns 3
         */
        "#;

        let parsed = DocumentationExtractor::new().extract(source, Language::JavaScript);

        let summary = parsed.summary.as_deref().expect("summary should be present");
        assert!(summary.contains("Calculate"));

        assert_eq!(parsed.params.len(), 2);
        let first_param = &parsed.params[0];
        assert_eq!(first_param.name, "a");
        let first_type = first_param
            .type_info
            .as_deref()
            .expect("first parameter should carry a type");
        assert!(first_type.contains("number"));

        assert!(parsed.returns.is_some());
        assert_eq!(parsed.throws.len(), 1);
        assert_eq!(parsed.examples.len(), 1);
    }

    /// A Google-style Python docstring with Args / Returns / Raises sections.
    #[test]
    fn test_python_docstring_parsing() {
        let source = r#""""
        Calculate the sum of two numbers.

        Args:
            a (int): The first number
            b (int): The second number

        Returns:
            int: The sum of a and b

        Raises:
            ValueError: If inputs are not integers
        """"#;

        let parsed = DocumentationExtractor::new().extract(source, Language::Python);

        let summary = parsed.summary.as_deref().expect("summary should be present");
        assert!(summary.contains("Calculate"));

        assert_eq!(parsed.params.len(), 2);
        assert_eq!(parsed.params[0].name, "a");
        assert!(parsed.returns.is_some());
        assert_eq!(parsed.throws.len(), 1);
    }

    /// A `///` Rust doc comment with `# Arguments` and `# Returns` sections.
    #[test]
    fn test_rust_doc_parsing() {
        let source = r#"/// Calculate the sum of two numbers.
        ///
        /// # Arguments
        ///
        /// * `a` - The first number
        /// * `b` - The second number
        ///
        /// # Returns
        ///
        /// The sum of a and b
        "#;

        let parsed = DocumentationExtractor::new().extract(source, Language::Rust);

        let summary = parsed.summary.as_deref().expect("summary should be present");
        assert!(summary.contains("Calculate"));
        assert!(parsed.returns.is_some());
    }

    /// `@deprecated` sets the deprecation flag and records a message.
    #[test]
    fn test_deprecated_detection() {
        let source = r#"/**
         * Old function.
         * @deprecated Use newFunction instead
         */
        "#;

        let parsed = DocumentationExtractor::new().extract(source, Language::JavaScript);

        assert!(parsed.is_deprecated);
        assert!(parsed.deprecation_message.is_some());
    }
}
1138}