1use crate::analysis::types::{Documentation, Example, ParamDoc, ReturnDoc, ThrowsDoc};
7use crate::parser::Language;
8use regex::Regex;
9
10pub struct DocumentationExtractor {
12 jsdoc_param: Regex,
14 jsdoc_returns: Regex,
15 jsdoc_throws: Regex,
16 jsdoc_example: Regex,
17 jsdoc_tag: Regex,
18 python_param: Regex,
19 python_returns: Regex,
20 python_raises: Regex,
21 rust_param: Regex,
22}
23
24impl DocumentationExtractor {
25 pub fn new() -> Self {
27 Self {
28 jsdoc_param: Regex::new(r"@param\s+(?:\{([^}]+)\}\s+)?(\[)?(\w+)\]?\s*(?:-\s*)?(.*)")
30 .unwrap(),
31 jsdoc_returns: Regex::new(r"@returns?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
32 jsdoc_throws: Regex::new(r"@throws?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
33 jsdoc_example: Regex::new(r"@example\s*").unwrap(),
35 jsdoc_tag: Regex::new(r"@(\w+)\s+(.*)").unwrap(),
36
37 python_param: Regex::new(r"^\s*(\w+)\s*(?:\(([^)]+)\))?\s*:\s*(.*)$").unwrap(),
39 python_returns: Regex::new(r"^\s*(?:(\w+)\s*:\s*)?(.*)$").unwrap(),
40 python_raises: Regex::new(r"^\s*(\w+)\s*:\s*(.*)$").unwrap(),
41
42 rust_param: Regex::new(r"^\s*\*\s+`(\w+)`\s*(?:-\s*)?(.*)$").unwrap(),
44 }
45 }
46
47 pub fn extract(&self, raw_doc: &str, language: Language) -> Documentation {
49 let raw_doc = raw_doc.trim();
50 if raw_doc.is_empty() {
51 return Documentation::default();
52 }
53
54 match language {
55 Language::JavaScript | Language::TypeScript => self.parse_jsdoc(raw_doc),
56 Language::Python => self.parse_python_docstring(raw_doc),
57 Language::Rust => self.parse_rust_doc(raw_doc),
58 Language::Java | Language::Kotlin => self.parse_javadoc(raw_doc),
59 Language::Go => self.parse_go_doc(raw_doc),
60 Language::Ruby => self.parse_ruby_doc(raw_doc),
61 Language::Php => self.parse_phpdoc(raw_doc),
62 Language::CSharp => self.parse_csharp_doc(raw_doc),
63 Language::Swift => self.parse_swift_doc(raw_doc),
64 Language::Scala => self.parse_scaladoc(raw_doc),
65 Language::Haskell => self.parse_haddock(raw_doc),
66 Language::Elixir => self.parse_exdoc(raw_doc),
67 Language::Clojure => self.parse_clojure_doc(raw_doc),
68 Language::OCaml => self.parse_ocamldoc(raw_doc),
69 Language::Lua => self.parse_luadoc(raw_doc),
70 Language::R => self.parse_roxygen(raw_doc),
71 Language::Cpp | Language::C => self.parse_doxygen(raw_doc),
72 Language::Bash => self.parse_bash_comment(raw_doc),
73 _ => self.parse_generic(raw_doc),
75 }
76 }
77
78 fn parse_jsdoc(&self, raw: &str) -> Documentation {
80 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
81
82 let content = self.strip_comment_markers(raw, "/**", "*/", "*");
84
85 let lines: Vec<&str> = content.lines().collect();
87
88 let mut description_lines = Vec::new();
90 let mut in_description = true;
91 let mut current_example = String::new();
92 let mut in_example = false;
93
94 for line in &lines {
95 let line = line.trim();
96
97 if line.starts_with('@') {
98 in_description = false;
99
100 if in_example && !line.starts_with("@example") {
102 if !current_example.is_empty() {
103 doc.examples.push(Example {
104 code: current_example.trim().to_owned(),
105 ..Default::default()
106 });
107 }
108 current_example.clear();
109 in_example = false;
110 }
111
112 if let Some(caps) = self.jsdoc_param.captures(line) {
114 let type_info = caps.get(1).map(|m| m.as_str().to_owned());
115 let is_optional = caps.get(2).is_some();
116 let name = caps.get(3).map_or("", |m| m.as_str());
117 let desc = caps.get(4).map_or("", |m| m.as_str());
118
119 doc.params.push(ParamDoc {
120 name: name.to_owned(),
121 type_info,
122 description: if desc.is_empty() {
123 None
124 } else {
125 Some(desc.to_owned())
126 },
127 is_optional,
128 default_value: None,
129 });
130 } else if let Some(caps) = self.jsdoc_returns.captures(line) {
131 doc.returns = Some(ReturnDoc {
132 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
133 description: caps.get(2).map(|m| m.as_str().to_owned()),
134 });
135 } else if let Some(caps) = self.jsdoc_throws.captures(line) {
136 doc.throws.push(ThrowsDoc {
137 exception_type: caps
138 .get(1)
139 .map_or_else(|| "Error".to_owned(), |m| m.as_str().to_owned()),
140 description: caps.get(2).map(|m| m.as_str().to_owned()),
141 });
142 } else if line.starts_with("@example") {
143 in_example = true;
144 let after_tag = line.strip_prefix("@example").unwrap_or("").trim();
146 if !after_tag.is_empty() {
147 current_example.push_str(after_tag);
148 current_example.push('\n');
149 }
150 } else if line.starts_with("@deprecated") {
151 doc.is_deprecated = true;
152 let msg = line.strip_prefix("@deprecated").unwrap_or("").trim();
153 if !msg.is_empty() {
154 doc.deprecation_message = Some(msg.to_owned());
155 }
156 } else if let Some(caps) = self.jsdoc_tag.captures(line) {
157 let tag = caps.get(1).map_or("", |m| m.as_str());
158 let value = caps.get(2).map_or("", |m| m.as_str());
159 doc.tags
160 .entry(tag.to_owned())
161 .or_default()
162 .push(value.to_owned());
163 }
164 } else if in_example {
165 current_example.push_str(line);
166 current_example.push('\n');
167 } else if in_description {
168 description_lines.push(line);
169 }
170 }
171
172 if !current_example.is_empty() {
174 doc.examples
175 .push(Example { code: current_example.trim().to_owned(), ..Default::default() });
176 }
177
178 if !description_lines.is_empty() {
180 let full_desc = description_lines.join("\n");
181 let sentences: Vec<&str> = full_desc.split(". ").collect();
182 if !sentences.is_empty() {
183 doc.summary = Some(sentences[0].to_owned());
184 }
185 doc.description = Some(full_desc);
186 }
187
188 doc
189 }
190
191 fn parse_python_docstring(&self, raw: &str) -> Documentation {
193 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
194
195 let content = raw
197 .trim_start_matches("\"\"\"")
198 .trim_end_matches("\"\"\"")
199 .trim_start_matches("'''")
200 .trim_end_matches("'''")
201 .trim();
202
203 let lines: Vec<&str> = content.lines().collect();
204
205 #[derive(PartialEq)]
206 enum Section {
207 Description,
208 Args,
209 Returns,
210 Raises,
211 Example,
212 Other,
213 }
214
215 let mut section = Section::Description;
216 let mut description_lines = Vec::new();
217 let mut current_param: Option<ParamDoc> = None;
218 let mut current_example = String::new();
219
220 for line in lines {
221 let trimmed = line.trim();
222
223 if trimmed == "Args:" || trimmed == "Arguments:" || trimmed == "Parameters:" {
225 section = Section::Args;
226 continue;
227 } else if trimmed == "Returns:" || trimmed == "Return:" {
228 section = Section::Returns;
229 continue;
230 } else if trimmed == "Raises:" || trimmed == "Throws:" || trimmed == "Exceptions:" {
231 section = Section::Raises;
232 continue;
233 } else if trimmed == "Example:" || trimmed == "Examples:" {
234 section = Section::Example;
235 continue;
236 } else if trimmed.ends_with(':') && !trimmed.contains(' ') {
237 section = Section::Other;
238 continue;
239 }
240
241 match section {
242 Section::Description => {
243 description_lines.push(trimmed);
244 },
245 Section::Args => {
246 if let Some(caps) = self.python_param.captures(trimmed) {
247 if let Some(param) = current_param.take() {
249 doc.params.push(param);
250 }
251
252 let name = caps.get(1).map_or("", |m| m.as_str());
253 let type_info = caps.get(2).map(|m| m.as_str().to_owned());
254 let desc = caps.get(3).map(|m| m.as_str());
255
256 current_param = Some(ParamDoc {
257 name: name.to_owned(),
258 type_info,
259 description: desc.map(String::from),
260 is_optional: false,
261 default_value: None,
262 });
263 } else if let Some(ref mut param) = current_param {
264 if let Some(ref mut desc) = param.description {
266 desc.push(' ');
267 desc.push_str(trimmed);
268 }
269 }
270 },
271 Section::Returns => {
272 if doc.returns.is_none() {
273 if let Some(caps) = self.python_returns.captures(trimmed) {
274 doc.returns = Some(ReturnDoc {
275 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
276 description: caps.get(2).map(|m| m.as_str().to_owned()),
277 });
278 }
279 } else if let Some(ref mut ret) = doc.returns {
280 if let Some(ref mut desc) = ret.description {
281 desc.push(' ');
282 desc.push_str(trimmed);
283 }
284 }
285 },
286 Section::Raises => {
287 if let Some(caps) = self.python_raises.captures(trimmed) {
288 doc.throws.push(ThrowsDoc {
289 exception_type: caps
290 .get(1)
291 .map(|m| m.as_str().to_owned())
292 .unwrap_or_default(),
293 description: caps.get(2).map(|m| m.as_str().to_owned()),
294 });
295 }
296 },
297 Section::Example => {
298 current_example.push_str(line);
299 current_example.push('\n');
300 },
301 Section::Other => {},
302 }
303 }
304
305 if let Some(param) = current_param {
307 doc.params.push(param);
308 }
309
310 if !current_example.is_empty() {
312 doc.examples.push(Example {
313 code: current_example.trim().to_owned(),
314 language: Some("python".to_owned()),
315 ..Default::default()
316 });
317 }
318
319 let desc = description_lines.join(" ");
321 if !desc.is_empty() {
322 let sentences: Vec<&str> = desc.split(". ").collect();
323 if !sentences.is_empty() {
324 doc.summary = Some(sentences[0].to_owned());
325 }
326 doc.description = Some(desc);
327 }
328
329 doc
330 }
331
332 fn parse_rust_doc(&self, raw: &str) -> Documentation {
334 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
335
336 let content = self.strip_rust_doc_markers(raw);
338
339 let lines: Vec<&str> = content.lines().collect();
340
341 #[derive(PartialEq)]
342 enum Section {
343 Description,
344 Arguments,
345 Returns,
346 Errors,
347 Panics,
348 Examples,
349 Safety,
350 }
351
352 let mut section = Section::Description;
353 let mut description_lines = Vec::new();
354 let mut current_example = String::new();
355
356 for line in lines {
357 let trimmed = line.trim();
358
359 if trimmed.starts_with("# ") {
361 let header = trimmed[2..].to_lowercase();
362 section = match header.as_str() {
363 "arguments" | "parameters" => Section::Arguments,
364 "returns" => Section::Returns,
365 "errors" => Section::Errors,
366 "panics" => Section::Panics,
367 "examples" | "example" => Section::Examples,
368 "safety" => Section::Safety,
369 _ => Section::Description,
370 };
371 continue;
372 }
373
374 match section {
375 Section::Description => {
376 description_lines.push(trimmed);
377 },
378 Section::Arguments => {
379 if let Some(caps) = self.rust_param.captures(trimmed) {
380 doc.params.push(ParamDoc {
381 name: caps
382 .get(1)
383 .map(|m| m.as_str().to_owned())
384 .unwrap_or_default(),
385 description: caps.get(2).map(|m| m.as_str().to_owned()),
386 ..Default::default()
387 });
388 }
389 },
390 Section::Returns => {
391 if doc.returns.is_none() {
392 doc.returns = Some(ReturnDoc {
393 description: Some(trimmed.to_owned()),
394 ..Default::default()
395 });
396 }
397 },
398 Section::Errors => {
399 if !trimmed.is_empty() {
400 doc.throws.push(ThrowsDoc {
401 exception_type: "Error".to_owned(),
402 description: Some(trimmed.to_owned()),
403 });
404 }
405 },
406 Section::Panics => {
407 doc.tags
408 .entry("panics".to_owned())
409 .or_default()
410 .push(trimmed.to_owned());
411 },
412 Section::Examples => {
413 current_example.push_str(line);
414 current_example.push('\n');
415 },
416 Section::Safety => {
417 doc.tags
418 .entry("safety".to_owned())
419 .or_default()
420 .push(trimmed.to_owned());
421 },
422 }
423 }
424
425 if !current_example.is_empty() {
427 let code_block_re = Regex::new(r"```(?:rust)?\n([\s\S]*?)```").unwrap();
429 for caps in code_block_re.captures_iter(¤t_example) {
430 if let Some(code) = caps.get(1) {
431 doc.examples.push(Example {
432 code: code.as_str().trim().to_owned(),
433 language: Some("rust".to_owned()),
434 ..Default::default()
435 });
436 }
437 }
438 }
439
440 let desc = description_lines.join(" ");
442 if !desc.is_empty() {
443 let sentences: Vec<&str> = desc.split(". ").collect();
444 if !sentences.is_empty() {
445 doc.summary = Some(sentences[0].to_owned());
446 }
447 doc.description = Some(desc);
448 }
449
450 doc
451 }
452
453 fn parse_javadoc(&self, raw: &str) -> Documentation {
455 self.parse_jsdoc(raw)
457 }
458
459 fn parse_go_doc(&self, raw: &str) -> Documentation {
461 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
462
463 let content: String = raw
465 .lines()
466 .map(|l| l.trim_start_matches("//").trim())
467 .collect::<Vec<_>>()
468 .join(" ");
469
470 let sentences: Vec<&str> = content.split(". ").collect();
472 if !sentences.is_empty() {
473 doc.summary = Some(sentences[0].to_owned());
474 }
475 doc.description = Some(content);
476
477 if raw.to_lowercase().contains("deprecated") {
479 doc.is_deprecated = true;
480 }
481
482 doc
483 }
484
485 fn parse_ruby_doc(&self, raw: &str) -> Documentation {
487 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
488
489 let content = self.strip_comment_markers(raw, "=begin", "=end", "#");
490
491 let param_re = Regex::new(r"@param\s+\[([^\]]+)\]\s+(\w+)\s+(.*)").unwrap();
493 let return_re = Regex::new(r"@return\s+\[([^\]]+)\]\s+(.*)").unwrap();
494 let raise_re = Regex::new(r"@raise\s+\[([^\]]+)\]\s+(.*)").unwrap();
495
496 for line in content.lines() {
497 let line = line.trim();
498
499 if let Some(caps) = param_re.captures(line) {
500 doc.params.push(ParamDoc {
501 name: caps
502 .get(2)
503 .map(|m| m.as_str().to_owned())
504 .unwrap_or_default(),
505 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
506 description: caps.get(3).map(|m| m.as_str().to_owned()),
507 ..Default::default()
508 });
509 } else if let Some(caps) = return_re.captures(line) {
510 doc.returns = Some(ReturnDoc {
511 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
512 description: caps.get(2).map(|m| m.as_str().to_owned()),
513 });
514 } else if let Some(caps) = raise_re.captures(line) {
515 doc.throws.push(ThrowsDoc {
516 exception_type: caps
517 .get(1)
518 .map(|m| m.as_str().to_owned())
519 .unwrap_or_default(),
520 description: caps.get(2).map(|m| m.as_str().to_owned()),
521 });
522 } else if !line.starts_with('@') && doc.description.is_none() {
523 doc.description = Some(line.to_owned());
524 doc.summary = Some(line.to_owned());
525 }
526 }
527
528 doc
529 }
530
531 fn parse_phpdoc(&self, raw: &str) -> Documentation {
533 self.parse_jsdoc(raw)
535 }
536
537 fn parse_csharp_doc(&self, raw: &str) -> Documentation {
539 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
540
541 let summary_re = Regex::new(r"<summary>([\s\S]*?)</summary>").unwrap();
543 let param_re = Regex::new(r#"<param name="(\w+)">([\s\S]*?)</param>"#).unwrap();
544 let returns_re = Regex::new(r"<returns>([\s\S]*?)</returns>").unwrap();
545 let exception_re =
546 Regex::new(r#"<exception cref="([^"]+)">([\s\S]*?)</exception>"#).unwrap();
547
548 if let Some(caps) = summary_re.captures(raw) {
549 let summary = caps.get(1).map(|m| m.as_str().trim().to_owned());
550 doc.summary = summary.clone();
551 doc.description = summary;
552 }
553
554 for caps in param_re.captures_iter(raw) {
555 doc.params.push(ParamDoc {
556 name: caps
557 .get(1)
558 .map(|m| m.as_str().to_owned())
559 .unwrap_or_default(),
560 description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
561 ..Default::default()
562 });
563 }
564
565 if let Some(caps) = returns_re.captures(raw) {
566 doc.returns = Some(ReturnDoc {
567 description: caps.get(1).map(|m| m.as_str().trim().to_owned()),
568 ..Default::default()
569 });
570 }
571
572 for caps in exception_re.captures_iter(raw) {
573 doc.throws.push(ThrowsDoc {
574 exception_type: caps
575 .get(1)
576 .map(|m| m.as_str().to_owned())
577 .unwrap_or_default(),
578 description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
579 });
580 }
581
582 doc
583 }
584
585 fn parse_swift_doc(&self, raw: &str) -> Documentation {
587 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
588
589 let content = self.strip_comment_markers(raw, "/**", "*/", "///");
591
592 let param_re = Regex::new(r"-\s*Parameter\s+(\w+):\s*(.*)").unwrap();
593 let returns_re = Regex::new(r"-\s*Returns:\s*(.*)").unwrap();
594 let throws_re = Regex::new(r"-\s*Throws:\s*(.*)").unwrap();
595
596 let mut description_lines = Vec::new();
597
598 for line in content.lines() {
599 let line = line.trim();
600
601 if let Some(caps) = param_re.captures(line) {
602 doc.params.push(ParamDoc {
603 name: caps
604 .get(1)
605 .map(|m| m.as_str().to_owned())
606 .unwrap_or_default(),
607 description: caps.get(2).map(|m| m.as_str().to_owned()),
608 ..Default::default()
609 });
610 } else if let Some(caps) = returns_re.captures(line) {
611 doc.returns = Some(ReturnDoc {
612 description: caps.get(1).map(|m| m.as_str().to_owned()),
613 ..Default::default()
614 });
615 } else if let Some(caps) = throws_re.captures(line) {
616 doc.throws.push(ThrowsDoc {
617 exception_type: "Error".to_owned(),
618 description: caps.get(1).map(|m| m.as_str().to_owned()),
619 });
620 } else if !line.starts_with('-') && !line.is_empty() {
621 description_lines.push(line);
622 }
623 }
624
625 if !description_lines.is_empty() {
626 let desc = description_lines.join(" ");
627 doc.summary = Some(description_lines[0].to_owned());
628 doc.description = Some(desc);
629 }
630
631 doc
632 }
633
634 fn parse_scaladoc(&self, raw: &str) -> Documentation {
636 self.parse_javadoc(raw)
638 }
639
640 fn parse_haddock(&self, raw: &str) -> Documentation {
642 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
643
644 let content = raw
646 .lines()
647 .map(|l| {
648 l.trim_start_matches("--")
649 .trim_start_matches('|')
650 .trim_start_matches('^')
651 .trim()
652 })
653 .collect::<Vec<_>>()
654 .join(" ");
655
656 doc.description = Some(content.clone());
657 let sentences: Vec<&str> = content.split(". ").collect();
658 if !sentences.is_empty() {
659 doc.summary = Some(sentences[0].to_owned());
660 }
661
662 doc
663 }
664
665 fn parse_exdoc(&self, raw: &str) -> Documentation {
667 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
668
669 let content = raw
671 .trim_start_matches("@doc")
672 .trim_start_matches("@moduledoc")
673 .trim()
674 .trim_start_matches("\"\"\"")
675 .trim_end_matches("\"\"\"")
676 .trim();
677
678 let lines: Vec<&str> = content.lines().collect();
680 let mut description_lines = Vec::new();
681
682 for line in lines {
683 let trimmed = line.trim();
684
685 if trimmed.starts_with("##") {
687 continue;
689 }
690
691 if trimmed.starts_with('*') || trimmed.starts_with('-') {
692 let item = trimmed.trim_start_matches(['*', '-']).trim();
694 if item.contains(':') {
695 let parts: Vec<&str> = item.splitn(2, ':').collect();
696 if parts.len() == 2 {
697 doc.params.push(ParamDoc {
698 name: parts[0].trim().to_owned(),
699 description: Some(parts[1].trim().to_owned()),
700 ..Default::default()
701 });
702 }
703 }
704 } else if !trimmed.is_empty() {
705 description_lines.push(trimmed);
706 }
707 }
708
709 if !description_lines.is_empty() {
710 doc.summary = Some(description_lines[0].to_owned());
711 doc.description = Some(description_lines.join(" "));
712 }
713
714 doc
715 }
716
717 fn parse_clojure_doc(&self, raw: &str) -> Documentation {
719 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
720
721 let content = raw.trim_matches('"');
723
724 doc.description = Some(content.to_owned());
725 let sentences: Vec<&str> = content.split(". ").collect();
726 if !sentences.is_empty() {
727 doc.summary = Some(sentences[0].to_owned());
728 }
729
730 doc
731 }
732
733 fn parse_ocamldoc(&self, raw: &str) -> Documentation {
735 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
736
737 let content = raw.trim_start_matches("(**").trim_end_matches("*)").trim();
739
740 let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
742 let return_re = Regex::new(r"@return\s+(.*)").unwrap();
743 let raise_re = Regex::new(r"@raise\s+(\w+)\s+(.*)").unwrap();
744
745 let mut description_lines = Vec::new();
746
747 for line in content.lines() {
748 let line = line.trim();
749
750 if let Some(caps) = param_re.captures(line) {
751 doc.params.push(ParamDoc {
752 name: caps
753 .get(1)
754 .map(|m| m.as_str().to_owned())
755 .unwrap_or_default(),
756 description: caps.get(2).map(|m| m.as_str().to_owned()),
757 ..Default::default()
758 });
759 } else if let Some(caps) = return_re.captures(line) {
760 doc.returns = Some(ReturnDoc {
761 description: caps.get(1).map(|m| m.as_str().to_owned()),
762 ..Default::default()
763 });
764 } else if let Some(caps) = raise_re.captures(line) {
765 doc.throws.push(ThrowsDoc {
766 exception_type: caps
767 .get(1)
768 .map(|m| m.as_str().to_owned())
769 .unwrap_or_default(),
770 description: caps.get(2).map(|m| m.as_str().to_owned()),
771 });
772 } else if !line.starts_with('@') {
773 description_lines.push(line);
774 }
775 }
776
777 if !description_lines.is_empty() {
778 doc.summary = Some(description_lines[0].to_owned());
779 doc.description = Some(description_lines.join(" "));
780 }
781
782 doc
783 }
784
785 fn parse_luadoc(&self, raw: &str) -> Documentation {
787 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
788
789 let content: String = raw
791 .lines()
792 .map(|l| l.trim_start_matches("---").trim_start_matches("--").trim())
793 .collect::<Vec<_>>()
794 .join("\n");
795
796 let param_re = Regex::new(r"@param\s+(\w+)\s+(\w+)\s*(.*)").unwrap();
798 let return_re = Regex::new(r"@return\s+(\w+)\s*(.*)").unwrap();
799
800 let mut description_lines = Vec::new();
801
802 for line in content.lines() {
803 let line = line.trim();
804
805 if let Some(caps) = param_re.captures(line) {
806 doc.params.push(ParamDoc {
807 name: caps
808 .get(1)
809 .map(|m| m.as_str().to_owned())
810 .unwrap_or_default(),
811 type_info: caps.get(2).map(|m| m.as_str().to_owned()),
812 description: caps.get(3).map(|m| m.as_str().to_owned()),
813 ..Default::default()
814 });
815 } else if let Some(caps) = return_re.captures(line) {
816 doc.returns = Some(ReturnDoc {
817 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
818 description: caps.get(2).map(|m| m.as_str().to_owned()),
819 });
820 } else if !line.starts_with('@') {
821 description_lines.push(line);
822 }
823 }
824
825 if !description_lines.is_empty() {
826 doc.summary = Some(description_lines[0].to_owned());
827 doc.description = Some(description_lines.join(" "));
828 }
829
830 doc
831 }
832
833 fn parse_roxygen(&self, raw: &str) -> Documentation {
835 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
836
837 let content: String = raw
839 .lines()
840 .map(|l| l.trim_start_matches("#'").trim())
841 .collect::<Vec<_>>()
842 .join("\n");
843
844 let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
845 let return_re = Regex::new(r"@return\s+(.*)").unwrap();
846
847 let mut description_lines = Vec::new();
848
849 for line in content.lines() {
850 let line = line.trim();
851
852 if let Some(caps) = param_re.captures(line) {
853 doc.params.push(ParamDoc {
854 name: caps
855 .get(1)
856 .map(|m| m.as_str().to_owned())
857 .unwrap_or_default(),
858 description: caps.get(2).map(|m| m.as_str().to_owned()),
859 ..Default::default()
860 });
861 } else if let Some(caps) = return_re.captures(line) {
862 doc.returns = Some(ReturnDoc {
863 description: caps.get(1).map(|m| m.as_str().to_owned()),
864 ..Default::default()
865 });
866 } else if !line.starts_with('@') {
867 description_lines.push(line);
868 }
869 }
870
871 if !description_lines.is_empty() {
872 doc.summary = Some(description_lines[0].to_owned());
873 doc.description = Some(description_lines.join(" "));
874 }
875
876 doc
877 }
878
879 fn parse_doxygen(&self, raw: &str) -> Documentation {
881 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
882
883 let content = self.strip_comment_markers(raw, "/**", "*/", "*");
885
886 let param_re = Regex::new(r"[@\\]param(?:\[(?:in|out|in,out)\])?\s+(\w+)\s+(.*)").unwrap();
887 let return_re = Regex::new(r"[@\\]returns?\s+(.*)").unwrap();
888 let throws_re = Regex::new(r"[@\\](?:throws?|exception)\s+(\w+)\s*(.*)").unwrap();
889 let brief_re = Regex::new(r"[@\\]brief\s+(.*)").unwrap();
890
891 let mut description_lines = Vec::new();
892
893 for line in content.lines() {
894 let line = line.trim();
895
896 if let Some(caps) = brief_re.captures(line) {
897 doc.summary = caps.get(1).map(|m| m.as_str().to_owned());
898 } else if let Some(caps) = param_re.captures(line) {
899 doc.params.push(ParamDoc {
900 name: caps
901 .get(1)
902 .map(|m| m.as_str().to_owned())
903 .unwrap_or_default(),
904 description: caps.get(2).map(|m| m.as_str().to_owned()),
905 ..Default::default()
906 });
907 } else if let Some(caps) = return_re.captures(line) {
908 doc.returns = Some(ReturnDoc {
909 description: caps.get(1).map(|m| m.as_str().to_owned()),
910 ..Default::default()
911 });
912 } else if let Some(caps) = throws_re.captures(line) {
913 doc.throws.push(ThrowsDoc {
914 exception_type: caps
915 .get(1)
916 .map(|m| m.as_str().to_owned())
917 .unwrap_or_default(),
918 description: caps.get(2).map(|m| m.as_str().to_owned()),
919 });
920 } else if !line.starts_with('@') && !line.starts_with('\\') {
921 description_lines.push(line);
922 }
923 }
924
925 if doc.summary.is_none() && !description_lines.is_empty() {
926 doc.summary = Some(description_lines[0].to_owned());
927 }
928 if !description_lines.is_empty() {
929 doc.description = Some(description_lines.join(" "));
930 }
931
932 doc
933 }
934
935 fn parse_bash_comment(&self, raw: &str) -> Documentation {
937 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
938
939 let content: String = raw
940 .lines()
941 .map(|l| l.trim_start_matches('#').trim())
942 .filter(|l| !l.is_empty())
943 .collect::<Vec<_>>()
944 .join(" ");
945
946 doc.description = Some(content.clone());
947 let sentences: Vec<&str> = content.split(". ").collect();
948 if !sentences.is_empty() {
949 doc.summary = Some(sentences[0].to_owned());
950 }
951
952 doc
953 }
954
955 fn parse_generic(&self, raw: &str) -> Documentation {
957 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
958
959 let content: String = raw
961 .lines()
962 .map(|l| {
963 l.trim()
964 .trim_start_matches("//")
965 .trim_start_matches("/*")
966 .trim_end_matches("*/")
967 .trim_start_matches('#')
968 .trim_start_matches("--")
969 .trim_start_matches(";;")
970 .trim()
971 })
972 .filter(|l| !l.is_empty())
973 .collect::<Vec<_>>()
974 .join(" ");
975
976 doc.description = Some(content.clone());
977 let sentences: Vec<&str> = content.split(". ").collect();
978 if !sentences.is_empty() {
979 doc.summary = Some(sentences[0].to_owned());
980 }
981
982 doc
983 }
984
985 fn strip_comment_markers(&self, raw: &str, start: &str, end: &str, line: &str) -> String {
988 let mut content = raw
989 .trim()
990 .trim_start_matches(start)
991 .trim_end_matches(end)
992 .to_owned();
993
994 content = content
996 .lines()
997 .map(|l| {
998 let trimmed = l.trim();
999 if trimmed.starts_with(line) {
1000 trimmed[line.len()..].trim_start()
1001 } else {
1002 trimmed
1003 }
1004 })
1005 .collect::<Vec<_>>()
1006 .join("\n");
1007
1008 content
1009 }
1010
1011 fn strip_rust_doc_markers(&self, raw: &str) -> String {
1012 raw.lines()
1013 .map(|l| {
1014 let trimmed = l.trim();
1015 if trimmed.starts_with("///") {
1016 trimmed[3..].trim_start()
1017 } else if trimmed.starts_with("//!") {
1018 trimmed[3..].trim_start()
1019 } else if trimmed.starts_with("/**") {
1020 trimmed[3..].trim_start()
1021 } else if trimmed.starts_with('*') {
1022 trimmed[1..].trim_start()
1023 } else if trimmed == "*/" {
1024 ""
1025 } else {
1026 trimmed
1027 }
1028 })
1029 .collect::<Vec<_>>()
1030 .join("\n")
1031 }
1032}
1033
1034impl Default for DocumentationExtractor {
1035 fn default() -> Self {
1036 Self::new()
1037 }
1038}
1039
#[cfg(test)]
mod tests {
    use super::*;

    /// Returns true when the parsed summary exists and contains `needle`.
    fn summary_contains(doc: &Documentation, needle: &str) -> bool {
        doc.summary.as_deref().map_or(false, |text| text.contains(needle))
    }

    /// A complete JSDoc block yields summary, params, return, throws and example.
    #[test]
    fn test_jsdoc_parsing() {
        let source = r#"/**
             * Calculate the sum of two numbers.
             *
             * @param {number} a - The first number
             * @param {number} b - The second number
             * @returns {number} The sum of a and b
             * @throws {Error} If inputs are not numbers
             * @example
             * add(1, 2) // returns 3
             */
        "#;

        let parsed = DocumentationExtractor::new().extract(source, Language::JavaScript);

        assert!(parsed.summary.is_some());
        assert!(summary_contains(&parsed, "Calculate"));
        assert_eq!(parsed.params.len(), 2);
        assert_eq!(parsed.params[0].name, "a");
        let first_type = parsed.params[0].type_info.as_deref().unwrap();
        assert!(first_type.contains("number"));
        assert!(parsed.returns.is_some());
        assert_eq!(parsed.throws.len(), 1);
        assert_eq!(parsed.examples.len(), 1);
    }

    /// A sectioned Python docstring yields summary, params, return and raises.
    #[test]
    fn test_python_docstring_parsing() {
        let source = r#""""
        Calculate the sum of two numbers.

        Args:
            a (int): The first number
            b (int): The second number

        Returns:
            int: The sum of a and b

        Raises:
            ValueError: If inputs are not integers
        """"#;

        let parsed = DocumentationExtractor::new().extract(source, Language::Python);

        assert!(parsed.summary.is_some());
        assert!(summary_contains(&parsed, "Calculate"));
        assert_eq!(parsed.params.len(), 2);
        assert_eq!(parsed.params[0].name, "a");
        assert!(parsed.returns.is_some());
        assert_eq!(parsed.throws.len(), 1);
    }

    /// Rust `///` docs with Markdown sections yield a summary and a return entry.
    #[test]
    fn test_rust_doc_parsing() {
        let source = r#"/// Calculate the sum of two numbers.
        ///
        /// # Arguments
        ///
        /// * `a` - The first number
        /// * `b` - The second number
        ///
        /// # Returns
        ///
        /// The sum of a and b
        "#;

        let parsed = DocumentationExtractor::new().extract(source, Language::Rust);

        assert!(parsed.summary.is_some());
        assert!(summary_contains(&parsed, "Calculate"));
        assert!(parsed.returns.is_some());
    }

    /// `@deprecated` sets the flag and records the message.
    #[test]
    fn test_deprecated_detection() {
        let source = r#"/**
             * Old function.
             * @deprecated Use newFunction instead
             */
        "#;

        let parsed = DocumentationExtractor::new().extract(source, Language::JavaScript);

        assert!(parsed.is_deprecated);
        assert!(parsed.deprecation_message.is_some());
    }
}
1138}