1use crate::analysis::types::{Documentation, Example, ParamDoc, ReturnDoc, ThrowsDoc};
7use crate::parser::Language;
8use regex::Regex;
9
10pub struct DocumentationExtractor {
12 jsdoc_param: Regex,
14 jsdoc_returns: Regex,
15 jsdoc_throws: Regex,
16 jsdoc_example: Regex,
17 jsdoc_tag: Regex,
18 python_param: Regex,
19 python_returns: Regex,
20 python_raises: Regex,
21 rust_param: Regex,
22}
23
24impl DocumentationExtractor {
25 pub fn new() -> Self {
27 Self {
28 jsdoc_param: Regex::new(r"@param\s+(?:\{([^}]+)\}\s+)?(\[)?(\w+)\]?\s*(?:-\s*)?(.*)")
30 .unwrap(),
31 jsdoc_returns: Regex::new(r"@returns?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
32 jsdoc_throws: Regex::new(r"@throws?\s+(?:\{([^}]+)\}\s+)?(.*)").unwrap(),
33 jsdoc_example: Regex::new(r"@example\s*").unwrap(),
35 jsdoc_tag: Regex::new(r"@(\w+)\s+(.*)").unwrap(),
36
37 python_param: Regex::new(r"^\s*(\w+)\s*(?:\(([^)]+)\))?\s*:\s*(.*)$").unwrap(),
39 python_returns: Regex::new(r"^\s*(?:(\w+)\s*:\s*)?(.*)$").unwrap(),
40 python_raises: Regex::new(r"^\s*(\w+)\s*:\s*(.*)$").unwrap(),
41
42 rust_param: Regex::new(r"^\s*\*\s+`(\w+)`\s*(?:-\s*)?(.*)$").unwrap(),
44 }
45 }
46
47 pub fn extract(&self, raw_doc: &str, language: Language) -> Documentation {
49 let raw_doc = raw_doc.trim();
50 if raw_doc.is_empty() {
51 return Documentation::default();
52 }
53
54 match language {
55 Language::JavaScript | Language::TypeScript => self.parse_jsdoc(raw_doc),
56 Language::Python => self.parse_python_docstring(raw_doc),
57 Language::Rust => self.parse_rust_doc(raw_doc),
58 Language::Java | Language::Kotlin => self.parse_javadoc(raw_doc),
59 Language::Go => self.parse_go_doc(raw_doc),
60 Language::Ruby => self.parse_ruby_doc(raw_doc),
61 Language::Php => self.parse_phpdoc(raw_doc),
62 Language::CSharp => self.parse_csharp_doc(raw_doc),
63 Language::Swift => self.parse_swift_doc(raw_doc),
64 Language::Scala => self.parse_scaladoc(raw_doc),
65 Language::Haskell => self.parse_haddock(raw_doc),
66 Language::Elixir => self.parse_exdoc(raw_doc),
67 Language::Clojure => self.parse_clojure_doc(raw_doc),
68 Language::OCaml => self.parse_ocamldoc(raw_doc),
69 Language::Lua => self.parse_luadoc(raw_doc),
70 Language::R => self.parse_roxygen(raw_doc),
71 Language::Cpp | Language::C => self.parse_doxygen(raw_doc),
72 Language::Bash => self.parse_bash_comment(raw_doc),
73 _ => self.parse_generic(raw_doc),
75 }
76 }
77
78 fn parse_jsdoc(&self, raw: &str) -> Documentation {
80 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
81
82 let content = self.strip_comment_markers(raw, "/**", "*/", "*");
84
85 let lines: Vec<&str> = content.lines().collect();
87
88 let mut description_lines = Vec::new();
90 let mut in_description = true;
91 let mut current_example = String::new();
92 let mut in_example = false;
93
94 for line in &lines {
95 let line = line.trim();
96
97 if line.starts_with('@') {
98 in_description = false;
99
100 if in_example && !line.starts_with("@example") {
102 if !current_example.is_empty() {
103 doc.examples.push(Example {
104 code: current_example.trim().to_owned(),
105 ..Default::default()
106 });
107 }
108 current_example.clear();
109 in_example = false;
110 }
111
112 if let Some(caps) = self.jsdoc_param.captures(line) {
114 let type_info = caps.get(1).map(|m| m.as_str().to_owned());
115 let is_optional = caps.get(2).is_some();
116 let name = caps.get(3).map_or("", |m| m.as_str());
117 let desc = caps.get(4).map_or("", |m| m.as_str());
118
119 doc.params.push(ParamDoc {
120 name: name.to_owned(),
121 type_info,
122 description: if desc.is_empty() {
123 None
124 } else {
125 Some(desc.to_owned())
126 },
127 is_optional,
128 default_value: None,
129 });
130 } else if let Some(caps) = self.jsdoc_returns.captures(line) {
131 doc.returns = Some(ReturnDoc {
132 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
133 description: caps.get(2).map(|m| m.as_str().to_owned()),
134 });
135 } else if let Some(caps) = self.jsdoc_throws.captures(line) {
136 doc.throws.push(ThrowsDoc {
137 exception_type: caps
138 .get(1)
139 .map_or_else(|| "Error".to_owned(), |m| m.as_str().to_owned()),
140 description: caps.get(2).map(|m| m.as_str().to_owned()),
141 });
142 } else if line.starts_with("@example") {
143 in_example = true;
144 let after_tag = line.strip_prefix("@example").unwrap_or("").trim();
146 if !after_tag.is_empty() {
147 current_example.push_str(after_tag);
148 current_example.push('\n');
149 }
150 } else if line.starts_with("@deprecated") {
151 doc.is_deprecated = true;
152 let msg = line.strip_prefix("@deprecated").unwrap_or("").trim();
153 if !msg.is_empty() {
154 doc.deprecation_message = Some(msg.to_owned());
155 }
156 } else if let Some(caps) = self.jsdoc_tag.captures(line) {
157 let tag = caps.get(1).map_or("", |m| m.as_str());
158 let value = caps.get(2).map_or("", |m| m.as_str());
159 doc.tags
160 .entry(tag.to_owned())
161 .or_default()
162 .push(value.to_owned());
163 }
164 } else if in_example {
165 current_example.push_str(line);
166 current_example.push('\n');
167 } else if in_description {
168 description_lines.push(line);
169 }
170 }
171
172 if !current_example.is_empty() {
174 doc.examples
175 .push(Example { code: current_example.trim().to_owned(), ..Default::default() });
176 }
177
178 if !description_lines.is_empty() {
180 let full_desc = description_lines.join("\n");
181 let sentences: Vec<&str> = full_desc.split(". ").collect();
182 if !sentences.is_empty() {
183 doc.summary = Some(sentences[0].to_owned());
184 }
185 doc.description = Some(full_desc);
186 }
187
188 doc
189 }
190
191 fn parse_python_docstring(&self, raw: &str) -> Documentation {
193 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
194
195 let content = raw
197 .trim_start_matches("\"\"\"")
198 .trim_end_matches("\"\"\"")
199 .trim_start_matches("'''")
200 .trim_end_matches("'''")
201 .trim();
202
203 let lines: Vec<&str> = content.lines().collect();
204
205 #[derive(PartialEq)]
206 enum Section {
207 Description,
208 Args,
209 Returns,
210 Raises,
211 Example,
212 Other,
213 }
214
215 let mut section = Section::Description;
216 let mut description_lines = Vec::new();
217 let mut current_param: Option<ParamDoc> = None;
218 let mut current_example = String::new();
219
220 for line in lines {
221 let trimmed = line.trim();
222
223 if trimmed == "Args:" || trimmed == "Arguments:" || trimmed == "Parameters:" {
225 section = Section::Args;
226 continue;
227 } else if trimmed == "Returns:" || trimmed == "Return:" {
228 section = Section::Returns;
229 continue;
230 } else if trimmed == "Raises:" || trimmed == "Throws:" || trimmed == "Exceptions:" {
231 section = Section::Raises;
232 continue;
233 } else if trimmed == "Example:" || trimmed == "Examples:" {
234 section = Section::Example;
235 continue;
236 } else if trimmed.ends_with(':') && !trimmed.contains(' ') {
237 section = Section::Other;
238 continue;
239 }
240
241 match section {
242 Section::Description => {
243 description_lines.push(trimmed);
244 },
245 Section::Args => {
246 if let Some(caps) = self.python_param.captures(trimmed) {
247 if let Some(param) = current_param.take() {
249 doc.params.push(param);
250 }
251
252 let name = caps.get(1).map_or("", |m| m.as_str());
253 let type_info = caps.get(2).map(|m| m.as_str().to_owned());
254 let desc = caps.get(3).map(|m| m.as_str());
255
256 current_param = Some(ParamDoc {
257 name: name.to_owned(),
258 type_info,
259 description: desc.map(String::from),
260 is_optional: false,
261 default_value: None,
262 });
263 } else if let Some(ref mut param) = current_param {
264 if let Some(ref mut desc) = param.description {
266 desc.push(' ');
267 desc.push_str(trimmed);
268 }
269 }
270 },
271 Section::Returns => {
272 if doc.returns.is_none() {
273 if let Some(caps) = self.python_returns.captures(trimmed) {
274 doc.returns = Some(ReturnDoc {
275 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
276 description: caps.get(2).map(|m| m.as_str().to_owned()),
277 });
278 }
279 } else if let Some(ref mut ret) = doc.returns {
280 if let Some(ref mut desc) = ret.description {
281 desc.push(' ');
282 desc.push_str(trimmed);
283 }
284 }
285 },
286 Section::Raises => {
287 if let Some(caps) = self.python_raises.captures(trimmed) {
288 doc.throws.push(ThrowsDoc {
289 exception_type: caps
290 .get(1)
291 .map(|m| m.as_str().to_owned())
292 .unwrap_or_default(),
293 description: caps.get(2).map(|m| m.as_str().to_owned()),
294 });
295 }
296 },
297 Section::Example => {
298 current_example.push_str(line);
299 current_example.push('\n');
300 },
301 Section::Other => {},
302 }
303 }
304
305 if let Some(param) = current_param {
307 doc.params.push(param);
308 }
309
310 if !current_example.is_empty() {
312 doc.examples.push(Example {
313 code: current_example.trim().to_owned(),
314 language: Some("python".to_owned()),
315 ..Default::default()
316 });
317 }
318
319 let desc = description_lines.join(" ");
321 if !desc.is_empty() {
322 let sentences: Vec<&str> = desc.split(". ").collect();
323 if !sentences.is_empty() {
324 doc.summary = Some(sentences[0].to_owned());
325 }
326 doc.description = Some(desc);
327 }
328
329 doc
330 }
331
332 fn parse_rust_doc(&self, raw: &str) -> Documentation {
334 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
335
336 let content = self.strip_rust_doc_markers(raw);
338
339 let lines: Vec<&str> = content.lines().collect();
340
341 #[derive(PartialEq)]
342 enum Section {
343 Description,
344 Arguments,
345 Returns,
346 Errors,
347 Panics,
348 Examples,
349 Safety,
350 }
351
352 let mut section = Section::Description;
353 let mut description_lines = Vec::new();
354 let mut current_example = String::new();
355
356 for line in lines {
357 let trimmed = line.trim();
358
359 if trimmed.starts_with("# ") {
361 let header = trimmed[2..].to_lowercase();
362 section = match header.as_str() {
363 "arguments" | "parameters" => Section::Arguments,
364 "returns" => Section::Returns,
365 "errors" => Section::Errors,
366 "panics" => Section::Panics,
367 "examples" | "example" => Section::Examples,
368 "safety" => Section::Safety,
369 _ => Section::Description,
370 };
371 continue;
372 }
373
374 match section {
375 Section::Description => {
376 description_lines.push(trimmed);
377 },
378 Section::Arguments => {
379 if let Some(caps) = self.rust_param.captures(trimmed) {
380 doc.params.push(ParamDoc {
381 name: caps
382 .get(1)
383 .map(|m| m.as_str().to_owned())
384 .unwrap_or_default(),
385 description: caps.get(2).map(|m| m.as_str().to_owned()),
386 ..Default::default()
387 });
388 }
389 },
390 Section::Returns => {
391 if doc.returns.is_none() {
392 doc.returns = Some(ReturnDoc {
393 description: Some(trimmed.to_owned()),
394 ..Default::default()
395 });
396 }
397 },
398 Section::Errors => {
399 if !trimmed.is_empty() {
400 doc.throws.push(ThrowsDoc {
401 exception_type: "Error".to_owned(),
402 description: Some(trimmed.to_owned()),
403 });
404 }
405 },
406 Section::Panics => {
407 doc.tags
408 .entry("panics".to_owned())
409 .or_default()
410 .push(trimmed.to_owned());
411 },
412 Section::Examples => {
413 current_example.push_str(line);
414 current_example.push('\n');
415 },
416 Section::Safety => {
417 doc.tags
418 .entry("safety".to_owned())
419 .or_default()
420 .push(trimmed.to_owned());
421 },
422 }
423 }
424
425 if !current_example.is_empty() {
427 let code_block_re = Regex::new(r"```(?:rust)?\n([\s\S]*?)```").unwrap();
429 for caps in code_block_re.captures_iter(¤t_example) {
430 if let Some(code) = caps.get(1) {
431 doc.examples.push(Example {
432 code: code.as_str().trim().to_owned(),
433 language: Some("rust".to_owned()),
434 ..Default::default()
435 });
436 }
437 }
438 }
439
440 let desc = description_lines.join(" ");
442 if !desc.is_empty() {
443 let sentences: Vec<&str> = desc.split(". ").collect();
444 if !sentences.is_empty() {
445 doc.summary = Some(sentences[0].to_owned());
446 }
447 doc.description = Some(desc);
448 }
449
450 doc
451 }
452
453 fn parse_javadoc(&self, raw: &str) -> Documentation {
455 self.parse_jsdoc(raw)
457 }
458
459 fn parse_go_doc(&self, raw: &str) -> Documentation {
461 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
462
463 let content: String = raw
465 .lines()
466 .map(|l| l.trim_start_matches("//").trim())
467 .collect::<Vec<_>>()
468 .join(" ");
469
470 let sentences: Vec<&str> = content.split(". ").collect();
472 if !sentences.is_empty() {
473 doc.summary = Some(sentences[0].to_owned());
474 }
475 doc.description = Some(content);
476
477 if raw.to_lowercase().contains("deprecated") {
479 doc.is_deprecated = true;
480 }
481
482 doc
483 }
484
485 fn parse_ruby_doc(&self, raw: &str) -> Documentation {
487 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
488
489 let content = self.strip_comment_markers(raw, "=begin", "=end", "#");
490
491 let param_re = Regex::new(r"@param\s+\[([^\]]+)\]\s+(\w+)\s+(.*)").unwrap();
493 let return_re = Regex::new(r"@return\s+\[([^\]]+)\]\s+(.*)").unwrap();
494 let raise_re = Regex::new(r"@raise\s+\[([^\]]+)\]\s+(.*)").unwrap();
495
496 for line in content.lines() {
497 let line = line.trim();
498
499 if let Some(caps) = param_re.captures(line) {
500 doc.params.push(ParamDoc {
501 name: caps
502 .get(2)
503 .map(|m| m.as_str().to_owned())
504 .unwrap_or_default(),
505 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
506 description: caps.get(3).map(|m| m.as_str().to_owned()),
507 ..Default::default()
508 });
509 } else if let Some(caps) = return_re.captures(line) {
510 doc.returns = Some(ReturnDoc {
511 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
512 description: caps.get(2).map(|m| m.as_str().to_owned()),
513 });
514 } else if let Some(caps) = raise_re.captures(line) {
515 doc.throws.push(ThrowsDoc {
516 exception_type: caps
517 .get(1)
518 .map(|m| m.as_str().to_owned())
519 .unwrap_or_default(),
520 description: caps.get(2).map(|m| m.as_str().to_owned()),
521 });
522 } else if !line.starts_with('@') && doc.description.is_none() {
523 doc.description = Some(line.to_owned());
524 doc.summary = Some(line.to_owned());
525 }
526 }
527
528 doc
529 }
530
531 fn parse_phpdoc(&self, raw: &str) -> Documentation {
533 self.parse_jsdoc(raw)
535 }
536
537 fn parse_csharp_doc(&self, raw: &str) -> Documentation {
539 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
540
541 let summary_re = Regex::new(r"<summary>([\s\S]*?)</summary>").unwrap();
543 let param_re = Regex::new(r#"<param name="(\w+)">([\s\S]*?)</param>"#).unwrap();
544 let returns_re = Regex::new(r"<returns>([\s\S]*?)</returns>").unwrap();
545 let exception_re =
546 Regex::new(r#"<exception cref="([^"]+)">([\s\S]*?)</exception>"#).unwrap();
547
548 if let Some(caps) = summary_re.captures(raw) {
549 let summary = caps.get(1).map(|m| m.as_str().trim().to_owned());
550 doc.summary = summary.clone();
551 doc.description = summary;
552 }
553
554 for caps in param_re.captures_iter(raw) {
555 doc.params.push(ParamDoc {
556 name: caps
557 .get(1)
558 .map(|m| m.as_str().to_owned())
559 .unwrap_or_default(),
560 description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
561 ..Default::default()
562 });
563 }
564
565 if let Some(caps) = returns_re.captures(raw) {
566 doc.returns = Some(ReturnDoc {
567 description: caps.get(1).map(|m| m.as_str().trim().to_owned()),
568 ..Default::default()
569 });
570 }
571
572 for caps in exception_re.captures_iter(raw) {
573 doc.throws.push(ThrowsDoc {
574 exception_type: caps
575 .get(1)
576 .map(|m| m.as_str().to_owned())
577 .unwrap_or_default(),
578 description: caps.get(2).map(|m| m.as_str().trim().to_owned()),
579 });
580 }
581
582 doc
583 }
584
585 fn parse_swift_doc(&self, raw: &str) -> Documentation {
587 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
588
589 let content = self.strip_comment_markers(raw, "/**", "*/", "///");
591
592 let param_re = Regex::new(r"-\s*Parameter\s+(\w+):\s*(.*)").unwrap();
593 let returns_re = Regex::new(r"-\s*Returns:\s*(.*)").unwrap();
594 let throws_re = Regex::new(r"-\s*Throws:\s*(.*)").unwrap();
595
596 let mut description_lines = Vec::new();
597
598 for line in content.lines() {
599 let line = line.trim();
600
601 if let Some(caps) = param_re.captures(line) {
602 doc.params.push(ParamDoc {
603 name: caps
604 .get(1)
605 .map(|m| m.as_str().to_owned())
606 .unwrap_or_default(),
607 description: caps.get(2).map(|m| m.as_str().to_owned()),
608 ..Default::default()
609 });
610 } else if let Some(caps) = returns_re.captures(line) {
611 doc.returns = Some(ReturnDoc {
612 description: caps.get(1).map(|m| m.as_str().to_owned()),
613 ..Default::default()
614 });
615 } else if let Some(caps) = throws_re.captures(line) {
616 doc.throws.push(ThrowsDoc {
617 exception_type: "Error".to_owned(),
618 description: caps.get(1).map(|m| m.as_str().to_owned()),
619 });
620 } else if !line.starts_with('-') && !line.is_empty() {
621 description_lines.push(line);
622 }
623 }
624
625 if !description_lines.is_empty() {
626 let desc = description_lines.join(" ");
627 doc.summary = Some(description_lines[0].to_owned());
628 doc.description = Some(desc);
629 }
630
631 doc
632 }
633
634 fn parse_scaladoc(&self, raw: &str) -> Documentation {
636 self.parse_javadoc(raw)
638 }
639
640 fn parse_haddock(&self, raw: &str) -> Documentation {
642 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
643
644 let content = raw
646 .lines()
647 .map(|l| {
648 l.trim_start_matches("--")
649 .trim_start_matches('|')
650 .trim_start_matches('^')
651 .trim()
652 })
653 .collect::<Vec<_>>()
654 .join(" ");
655
656 doc.description = Some(content.clone());
657 let sentences: Vec<&str> = content.split(". ").collect();
658 if !sentences.is_empty() {
659 doc.summary = Some(sentences[0].to_owned());
660 }
661
662 doc
663 }
664
665 fn parse_exdoc(&self, raw: &str) -> Documentation {
667 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
668
669 let content = raw
671 .trim_start_matches("@doc")
672 .trim_start_matches("@moduledoc")
673 .trim()
674 .trim_start_matches("\"\"\"")
675 .trim_end_matches("\"\"\"")
676 .trim();
677
678 let lines: Vec<&str> = content.lines().collect();
680 let mut description_lines = Vec::new();
681
682 for line in lines {
683 let trimmed = line.trim();
684
685 if trimmed.starts_with("##") {
687 continue;
689 }
690
691 if trimmed.starts_with('*') || trimmed.starts_with('-') {
692 let item = trimmed.trim_start_matches(['*', '-']).trim();
694 if item.contains(':') {
695 let parts: Vec<&str> = item.splitn(2, ':').collect();
696 if parts.len() == 2 {
697 doc.params.push(ParamDoc {
698 name: parts[0].trim().to_owned(),
699 description: Some(parts[1].trim().to_owned()),
700 ..Default::default()
701 });
702 }
703 }
704 } else if !trimmed.is_empty() {
705 description_lines.push(trimmed);
706 }
707 }
708
709 if !description_lines.is_empty() {
710 doc.summary = Some(description_lines[0].to_owned());
711 doc.description = Some(description_lines.join(" "));
712 }
713
714 doc
715 }
716
717 fn parse_clojure_doc(&self, raw: &str) -> Documentation {
719 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
720
721 let content = raw.trim_matches('"');
723
724 doc.description = Some(content.to_owned());
725 let sentences: Vec<&str> = content.split(". ").collect();
726 if !sentences.is_empty() {
727 doc.summary = Some(sentences[0].to_owned());
728 }
729
730 doc
731 }
732
733 fn parse_ocamldoc(&self, raw: &str) -> Documentation {
735 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
736
737 let content = raw.trim_start_matches("(**").trim_end_matches("*)").trim();
739
740 let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
742 let return_re = Regex::new(r"@return\s+(.*)").unwrap();
743 let raise_re = Regex::new(r"@raise\s+(\w+)\s+(.*)").unwrap();
744
745 let mut description_lines = Vec::new();
746
747 for line in content.lines() {
748 let line = line.trim();
749
750 if let Some(caps) = param_re.captures(line) {
751 doc.params.push(ParamDoc {
752 name: caps
753 .get(1)
754 .map(|m| m.as_str().to_owned())
755 .unwrap_or_default(),
756 description: caps.get(2).map(|m| m.as_str().to_owned()),
757 ..Default::default()
758 });
759 } else if let Some(caps) = return_re.captures(line) {
760 doc.returns = Some(ReturnDoc {
761 description: caps.get(1).map(|m| m.as_str().to_owned()),
762 ..Default::default()
763 });
764 } else if let Some(caps) = raise_re.captures(line) {
765 doc.throws.push(ThrowsDoc {
766 exception_type: caps
767 .get(1)
768 .map(|m| m.as_str().to_owned())
769 .unwrap_or_default(),
770 description: caps.get(2).map(|m| m.as_str().to_owned()),
771 });
772 } else if !line.starts_with('@') {
773 description_lines.push(line);
774 }
775 }
776
777 if !description_lines.is_empty() {
778 doc.summary = Some(description_lines[0].to_owned());
779 doc.description = Some(description_lines.join(" "));
780 }
781
782 doc
783 }
784
785 fn parse_luadoc(&self, raw: &str) -> Documentation {
787 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
788
789 let content: String = raw
791 .lines()
792 .map(|l| l.trim_start_matches("---").trim_start_matches("--").trim())
793 .collect::<Vec<_>>()
794 .join("\n");
795
796 let param_re = Regex::new(r"@param\s+(\w+)\s+(\w+)\s*(.*)").unwrap();
798 let return_re = Regex::new(r"@return\s+(\w+)\s*(.*)").unwrap();
799
800 let mut description_lines = Vec::new();
801
802 for line in content.lines() {
803 let line = line.trim();
804
805 if let Some(caps) = param_re.captures(line) {
806 doc.params.push(ParamDoc {
807 name: caps
808 .get(1)
809 .map(|m| m.as_str().to_owned())
810 .unwrap_or_default(),
811 type_info: caps.get(2).map(|m| m.as_str().to_owned()),
812 description: caps.get(3).map(|m| m.as_str().to_owned()),
813 ..Default::default()
814 });
815 } else if let Some(caps) = return_re.captures(line) {
816 doc.returns = Some(ReturnDoc {
817 type_info: caps.get(1).map(|m| m.as_str().to_owned()),
818 description: caps.get(2).map(|m| m.as_str().to_owned()),
819 });
820 } else if !line.starts_with('@') {
821 description_lines.push(line);
822 }
823 }
824
825 if !description_lines.is_empty() {
826 doc.summary = Some(description_lines[0].to_owned());
827 doc.description = Some(description_lines.join(" "));
828 }
829
830 doc
831 }
832
833 fn parse_roxygen(&self, raw: &str) -> Documentation {
835 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
836
837 let content: String = raw
839 .lines()
840 .map(|l| l.trim_start_matches("#'").trim())
841 .collect::<Vec<_>>()
842 .join("\n");
843
844 let param_re = Regex::new(r"@param\s+(\w+)\s+(.*)").unwrap();
845 let return_re = Regex::new(r"@return\s+(.*)").unwrap();
846
847 let mut description_lines = Vec::new();
848
849 for line in content.lines() {
850 let line = line.trim();
851
852 if let Some(caps) = param_re.captures(line) {
853 doc.params.push(ParamDoc {
854 name: caps
855 .get(1)
856 .map(|m| m.as_str().to_owned())
857 .unwrap_or_default(),
858 description: caps.get(2).map(|m| m.as_str().to_owned()),
859 ..Default::default()
860 });
861 } else if let Some(caps) = return_re.captures(line) {
862 doc.returns = Some(ReturnDoc {
863 description: caps.get(1).map(|m| m.as_str().to_owned()),
864 ..Default::default()
865 });
866 } else if !line.starts_with('@') {
867 description_lines.push(line);
868 }
869 }
870
871 if !description_lines.is_empty() {
872 doc.summary = Some(description_lines[0].to_owned());
873 doc.description = Some(description_lines.join(" "));
874 }
875
876 doc
877 }
878
879 fn parse_doxygen(&self, raw: &str) -> Documentation {
881 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
882
883 let content = self.strip_comment_markers(raw, "/**", "*/", "*");
885
886 let param_re = Regex::new(r"[@\\]param(?:\[(?:in|out|in,out)\])?\s+(\w+)\s+(.*)").unwrap();
887 let return_re = Regex::new(r"[@\\]returns?\s+(.*)").unwrap();
888 let throws_re = Regex::new(r"[@\\](?:throws?|exception)\s+(\w+)\s*(.*)").unwrap();
889 let brief_re = Regex::new(r"[@\\]brief\s+(.*)").unwrap();
890
891 let mut description_lines = Vec::new();
892
893 for line in content.lines() {
894 let line = line.trim();
895
896 if let Some(caps) = brief_re.captures(line) {
897 doc.summary = caps.get(1).map(|m| m.as_str().to_owned());
898 } else if let Some(caps) = param_re.captures(line) {
899 doc.params.push(ParamDoc {
900 name: caps
901 .get(1)
902 .map(|m| m.as_str().to_owned())
903 .unwrap_or_default(),
904 description: caps.get(2).map(|m| m.as_str().to_owned()),
905 ..Default::default()
906 });
907 } else if let Some(caps) = return_re.captures(line) {
908 doc.returns = Some(ReturnDoc {
909 description: caps.get(1).map(|m| m.as_str().to_owned()),
910 ..Default::default()
911 });
912 } else if let Some(caps) = throws_re.captures(line) {
913 doc.throws.push(ThrowsDoc {
914 exception_type: caps
915 .get(1)
916 .map(|m| m.as_str().to_owned())
917 .unwrap_or_default(),
918 description: caps.get(2).map(|m| m.as_str().to_owned()),
919 });
920 } else if !line.starts_with('@') && !line.starts_with('\\') {
921 description_lines.push(line);
922 }
923 }
924
925 if doc.summary.is_none() && !description_lines.is_empty() {
926 doc.summary = Some(description_lines[0].to_owned());
927 }
928 if !description_lines.is_empty() {
929 doc.description = Some(description_lines.join(" "));
930 }
931
932 doc
933 }
934
935 fn parse_bash_comment(&self, raw: &str) -> Documentation {
937 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
938
939 let content: String = raw
940 .lines()
941 .map(|l| l.trim_start_matches('#').trim())
942 .filter(|l| !l.is_empty())
943 .collect::<Vec<_>>()
944 .join(" ");
945
946 doc.description = Some(content.clone());
947 let sentences: Vec<&str> = content.split(". ").collect();
948 if !sentences.is_empty() {
949 doc.summary = Some(sentences[0].to_owned());
950 }
951
952 doc
953 }
954
955 fn parse_generic(&self, raw: &str) -> Documentation {
957 let mut doc = Documentation { raw: Some(raw.to_owned()), ..Default::default() };
958
959 let content: String = raw
961 .lines()
962 .map(|l| {
963 l.trim()
964 .trim_start_matches("//")
965 .trim_start_matches("/*")
966 .trim_end_matches("*/")
967 .trim_start_matches('#')
968 .trim_start_matches("--")
969 .trim_start_matches(";;")
970 .trim()
971 })
972 .filter(|l| !l.is_empty())
973 .collect::<Vec<_>>()
974 .join(" ");
975
976 doc.description = Some(content.clone());
977 let sentences: Vec<&str> = content.split(". ").collect();
978 if !sentences.is_empty() {
979 doc.summary = Some(sentences[0].to_owned());
980 }
981
982 doc
983 }
984
985 fn strip_comment_markers(&self, raw: &str, start: &str, end: &str, line: &str) -> String {
988 let mut content = raw
989 .trim()
990 .trim_start_matches(start)
991 .trim_end_matches(end)
992 .to_owned();
993
994 content = content
996 .lines()
997 .map(|l| {
998 let trimmed = l.trim();
999 if trimmed.starts_with(line) {
1000 trimmed[line.len()..].trim_start()
1001 } else {
1002 trimmed
1003 }
1004 })
1005 .collect::<Vec<_>>()
1006 .join("\n");
1007
1008 content
1009 }
1010
1011 fn strip_rust_doc_markers(&self, raw: &str) -> String {
1012 raw.lines()
1013 .map(|l| {
1014 let trimmed = l.trim();
1015 if trimmed.starts_with("///") {
1016 trimmed[3..].trim_start()
1017 } else if trimmed.starts_with("//!") {
1018 trimmed[3..].trim_start()
1019 } else if trimmed.starts_with("/**") {
1020 trimmed[3..].trim_start()
1021 } else if trimmed.starts_with('*') {
1022 trimmed[1..].trim_start()
1023 } else if trimmed == "*/" {
1024 ""
1025 } else {
1026 trimmed
1027 }
1028 })
1029 .collect::<Vec<_>>()
1030 .join("\n")
1031 }
1032}
1033
1034impl Default for DocumentationExtractor {
1035 fn default() -> Self {
1036 Self::new()
1037 }
1038}
1039
1040#[cfg(test)]
1041mod tests {
1042 use super::*;
1043
1044 fn ext() -> DocumentationExtractor {
1049 DocumentationExtractor::new()
1050 }
1051
1052 #[test]
1057 fn test_empty_string_returns_default() {
1058 let doc = ext().extract("", Language::JavaScript);
1059 assert!(doc.summary.is_none());
1060 assert!(doc.description.is_none());
1061 assert!(doc.params.is_empty());
1062 assert!(doc.returns.is_none());
1063 assert!(doc.throws.is_empty());
1064 assert!(doc.examples.is_empty());
1065 assert!(!doc.is_deprecated);
1066 assert!(doc.raw.is_none());
1067 }
1068
1069 #[test]
1070 fn test_whitespace_only_returns_default() {
1071 let doc = ext().extract(" \n\t \n ", Language::Python);
1072 assert!(doc.summary.is_none());
1073 assert!(doc.raw.is_none());
1074 }
1075
1076 #[test]
1081 fn test_jsdoc_parsing() {
1082 let jsdoc = r#"/**
1083 * Calculate the sum of two numbers.
1084 *
1085 * @param {number} a - The first number
1086 * @param {number} b - The second number
1087 * @returns {number} The sum of a and b
1088 * @throws {Error} If inputs are not numbers
1089 * @example
1090 * add(1, 2) // returns 3
1091 */
1092 "#;
1093
1094 let doc = ext().extract(jsdoc, Language::JavaScript);
1095
1096 assert!(doc.summary.is_some());
1097 assert!(doc.summary.unwrap().contains("Calculate"));
1098 assert_eq!(doc.params.len(), 2);
1099 assert_eq!(doc.params[0].name, "a");
1100 assert!(doc.params[0].type_info.as_ref().unwrap().contains("number"));
1101 assert!(doc.returns.is_some());
1102 assert_eq!(doc.throws.len(), 1);
1103 assert_eq!(doc.examples.len(), 1);
1104 }
1105
1106 #[test]
1107 fn test_jsdoc_optional_param() {
1108 let jsdoc = "/**\n * @param {string} [name] - Optional name\n */";
1109 let doc = ext().extract(jsdoc, Language::JavaScript);
1110
1111 assert_eq!(doc.params.len(), 1);
1112 assert_eq!(doc.params[0].name, "name");
1113 assert!(doc.params[0].is_optional);
1114 assert_eq!(doc.params[0].type_info.as_deref(), Some("string"));
1115 }
1116
1117 #[test]
1118 fn test_jsdoc_param_no_type() {
1119 let jsdoc = "/**\n * @param x - The value\n */";
1120 let doc = ext().extract(jsdoc, Language::JavaScript);
1121
1122 assert_eq!(doc.params.len(), 1);
1123 assert_eq!(doc.params[0].name, "x");
1124 assert!(doc.params[0].type_info.is_none());
1125 assert_eq!(doc.params[0].description.as_deref(), Some("The value"));
1126 }
1127
1128 #[test]
1129 fn test_jsdoc_param_no_description() {
1130 let jsdoc = "/**\n * @param {number} x\n */";
1131 let doc = ext().extract(jsdoc, Language::JavaScript);
1132
1133 assert_eq!(doc.params.len(), 1);
1134 assert_eq!(doc.params[0].name, "x");
1135 assert!(doc.params[0].description.is_none());
1137 }
1138
1139 #[test]
1140 fn test_jsdoc_multiple_throws() {
1141 let jsdoc = "/**\n * Do stuff.\n * @throws {TypeError} Bad type\n * @throws {RangeError} Out of range\n */";
1142 let doc = ext().extract(jsdoc, Language::JavaScript);
1143
1144 assert_eq!(doc.throws.len(), 2);
1145 assert_eq!(doc.throws[0].exception_type, "TypeError");
1146 assert_eq!(doc.throws[1].exception_type, "RangeError");
1147 }
1148
1149 #[test]
1150 fn test_jsdoc_returns_without_type() {
1151 let jsdoc = "/**\n * @returns The result\n */";
1152 let doc = ext().extract(jsdoc, Language::JavaScript);
1153
1154 assert!(doc.returns.is_some());
1155 let ret = doc.returns.unwrap();
1156 assert!(ret.type_info.is_none());
1157 assert_eq!(ret.description.as_deref(), Some("The result"));
1158 }
1159
1160 #[test]
1161 fn test_jsdoc_multiple_examples() {
1162 let jsdoc = "/**\n * Math helper.\n * @example\n * add(1,2)\n * @example\n * add(3,4)\n */";
1166 let doc = ext().extract(jsdoc, Language::JavaScript);
1167
1168 assert_eq!(doc.examples.len(), 1);
1169 assert!(doc.examples[0].code.contains("add(1,2)"));
1170 assert!(doc.examples[0].code.contains("add(3,4)"));
1171 }
1172
1173 #[test]
1174 fn test_jsdoc_deprecated_without_message() {
1175 let jsdoc = "/**\n * Old.\n * @deprecated\n */";
1176 let doc = ext().extract(jsdoc, Language::JavaScript);
1177
1178 assert!(doc.is_deprecated);
1179 assert!(doc.deprecation_message.is_none());
1181 }
1182
1183 #[test]
1184 fn test_jsdoc_deprecated_with_message() {
1185 let jsdoc = "/**\n * Old.\n * @deprecated Use bar instead\n */";
1186 let doc = ext().extract(jsdoc, Language::JavaScript);
1187
1188 assert!(doc.is_deprecated);
1189 assert_eq!(doc.deprecation_message.as_deref(), Some("Use bar instead"));
1190 }
1191
1192 #[test]
1193 fn test_jsdoc_custom_tags() {
1194 let jsdoc = "/**\n * My func.\n * @since 2.0\n * @see otherFunc\n */";
1195 let doc = ext().extract(jsdoc, Language::JavaScript);
1196
1197 assert!(doc.tags.contains_key("since"));
1198 assert!(doc.tags.contains_key("see"));
1199 assert_eq!(doc.tags["since"][0], "2.0");
1200 }
1201
1202 #[test]
1203 fn test_jsdoc_multiline_description() {
1204 let jsdoc = "/**\n * First sentence. Second sentence.\n * Third sentence.\n */";
1205 let doc = ext().extract(jsdoc, Language::JavaScript);
1206
1207 let summary = doc.summary.unwrap();
1209 assert!(summary.contains("First sentence"));
1210 let desc = doc.description.unwrap();
1211 assert!(desc.contains("Third sentence."));
1212 }
1213
1214 #[test]
1215 fn test_jsdoc_typescript_dispatch() {
1216 let jsdoc = "/**\n * A TS function.\n * @param {string} s - input\n */";
1217 let doc = ext().extract(jsdoc, Language::TypeScript);
1218
1219 assert!(doc.summary.unwrap().contains("TS function"));
1220 assert_eq!(doc.params.len(), 1);
1221 }
1222
1223 #[test]
1224 fn test_jsdoc_example_with_inline_content() {
1225 let jsdoc = "/**\n * Func.\n * @example const x = foo();\n */";
1226 let doc = ext().extract(jsdoc, Language::JavaScript);
1227
1228 assert_eq!(doc.examples.len(), 1);
1229 assert!(doc.examples[0].code.contains("const x = foo();"));
1230 }
1231
1232 #[test]
1237 fn test_python_docstring_parsing() {
1238 let docstring = r#""""
1239 Calculate the sum of two numbers.
1240
1241 Args:
1242 a (int): The first number
1243 b (int): The second number
1244
1245 Returns:
1246 int: The sum of a and b
1247
1248 Raises:
1249 ValueError: If inputs are not integers
1250 """"#;
1251
1252 let doc = ext().extract(docstring, Language::Python);
1253
1254 assert!(doc.summary.is_some());
1255 assert!(doc.summary.unwrap().contains("Calculate"));
1256 assert_eq!(doc.params.len(), 2);
1257 assert_eq!(doc.params[0].name, "a");
1258 assert!(doc.returns.is_some());
1259 assert_eq!(doc.throws.len(), 1);
1260 }
1261
1262 #[test]
1263 fn test_python_single_quote_docstring() {
1264 let docstring = "'''Sum two numbers.\n\nArgs:\n x (float): first\n'''";
1265 let doc = ext().extract(docstring, Language::Python);
1266
1267 assert!(doc.summary.unwrap().contains("Sum two numbers"));
1268 assert_eq!(doc.params.len(), 1);
1269 assert_eq!(doc.params[0].name, "x");
1270 assert_eq!(doc.params[0].type_info.as_deref(), Some("float"));
1271 }
1272
1273 #[test]
1274 fn test_python_parameters_header() {
1275 let docstring = "\"\"\"Do stuff.\n\nParameters:\n n (int): count\n\"\"\"";
1276 let doc = ext().extract(docstring, Language::Python);
1277
1278 assert_eq!(doc.params.len(), 1);
1279 assert_eq!(doc.params[0].name, "n");
1280 }
1281
1282 #[test]
1283 fn test_python_arguments_header() {
1284 let docstring = "\"\"\"Do stuff.\n\nArguments:\n n (int): count\n\"\"\"";
1285 let doc = ext().extract(docstring, Language::Python);
1286
1287 assert_eq!(doc.params.len(), 1);
1288 assert_eq!(doc.params[0].name, "n");
1289 }
1290
1291 #[test]
1292 fn test_python_multiple_raises() {
1293 let docstring =
1294 "\"\"\"Do stuff.\n\nRaises:\n ValueError: bad\n TypeError: wrong type\n\"\"\"";
1295 let doc = ext().extract(docstring, Language::Python);
1296
1297 assert_eq!(doc.throws.len(), 2);
1298 assert_eq!(doc.throws[0].exception_type, "ValueError");
1299 assert_eq!(doc.throws[1].exception_type, "TypeError");
1300 }
1301
1302 #[test]
1303 fn test_python_throws_header() {
1304 let docstring = "\"\"\"Do stuff.\n\nThrows:\n IOError: disk full\n\"\"\"";
1305 let doc = ext().extract(docstring, Language::Python);
1306
1307 assert_eq!(doc.throws.len(), 1);
1308 assert_eq!(doc.throws[0].exception_type, "IOError");
1309 }
1310
1311 #[test]
1312 fn test_python_exceptions_header() {
1313 let docstring = "\"\"\"Do stuff.\n\nExceptions:\n OSError: not found\n\"\"\"";
1314 let doc = ext().extract(docstring, Language::Python);
1315
1316 assert_eq!(doc.throws.len(), 1);
1317 assert_eq!(doc.throws[0].exception_type, "OSError");
1318 }
1319
1320 #[test]
1321 fn test_python_example_section() {
1322 let docstring = "\"\"\"Do stuff.\n\nExample:\n >>> foo(1)\n 42\n\"\"\"";
1323 let doc = ext().extract(docstring, Language::Python);
1324
1325 assert_eq!(doc.examples.len(), 1);
1326 assert!(doc.examples[0].code.contains("foo(1)"));
1327 assert_eq!(doc.examples[0].language.as_deref(), Some("python"));
1328 }
1329
1330 #[test]
1331 fn test_python_examples_plural_header() {
1332 let docstring = "\"\"\"Do stuff.\n\nExamples:\n >>> bar()\n\"\"\"";
1333 let doc = ext().extract(docstring, Language::Python);
1334
1335 assert_eq!(doc.examples.len(), 1);
1336 }
1337
1338 #[test]
1339 fn test_python_return_singular_header() {
1340 let docstring = "\"\"\"Do stuff.\n\nReturn:\n int: the result\n\"\"\"";
1341 let doc = ext().extract(docstring, Language::Python);
1342
1343 assert!(doc.returns.is_some());
1344 }
1345
1346 #[test]
1347 fn test_python_param_no_type() {
1348 let docstring = "\"\"\"Do stuff.\n\nArgs:\n name: the name value\n\"\"\"";
1349 let doc = ext().extract(docstring, Language::Python);
1350
1351 assert_eq!(doc.params.len(), 1);
1352 assert_eq!(doc.params[0].name, "name");
1353 assert!(doc.params[0].type_info.is_none());
1354 }
1355
1356 #[test]
1357 fn test_python_multiline_param_description() {
1358 let docstring =
1359 "\"\"\"Do stuff.\n\nArgs:\n x (int): First line\n continued here\n\"\"\"";
1360 let doc = ext().extract(docstring, Language::Python);
1361
1362 assert_eq!(doc.params.len(), 1);
1363 let desc = doc.params[0].description.as_ref().unwrap();
1364 assert!(desc.contains("First line"));
1365 assert!(desc.contains("continued here"));
1366 }
1367
1368 #[test]
1369 fn test_python_multiline_returns_description() {
1370 let docstring =
1371 "\"\"\"Do stuff.\n\nReturns:\n int: First line\n more info\n\"\"\"";
1372 let doc = ext().extract(docstring, Language::Python);
1373
1374 let ret = doc.returns.unwrap();
1375 let desc = ret.description.unwrap();
1376 assert!(desc.contains("First line"));
1377 assert!(desc.contains("more info"));
1378 }
1379
1380 #[test]
1381 fn test_python_description_only() {
1382 let docstring = "\"\"\"A simple description with no sections.\"\"\"";
1383 let doc = ext().extract(docstring, Language::Python);
1384
1385 assert!(doc.summary.unwrap().contains("simple description"));
1386 assert!(doc.params.is_empty());
1387 assert!(doc.returns.is_none());
1388 }
1389
1390 #[test]
1391 fn test_python_other_section_ignored() {
1392 let docstring = "\"\"\"Do stuff.\n\nNotes:\n Some note here.\n\"\"\"";
1393 let doc = ext().extract(docstring, Language::Python);
1394
1395 assert!(doc.params.is_empty());
1397 assert!(doc.returns.is_none());
1398 }
1399
1400 #[test]
1405 fn test_rust_doc_parsing() {
1406 let rust_doc = "/// Calculate the sum of two numbers.\n///\n/// # Arguments\n///\n/// * `a` - The first number\n/// * `b` - The second number\n///\n/// # Returns\n///\n/// The sum of a and b";
1407
1408 let doc = ext().extract(rust_doc, Language::Rust);
1409
1410 assert!(doc.summary.is_some());
1411 assert!(doc.summary.unwrap().contains("Calculate"));
1412 assert_eq!(doc.params.len(), 2);
1413 assert_eq!(doc.params[0].name, "a");
1414 assert_eq!(doc.params[1].name, "b");
1415 assert!(doc.returns.is_some());
1416 }
1417
1418 #[test]
1419 fn test_rust_inner_doc_comment() {
1420 let doc_str = "//! Module level documentation.\n//! Second line.";
1421 let doc = ext().extract(doc_str, Language::Rust);
1422
1423 assert!(doc.summary.unwrap().contains("Module level documentation"));
1424 }
1425
1426 #[test]
1427 fn test_rust_block_doc_comment() {
1428 let doc_str = "/** Block doc comment.\n * More details here.\n */";
1429 let doc = ext().extract(doc_str, Language::Rust);
1430
1431 assert!(doc.description.unwrap().contains("Block doc comment"));
1432 }
1433
1434 #[test]
1435 fn test_rust_errors_section() {
1436 let doc_str = "/// Do something.\n///\n/// # Errors\n///\n/// Returns Err if file not found.\n/// Also returns Err on permission denied.";
1437 let doc = ext().extract(doc_str, Language::Rust);
1438
1439 assert_eq!(doc.throws.len(), 2);
1440 assert_eq!(doc.throws[0].exception_type, "Error");
1441 assert!(doc.throws[0]
1442 .description
1443 .as_ref()
1444 .unwrap()
1445 .contains("file not found"));
1446 }
1447
1448 #[test]
1449 fn test_rust_panics_section() {
1450 let doc_str =
1452 "/// Do something.\n///\n/// # Panics\n///\n/// Panics if index is out of bounds.";
1453 let doc = ext().extract(doc_str, Language::Rust);
1454
1455 assert!(doc.tags.contains_key("panics"));
1456 let panics_entries = &doc.tags["panics"];
1457 assert!(panics_entries.iter().any(|e| e.contains("out of bounds")));
1458 }
1459
1460 #[test]
1461 fn test_rust_safety_section() {
1462 let doc_str =
1464 "/// Unsafe op.\n///\n/// # Safety\n///\n/// Caller must ensure pointer is valid.";
1465 let doc = ext().extract(doc_str, Language::Rust);
1466
1467 assert!(doc.tags.contains_key("safety"));
1468 let safety_entries = &doc.tags["safety"];
1469 assert!(safety_entries
1470 .iter()
1471 .any(|e| e.contains("pointer is valid")));
1472 }
1473
1474 #[test]
1475 fn test_rust_examples_with_code_block() {
1476 let doc_str =
1477 "/// A function.\n///\n/// # Examples\n///\n/// ```rust\n/// let x = foo();\n/// ```";
1478 let doc = ext().extract(doc_str, Language::Rust);
1479
1480 assert_eq!(doc.examples.len(), 1);
1481 assert!(doc.examples[0].code.contains("let x = foo();"));
1482 assert_eq!(doc.examples[0].language.as_deref(), Some("rust"));
1483 }
1484
1485 #[test]
1486 fn test_rust_examples_code_block_no_lang() {
1487 let doc_str = "/// A function.\n///\n/// # Examples\n///\n/// ```\n/// foo();\n/// ```";
1488 let doc = ext().extract(doc_str, Language::Rust);
1489
1490 assert_eq!(doc.examples.len(), 1);
1491 assert!(doc.examples[0].code.contains("foo();"));
1492 }
1493
1494 #[test]
1495 fn test_rust_example_singular_header() {
1496 let doc_str = "/// A function.\n///\n/// # Example\n///\n/// ```\n/// bar();\n/// ```";
1497 let doc = ext().extract(doc_str, Language::Rust);
1498
1499 assert_eq!(doc.examples.len(), 1);
1500 }
1501
1502 #[test]
1503 fn test_rust_parameters_header() {
1504 let doc_str = "/// Do it.\n///\n/// # Parameters\n///\n/// * `x` - The x value";
1505 let doc = ext().extract(doc_str, Language::Rust);
1506
1507 assert_eq!(doc.params.len(), 1);
1508 assert_eq!(doc.params[0].name, "x");
1509 }
1510
1511 #[test]
1512 fn test_rust_unknown_header_falls_back_to_description() {
1513 let doc_str = "/// Do it.\n///\n/// # Implementation Details\n///\n/// Uses a hash map.";
1514 let doc = ext().extract(doc_str, Language::Rust);
1515
1516 let desc = doc.description.unwrap();
1518 assert!(desc.contains("Uses a hash map"));
1519 }
1520
1521 #[test]
1522 fn test_rust_errors_empty_lines_skipped() {
1523 let doc_str = "/// Do it.\n///\n/// # Errors\n///\n/// \n/// Real error here.";
1524 let doc = ext().extract(doc_str, Language::Rust);
1525
1526 assert_eq!(doc.throws.len(), 1);
1528 assert!(doc.throws[0]
1529 .description
1530 .as_ref()
1531 .unwrap()
1532 .contains("Real error"));
1533 }
1534
1535 #[test]
1540 fn test_javadoc_parsing() {
1541 let javadoc = "/**\n * Process the data.\n *\n * @param input the input data\n * @return the processed result\n * @throws IOException if reading fails\n */";
1542 let doc = ext().extract(javadoc, Language::Java);
1543
1544 assert!(doc.summary.unwrap().contains("Process the data"));
1545 assert_eq!(doc.params.len(), 1);
1546 assert_eq!(doc.params[0].name, "input");
1547 assert!(doc.returns.is_some());
1548 assert_eq!(doc.throws.len(), 1);
1549 }
1550
1551 #[test]
1552 fn test_kotlin_delegates_to_javadoc() {
1553 let kdoc = "/**\n * Kotlin function.\n * @param name the name\n */";
1554 let doc = ext().extract(kdoc, Language::Kotlin);
1555
1556 assert!(doc.summary.unwrap().contains("Kotlin function"));
1557 assert_eq!(doc.params.len(), 1);
1558 }
1559
1560 #[test]
1565 fn test_go_doc_basic() {
1566 let go_doc = "// Calculate returns the sum of a and b. It panics on overflow.";
1567 let doc = ext().extract(go_doc, Language::Go);
1568
1569 assert_eq!(doc.summary.as_deref(), Some("Calculate returns the sum of a and b"));
1570 let desc = doc.description.unwrap();
1571 assert!(desc.contains("panics on overflow"));
1572 }
1573
1574 #[test]
1575 fn test_go_doc_multiline() {
1576 let go_doc = "// First line.\n// Second line.\n// Third line.";
1577 let doc = ext().extract(go_doc, Language::Go);
1578
1579 let desc = doc.description.unwrap();
1580 assert!(desc.contains("First line."));
1581 assert!(desc.contains("Third line."));
1582 }
1583
1584 #[test]
1585 fn test_go_doc_deprecated() {
1586 let go_doc = "// Deprecated: Use NewFunc instead.\n// This function is old.";
1587 let doc = ext().extract(go_doc, Language::Go);
1588
1589 assert!(doc.is_deprecated);
1590 }
1591
1592 #[test]
1593 fn test_go_doc_not_deprecated() {
1594 let go_doc = "// Process handles the request.";
1595 let doc = ext().extract(go_doc, Language::Go);
1596
1597 assert!(!doc.is_deprecated);
1598 }
1599
1600 #[test]
1605 fn test_ruby_yard_doc() {
1606 let yard = "# Calculate the sum.\n# @param [Integer] a the first number\n# @param [Integer] b the second number\n# @return [Integer] the sum\n# @raise [ArgumentError] if inputs are invalid";
1607 let doc = ext().extract(yard, Language::Ruby);
1608
1609 assert!(doc.summary.unwrap().contains("Calculate the sum"));
1610 assert_eq!(doc.params.len(), 2);
1611 assert_eq!(doc.params[0].name, "a");
1612 assert_eq!(doc.params[0].type_info.as_deref(), Some("Integer"));
1613 assert!(doc.returns.is_some());
1614 assert_eq!(doc.returns.unwrap().type_info.as_deref(), Some("Integer"));
1615 assert_eq!(doc.throws.len(), 1);
1616 assert_eq!(doc.throws[0].exception_type, "ArgumentError");
1617 }
1618
1619 #[test]
1620 fn test_ruby_description_only() {
1621 let yard = "# A simple helper method.";
1622 let doc = ext().extract(yard, Language::Ruby);
1623
1624 assert!(doc.summary.unwrap().contains("simple helper"));
1625 assert!(doc.params.is_empty());
1626 }
1627
1628 #[test]
1633 fn test_phpdoc_parsing() {
1634 let phpdoc =
1635 "/**\n * Send an email.\n * @param string $to Recipient address\n * @return bool\n */";
1636 let doc = ext().extract(phpdoc, Language::Php);
1637
1638 assert!(doc.summary.unwrap().contains("Send an email"));
1639 assert_eq!(doc.params.len(), 1);
1640 assert!(doc.returns.is_some());
1641 }
1642
1643 #[test]
1648 fn test_csharp_xml_doc() {
1649 let csharp_doc = "/// <summary>\n/// Calculates the area.\n/// </summary>\n/// <param name=\"width\">The width</param>\n/// <param name=\"height\">The height</param>\n/// <returns>The area value</returns>\n/// <exception cref=\"ArgumentException\">If negative</exception>";
1650 let doc = ext().extract(csharp_doc, Language::CSharp);
1651
1652 assert!(doc.summary.unwrap().contains("Calculates the area"));
1653 assert_eq!(doc.params.len(), 2);
1654 assert_eq!(doc.params[0].name, "width");
1655 assert_eq!(doc.params[1].name, "height");
1656 assert!(doc.returns.is_some());
1657 assert!(doc
1658 .returns
1659 .unwrap()
1660 .description
1661 .unwrap()
1662 .contains("area value"));
1663 assert_eq!(doc.throws.len(), 1);
1664 assert_eq!(doc.throws[0].exception_type, "ArgumentException");
1665 }
1666
1667 #[test]
1668 fn test_csharp_summary_only() {
1669 let csharp_doc = "/// <summary>Simple summary.</summary>";
1670 let doc = ext().extract(csharp_doc, Language::CSharp);
1671
1672 assert_eq!(doc.summary.as_deref(), Some("Simple summary."));
1673 assert!(doc.params.is_empty());
1674 }
1675
1676 #[test]
1681 fn test_swift_doc() {
1682 let swift_doc = "/// Calculates the distance.\n///\n/// - Parameter from: The start point\n/// - Parameter to: The end point\n/// - Returns: The distance\n/// - Throws: An error if coordinates are invalid";
1683 let doc = ext().extract(swift_doc, Language::Swift);
1684
1685 assert!(doc.summary.unwrap().contains("Calculates the distance"));
1686 assert_eq!(doc.params.len(), 2);
1687 assert_eq!(doc.params[0].name, "from");
1688 assert_eq!(doc.params[1].name, "to");
1689 assert!(doc.returns.is_some());
1690 assert_eq!(doc.throws.len(), 1);
1691 assert_eq!(doc.throws[0].exception_type, "Error");
1692 }
1693
1694 #[test]
1695 fn test_swift_description_only() {
1696 let swift_doc = "/// A simple utility function.";
1697 let doc = ext().extract(swift_doc, Language::Swift);
1698
1699 assert!(doc.summary.unwrap().contains("simple utility"));
1700 }
1701
1702 #[test]
1707 fn test_scaladoc_delegates() {
1708 let scaladoc = "/**\n * Scala function.\n * @param x the input\n * @return the output\n */";
1709 let doc = ext().extract(scaladoc, Language::Scala);
1710
1711 assert!(doc.summary.unwrap().contains("Scala function"));
1712 assert_eq!(doc.params.len(), 1);
1713 assert!(doc.returns.is_some());
1714 }
1715
1716 #[test]
1721 fn test_haddock_basic() {
1722 let haddock = "-- | Compute the factorial. It uses recursion.";
1723 let doc = ext().extract(haddock, Language::Haskell);
1724
1725 assert!(doc.summary.unwrap().contains("Compute the factorial"));
1726 assert!(doc.description.unwrap().contains("recursion"));
1727 }
1728
1729 #[test]
1730 fn test_haddock_multiline() {
1731 let haddock = "-- | First line.\n-- Second line.";
1732 let doc = ext().extract(haddock, Language::Haskell);
1733
1734 let desc = doc.description.unwrap();
1735 assert!(desc.contains("First line."));
1736 assert!(desc.contains("Second line."));
1737 }
1738
1739 #[test]
1740 fn test_haddock_caret_prefix() {
1741 let haddock = "-- ^ Argument documentation.";
1742 let doc = ext().extract(haddock, Language::Haskell);
1743
1744 assert!(doc.description.unwrap().contains("Argument documentation"));
1745 }
1746
1747 #[test]
1752 fn test_exdoc_basic() {
1753 let exdoc = "@doc \"\"\"\nFetches a user by ID.\n\n* id: The user identifier\n\"\"\"";
1754 let doc = ext().extract(exdoc, Language::Elixir);
1755
1756 assert!(doc.summary.unwrap().contains("Fetches a user by ID"));
1757 assert_eq!(doc.params.len(), 1);
1758 assert_eq!(doc.params[0].name, "id");
1759 }
1760
1761 #[test]
1762 fn test_exdoc_moduledoc() {
1763 let exdoc = "@moduledoc \"\"\"\nThis module handles authentication.\n\"\"\"";
1764 let doc = ext().extract(exdoc, Language::Elixir);
1765
1766 assert!(doc.summary.unwrap().contains("authentication"));
1767 }
1768
1769 #[test]
1770 fn test_exdoc_dash_list_params() {
1771 let exdoc = "@doc \"\"\"\nDo stuff.\n\n- name: The name\n- age: The age\n\"\"\"";
1772 let doc = ext().extract(exdoc, Language::Elixir);
1773
1774 assert_eq!(doc.params.len(), 2);
1775 assert_eq!(doc.params[0].name, "name");
1776 assert_eq!(doc.params[1].name, "age");
1777 }
1778
1779 #[test]
1784 fn test_clojure_doc_basic() {
1785 let clj = "\"Adds two numbers together. Returns their sum.\"";
1786 let doc = ext().extract(clj, Language::Clojure);
1787
1788 assert!(doc.summary.unwrap().contains("Adds two numbers together"));
1789 assert!(doc.description.unwrap().contains("Returns their sum"));
1790 }
1791
1792 #[test]
1793 fn test_clojure_doc_no_period() {
1794 let clj = "\"Simple function without a period\"";
1795 let doc = ext().extract(clj, Language::Clojure);
1796
1797 assert_eq!(doc.summary.as_deref(), Some("Simple function without a period"));
1798 }
1799
1800 #[test]
1805 fn test_ocamldoc_basic() {
1806 let ocaml = "(** Compute the length.\n@param lst the input list\n@return the number of elements\n@raise Invalid_argument if list is circular\n*)";
1807 let doc = ext().extract(ocaml, Language::OCaml);
1808
1809 assert!(doc.summary.unwrap().contains("Compute the length"));
1810 assert_eq!(doc.params.len(), 1);
1811 assert_eq!(doc.params[0].name, "lst");
1812 assert!(doc.returns.is_some());
1813 assert_eq!(doc.throws.len(), 1);
1814 assert_eq!(doc.throws[0].exception_type, "Invalid_argument");
1815 }
1816
1817 #[test]
1822 fn test_luadoc_basic() {
1823 let lua = "--- Process the data.\n--- @param input string The input data\n--- @return boolean True on success";
1824 let doc = ext().extract(lua, Language::Lua);
1825
1826 assert!(doc.summary.unwrap().contains("Process the data"));
1827 assert_eq!(doc.params.len(), 1);
1828 assert_eq!(doc.params[0].name, "input");
1829 assert_eq!(doc.params[0].type_info.as_deref(), Some("string"));
1830 assert!(doc.returns.is_some());
1831 assert_eq!(doc.returns.unwrap().type_info.as_deref(), Some("boolean"));
1832 }
1833
1834 #[test]
1839 fn test_roxygen_basic() {
1840 let rox =
1841 "#' Calculate the mean.\n#' @param x A numeric vector\n#' @return The arithmetic mean";
1842 let doc = ext().extract(rox, Language::R);
1843
1844 assert!(doc.summary.unwrap().contains("Calculate the mean"));
1845 assert_eq!(doc.params.len(), 1);
1846 assert_eq!(doc.params[0].name, "x");
1847 assert!(doc.returns.is_some());
1848 }
1849
1850 #[test]
1855 fn test_doxygen_basic() {
1856 let dox = "/**\n * @brief Calculate the sum.\n * @param a First operand\n * @param b Second operand\n * @return The sum\n * @throws std::overflow_error On overflow\n */";
1857 let doc = ext().extract(dox, Language::Cpp);
1858
1859 assert_eq!(doc.summary.as_deref(), Some("Calculate the sum."));
1860 assert_eq!(doc.params.len(), 2);
1861 assert_eq!(doc.params[0].name, "a");
1862 assert_eq!(doc.params[1].name, "b");
1863 assert!(doc.returns.is_some());
1864 assert_eq!(doc.throws.len(), 1);
1865 }
1866
1867 #[test]
1868 fn test_doxygen_c_dispatch() {
1869 let dox = "/**\n * @brief A C function.\n * @param x input\n */";
1870 let doc = ext().extract(dox, Language::C);
1871
1872 assert_eq!(doc.summary.as_deref(), Some("A C function."));
1873 assert_eq!(doc.params.len(), 1);
1874 }
1875
1876 #[test]
1877 fn test_doxygen_backslash_syntax() {
1878 let dox = "/**\n * \\brief Backslash style.\n * \\param n count\n * \\return the result\n * \\throws bad_alloc on memory failure\n */";
1879 let doc = ext().extract(dox, Language::Cpp);
1880
1881 assert_eq!(doc.summary.as_deref(), Some("Backslash style."));
1882 assert_eq!(doc.params.len(), 1);
1883 assert_eq!(doc.params[0].name, "n");
1884 assert!(doc.returns.is_some());
1885 assert_eq!(doc.throws.len(), 1);
1886 }
1887
1888 #[test]
1889 fn test_doxygen_param_direction() {
1890 let dox =
1891 "/**\n * @param[in] x input\n * @param[out] y output\n * @param[in,out] z both\n */";
1892 let doc = ext().extract(dox, Language::Cpp);
1893
1894 assert_eq!(doc.params.len(), 3);
1895 assert_eq!(doc.params[0].name, "x");
1896 assert_eq!(doc.params[1].name, "y");
1897 assert_eq!(doc.params[2].name, "z");
1898 }
1899
1900 #[test]
1901 fn test_doxygen_no_brief_uses_first_line() {
1902 let dox = "/**\n * First line as description.\n * @param x input\n */";
1903 let doc = ext().extract(dox, Language::Cpp);
1904
1905 let summary = doc.summary.unwrap();
1907 assert!(summary.is_empty() || summary.contains("First line as description"));
1910 }
1911
1912 #[test]
1917 fn test_bash_comment_basic() {
1918 let bash = "# Deploy the application. Restarts the service.";
1919 let doc = ext().extract(bash, Language::Bash);
1920
1921 assert!(doc.summary.unwrap().contains("Deploy the application"));
1922 }
1923
1924 #[test]
1925 fn test_bash_multiline_comment() {
1926 let bash = "# First line.\n# Second line.\n# Third line.";
1927 let doc = ext().extract(bash, Language::Bash);
1928
1929 let desc = doc.description.unwrap();
1930 assert!(desc.contains("First line."));
1931 assert!(desc.contains("Third line."));
1932 }
1933
1934 #[test]
1935 fn test_bash_empty_comment_lines_filtered() {
1936 let bash = "# Content here.\n#\n# More content.";
1937 let doc = ext().extract(bash, Language::Bash);
1938
1939 let desc = doc.description.unwrap();
1940 assert!(desc.contains("Content here."));
1941 assert!(desc.contains("More content."));
1942 }
1943
1944 #[test]
1949 fn test_generic_fallback() {
1950 let comment = "// A generic comment.";
1952 let doc = ext().extract(comment, Language::FSharp);
1953
1954 assert!(doc.summary.unwrap().contains("generic comment"));
1955 }
1956
1957 #[test]
1958 fn test_generic_strips_various_markers() {
1959 let comment = "/* Block comment content */";
1960 let doc = ext().extract(comment, Language::FSharp);
1961
1962 assert!(doc.description.unwrap().contains("Block comment content"));
1963 }
1964
1965 #[test]
1966 fn test_generic_hash_comment() {
1967 let comment = "# Hash comment content";
1968 let doc = ext().extract(comment, Language::FSharp);
1969
1970 assert!(doc.description.unwrap().contains("Hash comment content"));
1971 }
1972
1973 #[test]
1974 fn test_generic_double_dash() {
1975 let comment = "-- SQL style comment";
1976 let doc = ext().extract(comment, Language::FSharp);
1977
1978 assert!(doc.description.unwrap().contains("SQL style comment"));
1979 }
1980
1981 #[test]
1982 fn test_generic_semicolon_comment() {
1983 let comment = ";; Lisp-style comment";
1984 let doc = ext().extract(comment, Language::FSharp);
1985
1986 assert!(doc.description.unwrap().contains("Lisp-style comment"));
1987 }
1988
1989 #[test]
1994 fn test_default_creates_extractor() {
1995 let ext: DocumentationExtractor = Default::default();
1996 let doc = ext.extract("/// Hello.", Language::Rust);
1997 assert!(doc.summary.is_some());
1998 }
1999
2000 #[test]
2005 fn test_raw_field_preserved() {
2006 let input = "/// Some doc.";
2007 let doc = ext().extract(input, Language::Rust);
2008 assert_eq!(doc.raw.as_deref(), Some("/// Some doc."));
2009 }
2010
2011 #[test]
2012 fn test_raw_field_preserved_python() {
2013 let input = "\"\"\"Some doc.\"\"\"";
2014 let doc = ext().extract(input, Language::Python);
2015 assert_eq!(doc.raw.as_deref(), Some("\"\"\"Some doc.\"\"\""));
2016 }
2017
2018 #[test]
2023 fn test_jsdoc_special_characters() {
2024 let jsdoc =
2025 "/**\n * Process <T> & handle \"quotes\".\n * @param {Array<string>} items - The items\n */";
2026 let doc = ext().extract(jsdoc, Language::JavaScript);
2027
2028 assert!(doc.description.unwrap().contains("<T>"));
2029 assert_eq!(doc.params.len(), 1);
2030 assert_eq!(doc.params[0].type_info.as_deref(), Some("Array<string>"));
2031 }
2032
2033 #[test]
2034 fn test_python_docstring_with_code_block() {
2035 let docstring =
2036 "\"\"\"Process data.\n\nExample:\n ```python\n result = process(data)\n ```\n\"\"\"";
2037 let doc = ext().extract(docstring, Language::Python);
2038
2039 assert_eq!(doc.examples.len(), 1);
2040 assert!(doc.examples[0].code.contains("process(data)"));
2041 }
2042
2043 #[test]
2044 fn test_jsdoc_with_unicode() {
2045 let jsdoc = "/**\n * Calculate \u{03C0} (pi) approximation.\n * @param {number} n - Number of iterations\n */";
2046 let doc = ext().extract(jsdoc, Language::JavaScript);
2047
2048 assert!(doc.description.unwrap().contains('\u{03C0}'));
2049 assert_eq!(doc.params.len(), 1);
2050 }
2051
2052 #[test]
2057 fn test_strip_comment_markers_basic() {
2058 let e = ext();
2059 let result = e.strip_comment_markers("/** line1\n * line2\n */", "/**", "*/", "*");
2060 assert!(result.contains("line1"));
2061 assert!(result.contains("line2"));
2062 assert!(!result.contains("* line2"));
2064 }
2065
2066 #[test]
2067 fn test_strip_comment_markers_no_prefix_match() {
2068 let e = ext();
2069 let result =
2070 e.strip_comment_markers("/** no prefix lines\nplain line\n */", "/**", "*/", "*");
2071 assert!(result.contains("plain line"));
2072 }
2073
2074 #[test]
2079 fn test_strip_rust_doc_markers_triple_slash() {
2080 let e = ext();
2081 let result = e.strip_rust_doc_markers("/// Hello\n/// World");
2082 assert!(result.contains("Hello"));
2083 assert!(result.contains("World"));
2084 }
2085
2086 #[test]
2087 fn test_strip_rust_doc_markers_inner() {
2088 let e = ext();
2089 let result = e.strip_rust_doc_markers("//! Module doc\n//! More");
2090 assert!(result.contains("Module doc"));
2091 assert!(result.contains("More"));
2092 }
2093
2094 #[test]
2095 fn test_strip_rust_doc_markers_block_style() {
2096 let e = ext();
2097 let result = e.strip_rust_doc_markers("/** Block\n * content\n */");
2098 assert!(result.contains("Block"));
2099 assert!(result.contains("content"));
2100 }
2101
2102 #[test]
2103 fn test_strip_rust_doc_markers_closing_only() {
2104 let e = ext();
2105 let result = e.strip_rust_doc_markers("*/");
2106 assert_eq!(result, "/");
2110 }
2111
2112 #[test]
2113 fn test_strip_rust_doc_markers_plain_line() {
2114 let e = ext();
2115 let result = e.strip_rust_doc_markers("plain text without markers");
2116 assert!(result.contains("plain text without markers"));
2117 }
2118
2119 #[test]
2124 fn test_deprecated_detection() {
2125 let jsdoc = r#"/**
2126 * Old function.
2127 * @deprecated Use newFunction instead
2128 */
2129 "#;
2130
2131 let doc = ext().extract(jsdoc, Language::JavaScript);
2132
2133 assert!(doc.is_deprecated);
2134 assert!(doc.deprecation_message.is_some());
2135 }
2136}