1use crate::{AuditError, Result};
8use regex::Regex;
9use std::path::{Path, PathBuf};
10
/// Extracts code examples, API references, version references, links and
/// feature mentions from documentation text.
#[derive(Debug)]
pub struct DocumentationParser {
    /// Workspace version; a version string equal to this value is classified
    /// as [`VersionType::WorkspaceVersion`].
    workspace_version: String,
    /// Rust version supplied to [`DocumentationParser::new`]. It is stored but
    /// not read by the parsing methods, hence the `dead_code` allowance.
    #[allow(dead_code)]
    rust_version: String,
    /// Pre-compiled regular expressions used by the extraction methods.
    patterns: ParserPatterns,
}
22
/// Compiled regular expressions used by [`DocumentationParser`].
#[derive(Debug)]
struct ParserPatterns {
    /// Line starting with three backticks; optional group 1 is the
    /// comma-separated info string (for example `rust,no_run`).
    code_block: Regex,
    /// `adk_*::path` reference; group 1 is the crate, group 2 the rest of the path.
    api_reference: Regex,
    /// `version = "…"`; group 1 is the quoted version string.
    version_reference: Regex,
    /// Markdown inline link `[text](target)`; group 1 is the text, group 2 the target.
    internal_link: Regex,
    /// `feature = "…"` / `features = ["…"`; group 1 is the first quoted name.
    feature_flag: Regex,
    /// `rust-version = "…"`; group 1 is the quoted version string.
    rust_version: Regex,
    /// Line starting with `name = {`; group 1 is the name.
    toml_dependency: Regex,
}
41
/// Everything extracted from a single documentation file.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedDocument {
    /// Path passed to the parser for this document.
    pub file_path: PathBuf,
    /// Fenced code blocks found in the document.
    pub code_examples: Vec<CodeExample>,
    /// `adk_*::…` API paths mentioned in the document.
    pub api_references: Vec<ApiReference>,
    /// Version strings found in `version = "…"` style assignments.
    pub version_references: Vec<VersionReference>,
    /// Markdown inline links found in the document.
    pub internal_links: Vec<InternalLink>,
    /// Cargo feature names found in `features = …` style assignments.
    pub feature_mentions: Vec<FeatureMention>,
}
58
/// A fenced code block extracted from a document.
#[derive(Debug, Clone, PartialEq)]
pub struct CodeExample {
    /// Text between the fences, with surrounding whitespace trimmed.
    pub content: String,
    /// First comma-separated entry of the fence info string (empty when the
    /// fence has none).
    pub language: String,
    /// 1-based line number of the opening fence.
    pub line_number: usize,
    /// Whether the example is considered runnable by the parser's heuristics.
    pub is_runnable: bool,
    /// Remaining comma-separated entries of the info string, e.g. `ignore`.
    pub attributes: Vec<String>,
}
73
/// A reference to an `adk_*` API path found in a document.
#[derive(Debug, Clone, PartialEq)]
pub struct ApiReference {
    /// First `::`-separated segment of the path, e.g. `adk_core`.
    pub crate_name: String,
    /// The full path as written, including the crate name.
    pub item_path: String,
    /// Kind of item, guessed from the last path segment's name.
    pub item_type: ApiItemType,
    /// 1-based line number on which the reference appears.
    pub line_number: usize,
    /// Full text of the line containing the reference.
    pub context: String,
}
88
/// Kind of API item an [`ApiReference`] points at.
///
/// The parser assigns this as a best-effort guess from the item's name.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ApiItemType {
    /// A struct.
    Struct,
    /// A free function.
    Function,
    /// A method.
    Method,
    /// A trait.
    Trait,
    /// An enum.
    Enum,
    /// A constant.
    Constant,
    /// A module.
    Module,
    /// A type alias.
    TypeAlias,
    /// The kind could not be determined.
    Unknown,
}
111
/// A version string found in a document.
#[derive(Debug, Clone, PartialEq)]
pub struct VersionReference {
    /// The quoted version string, without the quotes.
    pub version: String,
    /// What kind of version the parser believes this is.
    pub version_type: VersionType,
    /// 1-based line number on which the version appears.
    pub line_number: usize,
    /// Full text of the line containing the version.
    pub context: String,
}
124
/// Classification of a [`VersionReference`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum VersionType {
    /// Version of a dependency crate (line contains `version` and `=`).
    CrateVersion,
    /// Rust toolchain version (line mentions `rust-version` or `rustc`).
    RustVersion,
    /// Version of this workspace (line mentions `adk-` or the value equals
    /// the configured workspace version).
    WorkspaceVersion,
    /// Any other version string.
    Generic,
}
137
/// A Markdown inline link (`[text](target)`) found in a document.
#[derive(Debug, Clone, PartialEq)]
pub struct InternalLink {
    /// Link destination exactly as written between the parentheses.
    pub target: String,
    /// Link text exactly as written between the square brackets.
    pub text: String,
    /// 1-based line number on which the link appears.
    pub line_number: usize,
    /// `false` for HTTP(S) URLs and `#` anchors, `true` otherwise.
    pub is_relative: bool,
}
150
/// A Cargo feature name found in a document.
#[derive(Debug, Clone, PartialEq)]
pub struct FeatureMention {
    /// The quoted feature name, without the quotes.
    pub feature_name: String,
    /// Crate the feature appears to belong to, taken from a `name = {`
    /// dependency key or an `adk_*::` path on the same line, if any.
    pub crate_name: Option<String>,
    /// 1-based line number on which the feature appears.
    pub line_number: usize,
    /// Full text of the line containing the feature.
    pub context: String,
}
163
164impl DocumentationParser {
165 pub fn new(workspace_version: String, rust_version: String) -> Result<Self> {
176 let patterns = ParserPatterns::new()?;
177
178 Ok(Self { workspace_version, rust_version, patterns })
179 }
180
181 pub async fn parse_file(&self, file_path: &Path) -> Result<ParsedDocument> {
191 let content = tokio::fs::read_to_string(file_path).await.map_err(|e| {
192 AuditError::IoError { path: file_path.to_path_buf(), details: e.to_string() }
193 })?;
194
195 self.parse_content(file_path, &content)
196 }
197
198 pub fn parse_content(&self, file_path: &Path, content: &str) -> Result<ParsedDocument> {
209 let lines: Vec<&str> = content.lines().collect();
210
211 let code_examples = self.extract_code_examples(&lines)?;
212 let api_references = self.extract_api_references(&lines)?;
213 let version_references = self.extract_version_references(&lines)?;
214 let internal_links = self.extract_internal_links(&lines)?;
215 let feature_mentions = self.extract_feature_mentions(&lines)?;
216
217 Ok(ParsedDocument {
218 file_path: file_path.to_path_buf(),
219 code_examples,
220 api_references,
221 version_references,
222 internal_links,
223 feature_mentions,
224 })
225 }
226
227 pub fn extract_rust_examples(&self, content: &str) -> Result<Vec<CodeExample>> {
232 let lines: Vec<&str> = content.lines().collect();
233 let all_examples = self.extract_code_examples(&lines)?;
234
235 let rust_examples: Vec<CodeExample> = all_examples
237 .into_iter()
238 .filter(|example| example.language == "rust")
239 .map(|mut example| {
240 example.is_runnable = self.should_compile_rust_example(&example);
242 example
243 })
244 .collect();
245
246 Ok(rust_examples)
247 }
248
249 pub fn extract_configuration_examples(&self, content: &str) -> Result<Vec<CodeExample>> {
254 let lines: Vec<&str> = content.lines().collect();
255 let all_examples = self.extract_code_examples(&lines)?;
256
257 let config_examples: Vec<CodeExample> = all_examples
259 .into_iter()
260 .filter(|example| matches!(example.language.as_str(), "toml" | "yaml" | "yml" | "json"))
261 .collect();
262
263 Ok(config_examples)
264 }
265
266 fn should_compile_rust_example(&self, example: &CodeExample) -> bool {
271 if example.attributes.contains(&"ignore".to_string())
273 || example.attributes.contains(&"no_run".to_string())
274 || example.attributes.contains(&"compile_fail".to_string())
275 {
276 return false;
277 }
278
279 let content = &example.content;
281
282 if content.contains("// ...")
284 || content.contains("/* ... */")
285 || content.trim().starts_with("use ") || content.trim().starts_with("//") || content.lines().count() < 2
288 {
289 return false;
291 }
292
293 if content.contains("fn example(")
295 || content.contains("struct Example")
296 || content.contains("// Example:")
297 {
298 return false;
299 }
300
301 if content.contains("fn main(")
303 || content.contains("#[test]")
304 || content.contains("#[tokio::main]")
305 {
306 return true;
307 }
308
309 if content.contains("adk_") && (content.contains(".await") || content.contains("async")) {
311 return true;
312 }
313
314 true
316 }
317
318 fn extract_code_examples(&self, lines: &[&str]) -> Result<Vec<CodeExample>> {
320 let mut examples = Vec::new();
321 let mut in_code_block = false;
322 let mut current_code = String::new();
323 let mut current_language = String::new();
324 let mut current_attributes = Vec::new();
325 let mut start_line = 0;
326
327 for (line_num, line) in lines.iter().enumerate() {
328 if let Some(captures) = self.patterns.code_block.captures(line) {
329 if line.starts_with("```") {
330 if in_code_block {
331 let is_runnable =
333 self.is_code_runnable(¤t_language, ¤t_attributes);
334
335 examples.push(CodeExample {
336 content: current_code.trim().to_string(),
337 language: current_language.clone(),
338 line_number: start_line + 1, is_runnable,
340 attributes: current_attributes.clone(),
341 });
342
343 current_code.clear();
345 current_language.clear();
346 current_attributes.clear();
347 in_code_block = false;
348 } else {
349 if let Some(lang_match) = captures.get(1) {
351 let lang_spec = lang_match.as_str();
352 let (language, attributes) = self.parse_language_spec(lang_spec);
353 current_language = language;
354 current_attributes = attributes;
355 }
356 start_line = line_num;
357 in_code_block = true;
358 }
359 }
360 } else if in_code_block {
361 current_code.push_str(line);
362 current_code.push('\n');
363 }
364 }
365
366 Ok(examples)
367 }
368
369 fn extract_api_references(&self, lines: &[&str]) -> Result<Vec<ApiReference>> {
371 let mut references = Vec::new();
372
373 for (line_num, line) in lines.iter().enumerate() {
374 for captures in self.patterns.api_reference.captures_iter(line) {
375 if let Some(api_match) = captures.get(0) {
376 let full_path = api_match.as_str();
377 let (crate_name, item_path, item_type) = self.parse_api_path(full_path);
378
379 references.push(ApiReference {
380 crate_name,
381 item_path: item_path.to_string(),
382 item_type,
383 line_number: line_num + 1,
384 context: line.to_string(),
385 });
386 }
387 }
388 }
389
390 Ok(references)
391 }
392
393 fn extract_version_references(&self, lines: &[&str]) -> Result<Vec<VersionReference>> {
395 let mut references = Vec::new();
396
397 for (line_num, line) in lines.iter().enumerate() {
398 for captures in self.patterns.rust_version.captures_iter(line) {
400 if let Some(version_match) = captures.get(1) {
401 references.push(VersionReference {
402 version: version_match.as_str().to_string(),
403 version_type: VersionType::RustVersion,
404 line_number: line_num + 1,
405 context: line.to_string(),
406 });
407 }
408 }
409
410 for captures in self.patterns.version_reference.captures_iter(line) {
412 if let Some(version_match) = captures.get(1) {
413 let version_type = self.classify_version_type(line, version_match.as_str());
414
415 references.push(VersionReference {
416 version: version_match.as_str().to_string(),
417 version_type,
418 line_number: line_num + 1,
419 context: line.to_string(),
420 });
421 }
422 }
423 }
424
425 Ok(references)
426 }
427
428 fn extract_internal_links(&self, lines: &[&str]) -> Result<Vec<InternalLink>> {
430 let mut links = Vec::new();
431
432 for (line_num, line) in lines.iter().enumerate() {
433 for captures in self.patterns.internal_link.captures_iter(line) {
434 if let (Some(text_match), Some(target_match)) = (captures.get(1), captures.get(2)) {
435 let target = target_match.as_str();
436 let is_relative = !target.starts_with("http") && !target.starts_with('#');
437
438 links.push(InternalLink {
439 target: target.to_string(),
440 text: text_match.as_str().to_string(),
441 line_number: line_num + 1,
442 is_relative,
443 });
444 }
445 }
446 }
447
448 Ok(links)
449 }
450
451 fn extract_feature_mentions(&self, lines: &[&str]) -> Result<Vec<FeatureMention>> {
453 let mut mentions = Vec::new();
454
455 for (line_num, line) in lines.iter().enumerate() {
456 for captures in self.patterns.feature_flag.captures_iter(line) {
457 if let Some(feature_match) = captures.get(1) {
458 let feature_name = feature_match.as_str().to_string();
459 let crate_name = self.extract_crate_from_context(line);
460
461 mentions.push(FeatureMention {
462 feature_name,
463 crate_name,
464 line_number: line_num + 1,
465 context: line.to_string(),
466 });
467 }
468 }
469 }
470
471 Ok(mentions)
472 }
473
474 fn is_code_runnable(&self, language: &str, attributes: &[String]) -> bool {
476 if language == "rust" {
478 !attributes.contains(&"ignore".to_string())
479 && !attributes.contains(&"no_run".to_string())
480 && !attributes.contains(&"compile_fail".to_string())
481 } else {
482 false
484 }
485 }
486
487 fn parse_language_spec(&self, lang_spec: &str) -> (String, Vec<String>) {
489 let parts: Vec<&str> = lang_spec.split(',').map(|s| s.trim()).collect();
490
491 if parts.is_empty() {
492 return ("text".to_string(), Vec::new());
493 }
494
495 let language = parts[0].to_string();
496 let attributes = parts[1..].iter().map(|s| s.to_string()).collect();
497
498 (language, attributes)
499 }
500
501 fn parse_api_path(&self, full_path: &str) -> (String, String, ApiItemType) {
503 let parts: Vec<&str> = full_path.split("::").collect();
504
505 if parts.is_empty() {
506 return ("unknown".to_string(), full_path.to_string(), ApiItemType::Unknown);
507 }
508
509 let crate_name = parts[0].to_string();
510 let item_path = full_path.to_string();
511
512 let item_type = if let Some(last_part) = parts.last() {
514 self.infer_api_item_type(last_part)
515 } else {
516 ApiItemType::Unknown
517 };
518
519 (crate_name, item_path, item_type)
520 }
521
522 fn infer_api_item_type(&self, item_name: &str) -> ApiItemType {
524 if item_name.chars().next().is_some_and(|c| c.is_uppercase()) {
526 if item_name.ends_with("Error") || item_name.ends_with("Result") {
528 ApiItemType::Enum
529 } else {
530 ApiItemType::Struct
531 }
532 } else if item_name.contains('(') || item_name.ends_with("()") {
533 ApiItemType::Function
535 } else if item_name.chars().all(|c| c.is_uppercase() || c == '_') {
536 ApiItemType::Constant
538 } else {
539 ApiItemType::Unknown
541 }
542 }
543
544 fn classify_version_type(&self, line: &str, version: &str) -> VersionType {
546 if line.contains("rust-version") || line.contains("rustc") {
547 VersionType::RustVersion
548 } else if line.contains("adk-") || version == self.workspace_version {
549 VersionType::WorkspaceVersion
550 } else if line.contains("version") && line.contains("=") {
551 VersionType::CrateVersion
552 } else {
553 VersionType::Generic
554 }
555 }
556
557 fn extract_crate_from_context(&self, line: &str) -> Option<String> {
559 if let Some(captures) = self.patterns.toml_dependency.captures(line) {
561 if let Some(crate_match) = captures.get(1) {
562 return Some(crate_match.as_str().to_string());
563 }
564 }
565
566 if let Some(captures) = self.patterns.api_reference.captures(line) {
568 if let Some(crate_match) = captures.get(1) {
569 return Some(crate_match.as_str().to_string());
570 }
571 }
572
573 None
574 }
575}
576
577impl ParserPatterns {
578 fn new() -> Result<Self> {
580 Ok(Self {
581 code_block: Regex::new(r"^```(\w+(?:,\w+)*)?").map_err(|e| AuditError::RegexError {
582 pattern: "code_block".to_string(),
583 details: e.to_string(),
584 })?,
585
586 api_reference: Regex::new(r"\b(adk_\w+)::([\w:]+)").map_err(|e| {
587 AuditError::RegexError {
588 pattern: "api_reference".to_string(),
589 details: e.to_string(),
590 }
591 })?,
592
593 version_reference: Regex::new(r#"version\s*=\s*"([^"]+)""#).map_err(|e| {
594 AuditError::RegexError {
595 pattern: "version_reference".to_string(),
596 details: e.to_string(),
597 }
598 })?,
599
600 internal_link: Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").map_err(|e| {
601 AuditError::RegexError {
602 pattern: "internal_link".to_string(),
603 details: e.to_string(),
604 }
605 })?,
606
607 feature_flag: Regex::new(r#"features?\s*=\s*\[?"([^"\]]+)""#).map_err(|e| {
608 AuditError::RegexError {
609 pattern: "feature_flag".to_string(),
610 details: e.to_string(),
611 }
612 })?,
613
614 rust_version: Regex::new(r#"rust-version\s*=\s*"([^"]+)""#).map_err(|e| {
615 AuditError::RegexError {
616 pattern: "rust_version".to_string(),
617 details: e.to_string(),
618 }
619 })?,
620
621 toml_dependency: Regex::new(r#"^([a-zA-Z0-9_-]+)\s*=\s*\{"#).map_err(|e| {
622 AuditError::RegexError {
623 pattern: "toml_dependency".to_string(),
624 details: e.to_string(),
625 }
626 })?,
627 })
628 }
629}
630
631impl Default for ParsedDocument {
632 fn default() -> Self {
633 Self {
634 file_path: PathBuf::new(),
635 code_examples: Vec::new(),
636 api_references: Vec::new(),
637 version_references: Vec::new(),
638 internal_links: Vec::new(),
639 feature_mentions: Vec::new(),
640 }
641 }
642}
643
644#[cfg(test)]
645mod tests {
646 use super::*;
647 use std::path::PathBuf;
648
649 fn create_test_parser() -> DocumentationParser {
650 DocumentationParser::new("0.1.0".to_string(), "1.85.0".to_string()).unwrap()
651 }
652
653 #[test]
654 fn test_parser_creation() {
655 let parser = create_test_parser();
656 assert_eq!(parser.workspace_version, "0.1.0");
657 assert_eq!(parser.rust_version, "1.85.0");
658 }
659
660 #[test]
661 fn test_code_block_extraction() {
662 let parser = create_test_parser();
663 let content = r#"
664# Example
665
666Here's some Rust code:
667
668```rust
669fn main() {
670 println!("Hello, world!");
671}
672```
673
674And some TOML:
675
676```toml
677[dependencies]
678serde = "1.0"
679```
680"#;
681
682 let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
683
684 assert_eq!(result.code_examples.len(), 2);
685
686 let rust_example = &result.code_examples[0];
687 assert_eq!(rust_example.language, "rust");
688 assert!(rust_example.is_runnable);
689 assert!(rust_example.content.contains("println!"));
690
691 let toml_example = &result.code_examples[1];
692 assert_eq!(toml_example.language, "toml");
693 assert!(!toml_example.is_runnable);
694 }
695
696 #[test]
697 fn test_api_reference_extraction() {
698 let parser = create_test_parser();
699 let content = r#"
700Use `adk_core::Agent` for creating agents.
701The `adk_model::Llm::generate` method is useful.
702"#;
703
704 let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
705
706 assert_eq!(result.api_references.len(), 2);
707
708 let first_ref = &result.api_references[0];
709 assert_eq!(first_ref.crate_name, "adk_core");
710 assert_eq!(first_ref.item_path, "adk_core::Agent");
711
712 let second_ref = &result.api_references[1];
713 assert_eq!(second_ref.crate_name, "adk_model");
714 assert_eq!(second_ref.item_path, "adk_model::Llm::generate");
715 }
716
717 #[test]
718 fn test_version_reference_extraction() {
719 let parser = create_test_parser();
720 let content = r#"
721```toml
722[dependencies]
723adk-core = { version = "0.1.0" }
724serde = { version = "1.0.195" }
725
726[package]
727rust-version = "1.85.0"
728```
729"#;
730
731 let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
732
733 assert!(!result.version_references.is_empty());
735 }
736
737 #[test]
738 fn test_internal_link_extraction() {
739 let parser = create_test_parser();
740 let content = r#"
741See the [Getting Started](./getting-started.md) guide.
742Check out [API Reference](../api/index.md) for details.
743"#;
744
745 let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
746
747 assert_eq!(result.internal_links.len(), 2);
748
749 let first_link = &result.internal_links[0];
750 assert_eq!(first_link.text, "Getting Started");
751 assert_eq!(first_link.target, "./getting-started.md");
752 assert!(first_link.is_relative);
753 }
754
755 #[test]
756 fn test_feature_mention_extraction() {
757 let parser = create_test_parser();
758 let content = r#"
759```toml
760[dependencies]
761adk-core = { version = "0.1.0", features = ["async"] }
762```
763
764Enable the `cuda` feature for GPU acceleration.
765"#;
766
767 let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
768
769 assert!(!result.feature_mentions.is_empty());
771 }
772
773 #[test]
774 fn test_code_attributes_parsing() {
775 let parser = create_test_parser();
776 let content = r#"
777```rust,ignore
778// This code is ignored
779fn ignored_example() {}
780```
781
782```rust,no_run
783// This code doesn't run
784fn no_run_example() {}
785```
786"#;
787
788 let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
789
790 assert_eq!(result.code_examples.len(), 2);
791
792 let ignored_example = &result.code_examples[0];
793 assert!(!ignored_example.is_runnable);
794 assert!(ignored_example.attributes.contains(&"ignore".to_string()));
795
796 let no_run_example = &result.code_examples[1];
797 assert!(!no_run_example.is_runnable);
798 assert!(no_run_example.attributes.contains(&"no_run".to_string()));
799 }
800
801 #[test]
802 fn test_rust_example_extraction() {
803 let parser = create_test_parser();
804 let content = r#"
805```rust
806fn main() {
807 println!("This should be runnable");
808}
809```
810
811```rust,ignore
812fn ignored() {}
813```
814
815```toml
816[dependencies]
817serde = "1.0"
818```
819
820```rust
821// Just a comment
822```
823"#;
824
825 let rust_examples = parser.extract_rust_examples(content).unwrap();
826
827 assert_eq!(rust_examples.len(), 3);
829
830 assert!(rust_examples[0].is_runnable);
832 assert!(rust_examples[0].content.contains("main"));
833
834 assert!(!rust_examples[1].is_runnable);
836
837 assert!(!rust_examples[2].is_runnable);
839 }
840
841 #[test]
842 fn test_configuration_example_extraction() {
843 let parser = create_test_parser();
844 let content = r#"
845```toml
846[dependencies]
847adk-core = "0.1.0"
848```
849
850```yaml
851version: "3.8"
852services:
853 app:
854 image: rust:latest
855```
856
857```rust
858fn main() {}
859```
860"#;
861
862 let config_examples = parser.extract_configuration_examples(content).unwrap();
863
864 assert_eq!(config_examples.len(), 2);
866
867 assert_eq!(config_examples[0].language, "toml");
868 assert_eq!(config_examples[1].language, "yaml");
869 }
870
871 #[test]
872 fn test_enhanced_feature_detection() {
873 let parser = create_test_parser();
874 let content = r#"
875Enable the `cuda` feature for GPU acceleration:
876
877```toml
878[dependencies]
879adk-mistralrs = { version = "0.1.0", features = ["cuda", "flash-attn"] }
880```
881
882You can also use the `async` feature with adk-core.
883"#;
884
885 let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
886
887 assert!(!result.feature_mentions.is_empty());
889
890 let config_examples = parser.extract_configuration_examples(content).unwrap();
892 assert_eq!(config_examples.len(), 1);
893 assert!(config_examples[0].content.contains("features"));
894 }
895}