use crate::{AuditError, Result};
use regex::Regex;
use std::path::{Path, PathBuf};
/// Extracts code examples, API references, version references, links and
/// feature mentions from documentation text.
#[derive(Debug)]
pub struct DocumentationParser {
    /// Version string that extracted versions are compared against when
    /// deciding whether they refer to the workspace.
    workspace_version: String,
    /// Rust version supplied at construction. The parsing code visible in
    /// this file only stores it, hence the `dead_code` allowance.
    #[allow(dead_code)]
    rust_version: String,
    /// Compiled regular expressions shared by the extraction methods.
    patterns: ParserPatterns,
}
/// Compiled regular expressions used by [`DocumentationParser`].
#[derive(Debug)]
struct ParserPatterns {
    /// Matches a line that starts with a code fence, optionally followed by
    /// a comma-separated language specification.
    code_block: Regex,
    /// Matches `adk_*::path` style API paths.
    api_reference: Regex,
    /// Matches `version = "..."` assignments.
    version_reference: Regex,
    /// Matches inline markdown links of the form `[text](target)`.
    internal_link: Regex,
    /// Matches `feature = "..."` / `features = ["..."` assignments.
    feature_flag: Regex,
    /// Matches `rust-version = "..."` assignments.
    rust_version: Regex,
    /// Matches a line that starts with `name = {` (an inline-table dependency).
    toml_dependency: Regex,
}
/// Everything extracted from a single documentation file.
#[derive(Debug, Clone, PartialEq)]
pub struct ParsedDocument {
    /// Path the content was attributed to when it was parsed.
    pub file_path: PathBuf,
    /// Fenced code blocks found in the document.
    pub code_examples: Vec<CodeExample>,
    /// `adk_*::...` API paths found in the document.
    pub api_references: Vec<ApiReference>,
    /// Version strings found in the document.
    pub version_references: Vec<VersionReference>,
    /// Markdown links found in the document.
    pub internal_links: Vec<InternalLink>,
    /// Cargo feature names found in the document.
    pub feature_mentions: Vec<FeatureMention>,
}
/// A fenced code block extracted from a document.
///
/// Every field has total equality, so the type derives `Eq` alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct CodeExample {
    /// Text between the opening and closing fence, with surrounding
    /// whitespace trimmed.
    pub content: String,
    /// Language token from the opening fence; empty when the fence names none.
    pub language: String,
    /// 1-based line number of the opening fence.
    pub line_number: usize,
    /// Whether the example is considered suitable for compiling/running.
    pub is_runnable: bool,
    /// Comma-separated tokens that followed the language on the opening fence
    /// (for example `ignore` or `no_run`).
    pub attributes: Vec<String>,
}
#[derive(Debug, Clone, PartialEq)]
pub struct ApiReference {
pub crate_name: String,
pub item_path: String,
pub item_type: ApiItemType,
pub line_number: usize,
pub context: String,
}
/// Kind of item an [`ApiReference`] is believed to name.
///
/// The enum is fieldless, so it is `Copy` and `Hash` in addition to the
/// comparison traits; this lets it be passed by value and used as a map or
/// set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum ApiItemType {
    /// A struct.
    Struct,
    /// A free function.
    Function,
    /// A method.
    Method,
    /// A trait.
    Trait,
    /// An enum.
    Enum,
    /// A constant.
    Constant,
    /// A module.
    Module,
    /// A type alias.
    TypeAlias,
    /// The kind could not be determined.
    Unknown,
}
#[derive(Debug, Clone, PartialEq)]
pub struct VersionReference {
pub version: String,
pub version_type: VersionType,
pub line_number: usize,
pub context: String,
}
/// What a [`VersionReference`] is believed to describe.
///
/// The enum is fieldless, so it is `Copy` and `Hash` in addition to the
/// comparison traits; this lets it be passed by value and used as a map or
/// set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VersionType {
    /// Version of a crate dependency.
    CrateVersion,
    /// Version of the Rust toolchain.
    RustVersion,
    /// Version of the workspace's own crates.
    WorkspaceVersion,
    /// A version that fits none of the other categories.
    Generic,
}
/// A markdown link found in a document.
///
/// Every field has total equality, so the type derives `Eq` alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct InternalLink {
    /// The link destination (the part inside the parentheses).
    pub target: String,
    /// The link label (the part inside the square brackets).
    pub text: String,
    /// 1-based line number on which the link was found.
    pub line_number: usize,
    /// Whether the target was classified as a relative path rather than a web
    /// URL or an in-page anchor.
    pub is_relative: bool,
}
/// A Cargo feature name found in a document.
///
/// Every field has total equality, so the type derives `Eq` alongside
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FeatureMention {
    /// The feature name as written between the quotes.
    pub feature_name: String,
    /// Crate the feature appears to belong to, when one could be identified
    /// on the same line.
    pub crate_name: Option<String>,
    /// 1-based line number on which the feature was found.
    pub line_number: usize,
    /// The complete line the feature was found on.
    pub context: String,
}
impl DocumentationParser {
/// Creates a parser for the given workspace and Rust versions.
///
/// # Errors
///
/// Returns the error produced by `ParserPatterns::new` if a pattern fails
/// to compile.
pub fn new(workspace_version: String, rust_version: String) -> Result<Self> {
    ParserPatterns::new().map(|patterns| Self {
        workspace_version,
        rust_version,
        patterns,
    })
}
/// Reads `file_path` as UTF-8 text and parses it with [`Self::parse_content`].
///
/// # Errors
///
/// Returns `AuditError::IoError` carrying the path and the I/O error text
/// when the file cannot be read, or any error from parsing the content.
pub async fn parse_file(&self, file_path: &Path) -> Result<ParsedDocument> {
    let read_outcome = tokio::fs::read_to_string(file_path).await;
    let content = read_outcome.map_err(|io_err| AuditError::IoError {
        path: file_path.to_path_buf(),
        details: io_err.to_string(),
    })?;
    self.parse_content(file_path, &content)
}
/// Parses already-loaded documentation text.
///
/// The text is split into lines once and each extractor is run over the
/// same line slice. `file_path` is only recorded in the result; it is not
/// read from here.
///
/// # Errors
///
/// Propagates the first error returned by any extractor.
pub fn parse_content(&self, file_path: &Path, content: &str) -> Result<ParsedDocument> {
    let lines: Vec<&str> = content.lines().collect();
    Ok(ParsedDocument {
        file_path: file_path.to_path_buf(),
        code_examples: self.extract_code_examples(&lines)?,
        api_references: self.extract_api_references(&lines)?,
        version_references: self.extract_version_references(&lines)?,
        internal_links: self.extract_internal_links(&lines)?,
        feature_mentions: self.extract_feature_mentions(&lines)?,
    })
}
/// Returns only the code examples whose language is exactly `rust`.
///
/// Each returned example has `is_runnable` recomputed with the stricter
/// `should_compile_rust_example` heuristic.
///
/// # Errors
///
/// Propagates any error from code-block extraction.
pub fn extract_rust_examples(&self, content: &str) -> Result<Vec<CodeExample>> {
    let lines: Vec<&str> = content.lines().collect();
    let mut rust_examples = self.extract_code_examples(&lines)?;
    rust_examples.retain(|candidate| candidate.language == "rust");
    for candidate in &mut rust_examples {
        candidate.is_runnable = self.should_compile_rust_example(candidate);
    }
    Ok(rust_examples)
}
/// Returns only the code examples written in a configuration language
/// (`toml`, `yaml`, `yml` or `json`).
///
/// # Errors
///
/// Propagates any error from code-block extraction.
pub fn extract_configuration_examples(&self, content: &str) -> Result<Vec<CodeExample>> {
    const CONFIG_LANGUAGES: [&str; 4] = ["toml", "yaml", "yml", "json"];

    let lines: Vec<&str> = content.lines().collect();
    let mut config_examples = self.extract_code_examples(&lines)?;
    config_examples.retain(|candidate| CONFIG_LANGUAGES.contains(&candidate.language.as_str()));
    Ok(config_examples)
}
/// Decides whether a Rust example looks complete enough to compile.
///
/// An example is rejected when any of the following holds:
/// - it carries an `ignore`, `no_run` or `compile_fail` attribute;
/// - it contains an elision marker (`// ...` or `/* ... */`);
/// - its trimmed content starts with `use ` or `//`;
/// - it has fewer than two lines;
/// - it contains a placeholder marker (`fn example(`, `struct Example`,
///   `// Example:`).
///
/// Anything that survives these exclusions is accepted.
fn should_compile_rust_example(&self, example: &CodeExample) -> bool {
    const SKIP_ATTRIBUTES: [&str; 3] = ["ignore", "no_run", "compile_fail"];
    const ELISION_MARKERS: [&str; 2] = ["// ...", "/* ... */"];
    const PLACEHOLDER_MARKERS: [&str; 3] = ["fn example(", "struct Example", "// Example:"];

    let opted_out = example
        .attributes
        .iter()
        .any(|attribute| SKIP_ATTRIBUTES.contains(&attribute.as_str()));
    if opted_out {
        return false;
    }

    let content = example.content.as_str();
    let trimmed = content.trim();
    let looks_like_fragment = ELISION_MARKERS.iter().any(|&marker| content.contains(marker))
        || trimmed.starts_with("use ")
        || trimmed.starts_with("//")
        || content.lines().count() < 2;
    if looks_like_fragment {
        return false;
    }

    !PLACEHOLDER_MARKERS.iter().any(|&marker| content.contains(marker))
}
/// Collects every fenced code block from `lines`.
///
/// A line matching the `code_block` pattern toggles between "inside" and
/// "outside" a block. The opening fence supplies the language and
/// attributes; the closing fence emits the accumulated example. Lines
/// between the fences are joined with `\n` and the result is trimmed.
///
/// `line_number` on each example is the 1-based line of its opening fence.
/// A block that is still open when the input ends is discarded.
fn extract_code_examples(&self, lines: &[&str]) -> Result<Vec<CodeExample>> {
    let mut examples = Vec::new();
    let mut in_code_block = false;
    let mut current_code = String::new();
    let mut current_language = String::new();
    let mut current_attributes: Vec<String> = Vec::new();
    let mut start_line = 0;

    for (line_num, line) in lines.iter().enumerate() {
        // The pattern is anchored to the line start, so a match always means
        // the line begins with a fence.
        let Some(captures) = self.patterns.code_block.captures(line) else {
            if in_code_block {
                current_code.push_str(line);
                current_code.push('\n');
            }
            continue;
        };

        if in_code_block {
            // Closing fence: emit the block and reset the per-block state.
            let is_runnable = self.is_code_runnable(&current_language, &current_attributes);
            examples.push(CodeExample {
                content: current_code.trim().to_string(),
                language: std::mem::take(&mut current_language),
                line_number: start_line + 1,
                is_runnable,
                attributes: std::mem::take(&mut current_attributes),
            });
            current_code.clear();
            in_code_block = false;
        } else {
            // Opening fence: a missing language spec leaves the language
            // empty and the attribute list empty.
            if let Some(lang_match) = captures.get(1) {
                let (language, attributes) = self.parse_language_spec(lang_match.as_str());
                current_language = language;
                current_attributes = attributes;
            }
            start_line = line_num;
            in_code_block = true;
        }
    }

    Ok(examples)
}
/// Finds every `adk_*::...` path in `lines`.
///
/// Each match is split into crate name, full path and inferred item kind by
/// `parse_api_path`; the 1-based line number and the whole line are kept as
/// context.
fn extract_api_references(&self, lines: &[&str]) -> Result<Vec<ApiReference>> {
    let mut found = Vec::new();
    for (index, &line) in lines.iter().enumerate() {
        found.extend(self.patterns.api_reference.find_iter(line).map(|hit| {
            let (crate_name, item_path, item_type) = self.parse_api_path(hit.as_str());
            ApiReference {
                crate_name,
                item_path,
                item_type,
                line_number: index + 1,
                context: line.to_string(),
            }
        }));
    }
    Ok(found)
}
/// Finds every quoted version assignment in `lines`.
///
/// For each line, `rust-version = "..."` assignments are reported first as
/// [`VersionType::RustVersion`], followed by `version = "..."` assignments
/// classified by `classify_version_type`.
///
/// The generic `version` pattern also matches the tail of `rust-version`,
/// so a generic match that is immediately preceded by `rust-` is skipped;
/// otherwise every `rust-version` line would be reported twice.
fn extract_version_references(&self, lines: &[&str]) -> Result<Vec<VersionReference>> {
    let mut references = Vec::new();
    for (line_num, &line) in lines.iter().enumerate() {
        for captures in self.patterns.rust_version.captures_iter(line) {
            if let Some(version_match) = captures.get(1) {
                references.push(VersionReference {
                    version: version_match.as_str().to_string(),
                    version_type: VersionType::RustVersion,
                    line_number: line_num + 1,
                    context: line.to_string(),
                });
            }
        }

        for captures in self.patterns.version_reference.captures_iter(line) {
            let (Some(whole), Some(version_match)) = (captures.get(0), captures.get(1)) else {
                continue;
            };
            // Already reported by the `rust-version` pass above.
            if line[..whole.start()].ends_with("rust-") {
                continue;
            }
            let version = version_match.as_str();
            references.push(VersionReference {
                version: version.to_string(),
                version_type: self.classify_version_type(line, version),
                line_number: line_num + 1,
                context: line.to_string(),
            });
        }
    }
    Ok(references)
}
/// Finds every inline markdown link (`[text](target)`) in `lines`.
///
/// A link is marked relative unless its target is an `http://` or
/// `https://` URL or an in-page `#` anchor. The scheme separator is part of
/// the check so that relative files whose names merely begin with `http`
/// (for example `http-client.md`) are still treated as relative.
fn extract_internal_links(&self, lines: &[&str]) -> Result<Vec<InternalLink>> {
    let mut links = Vec::new();
    for (line_num, &line) in lines.iter().enumerate() {
        for captures in self.patterns.internal_link.captures_iter(line) {
            let (Some(text_match), Some(target_match)) = (captures.get(1), captures.get(2))
            else {
                continue;
            };
            let target = target_match.as_str();
            let is_web_url = target.starts_with("http://") || target.starts_with("https://");
            let is_anchor = target.starts_with('#');
            links.push(InternalLink {
                target: target.to_string(),
                text: text_match.as_str().to_string(),
                line_number: line_num + 1,
                is_relative: !is_web_url && !is_anchor,
            });
        }
    }
    Ok(links)
}
/// Finds Cargo feature names assigned via `feature = "..."` or
/// `features = ["...", ...]` in `lines`.
///
/// The `feature_flag` pattern captures only the first quoted name. When the
/// assignment is a bracketed list, the remaining quoted entries up to the
/// closing `]` on the same line are reported as well, in order. Lists that
/// continue on later lines are not followed.
///
/// Every mention on a line shares the crate name inferred from that line by
/// `extract_crate_from_context`.
fn extract_feature_mentions(&self, lines: &[&str]) -> Result<Vec<FeatureMention>> {
    let mut mentions = Vec::new();
    for (line_num, &line) in lines.iter().enumerate() {
        for captures in self.patterns.feature_flag.captures_iter(line) {
            let (Some(whole), Some(first)) = (captures.get(0), captures.get(1)) else {
                continue;
            };

            let mut names = vec![first.as_str()];
            // Only look for more entries when the value opened with `[`.
            let opened_list = line[whole.start()..first.start()].contains('[');
            if opened_list {
                let list_tail = line[whole.end()..].split(']').next().unwrap_or("");
                names.extend(
                    list_tail
                        .split(',')
                        .filter_map(|item| item.trim().strip_prefix('"')?.strip_suffix('"'))
                        .filter(|name| !name.is_empty()),
                );
            }

            let crate_name = self.extract_crate_from_context(line);
            for name in names {
                mentions.push(FeatureMention {
                    feature_name: name.to_string(),
                    crate_name: crate_name.clone(),
                    line_number: line_num + 1,
                    context: line.to_string(),
                });
            }
        }
    }
    Ok(mentions)
}
/// Returns `true` for a `rust` block that carries none of the `ignore`,
/// `no_run` or `compile_fail` attributes; every other language is `false`.
fn is_code_runnable(&self, language: &str, attributes: &[String]) -> bool {
    if language != "rust" {
        return false;
    }
    attributes
        .iter()
        .all(|attribute| !matches!(attribute.as_str(), "ignore" | "no_run" | "compile_fail"))
}
/// Splits a fence language specification such as `rust,no_run` into the
/// language and its attributes.
///
/// Tokens are separated by `,` and trimmed. The first token is the
/// language; when it is blank the language defaults to `text`. Blank
/// attribute tokens are dropped.
///
/// `str::split` always yields at least one item, so the default has to be
/// keyed on the first token being blank rather than on the token list being
/// empty.
fn parse_language_spec(&self, lang_spec: &str) -> (String, Vec<String>) {
    let mut tokens = lang_spec.split(',').map(str::trim);
    let language = match tokens.next() {
        Some(name) if !name.is_empty() => name.to_string(),
        _ => "text".to_string(),
    };
    let attributes = tokens
        .filter(|token| !token.is_empty())
        .map(str::to_string)
        .collect();
    (language, attributes)
}
/// Breaks an API path into `(crate name, full path, inferred item kind)`.
///
/// The crate name is the first `::`-separated segment, the full path is
/// returned unchanged, and the item kind is inferred from the last segment.
fn parse_api_path(&self, full_path: &str) -> (String, String, ApiItemType) {
    let mut segments = full_path.split("::");
    let Some(first_segment) = segments.next() else {
        return ("unknown".to_string(), full_path.to_string(), ApiItemType::Unknown);
    };
    // With a single segment the first one is also the last.
    let last_segment = segments.last().unwrap_or(first_segment);
    let item_type = self.infer_api_item_type(last_segment);
    (first_segment.to_string(), full_path.to_string(), item_type)
}
/// Guesses the kind of an API item from the spelling of its name.
///
/// Rules, checked in order:
/// 1. Two or more characters, all uppercase letters, ASCII digits or `_`,
///    with at least one uppercase letter (e.g. `MAX_SIZE`): constant.
/// 2. Starts with an uppercase letter: enum when the name ends in `Error`
///    or `Result`, otherwise struct.
/// 3. Contains `(`: function.
/// 4. Anything else, including the empty string: unknown.
///
/// The constant check has to run before the uppercase-initial check;
/// otherwise every `SCREAMING_SNAKE_CASE` name is claimed by rule 2.
fn infer_api_item_type(&self, item_name: &str) -> ApiItemType {
    let is_screaming_snake = item_name.chars().count() > 1
        && item_name.chars().any(|c| c.is_uppercase())
        && item_name
            .chars()
            .all(|c| c.is_uppercase() || c.is_ascii_digit() || c == '_');

    if is_screaming_snake {
        ApiItemType::Constant
    } else if item_name.chars().next().is_some_and(|c| c.is_uppercase()) {
        if item_name.ends_with("Error") || item_name.ends_with("Result") {
            ApiItemType::Enum
        } else {
            ApiItemType::Struct
        }
    } else if item_name.contains('(') {
        ApiItemType::Function
    } else {
        ApiItemType::Unknown
    }
}
/// Classifies a version string using the line it was found on.
///
/// Checks, in order: the line mentions `rust-version` or `rustc` (Rust
/// version); the line mentions `adk-` or the version equals the workspace
/// version (workspace version); the line contains both `version` and `=`
/// (crate version); otherwise generic.
fn classify_version_type(&self, line: &str, version: &str) -> VersionType {
    let mentions_rust = line.contains("rust-version") || line.contains("rustc");
    if mentions_rust {
        return VersionType::RustVersion;
    }

    let is_workspace = line.contains("adk-") || version == self.workspace_version;
    if is_workspace {
        return VersionType::WorkspaceVersion;
    }

    let is_assignment = line.contains("version") && line.contains("=");
    if is_assignment {
        return VersionType::CrateVersion;
    }

    VersionType::Generic
}
fn extract_crate_from_context(&self, line: &str) -> Option<String> {
if let Some(captures) = self.patterns.toml_dependency.captures(line) {
if let Some(crate_match) = captures.get(1) {
return Some(crate_match.as_str().to_string());
}
}
if let Some(captures) = self.patterns.api_reference.captures(line) {
if let Some(crate_match) = captures.get(1) {
return Some(crate_match.as_str().to_string());
}
}
None
}
}
impl ParserPatterns {
fn new() -> Result<Self> {
Ok(Self {
code_block: Regex::new(r"^```(\w+(?:,\w+)*)?").map_err(|e| AuditError::RegexError {
pattern: "code_block".to_string(),
details: e.to_string(),
})?,
api_reference: Regex::new(r"\b(adk_\w+)::([\w:]+)").map_err(|e| {
AuditError::RegexError {
pattern: "api_reference".to_string(),
details: e.to_string(),
}
})?,
version_reference: Regex::new(r#"version\s*=\s*"([^"]+)""#).map_err(|e| {
AuditError::RegexError {
pattern: "version_reference".to_string(),
details: e.to_string(),
}
})?,
internal_link: Regex::new(r"\[([^\]]+)\]\(([^)]+)\)").map_err(|e| {
AuditError::RegexError {
pattern: "internal_link".to_string(),
details: e.to_string(),
}
})?,
feature_flag: Regex::new(r#"features?\s*=\s*\[?"([^"\]]+)""#).map_err(|e| {
AuditError::RegexError {
pattern: "feature_flag".to_string(),
details: e.to_string(),
}
})?,
rust_version: Regex::new(r#"rust-version\s*=\s*"([^"]+)""#).map_err(|e| {
AuditError::RegexError {
pattern: "rust_version".to_string(),
details: e.to_string(),
}
})?,
toml_dependency: Regex::new(r#"^([a-zA-Z0-9_-]+)\s*=\s*\{"#).map_err(|e| {
AuditError::RegexError {
pattern: "toml_dependency".to_string(),
details: e.to_string(),
}
})?,
})
}
}
impl Default for ParsedDocument {
    /// Returns a document with an empty path and no extracted items.
    fn default() -> Self {
        Self {
            file_path: PathBuf::default(),
            code_examples: Vec::default(),
            api_references: Vec::default(),
            version_references: Vec::default(),
            internal_links: Vec::default(),
            feature_mentions: Vec::default(),
        }
    }
}
// Unit tests for the documentation parser. Each test feeds an inline markdown
// fixture to the parser; the fixtures are raw strings whose fence lines must
// stay at the start of the line to be recognised.
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
// Builds a parser with workspace version 0.1.0 and Rust version 1.85.0.
fn create_test_parser() -> DocumentationParser {
DocumentationParser::new("0.1.0".to_string(), "1.85.0".to_string()).unwrap()
}
// The constructor stores both version strings unchanged.
#[test]
fn test_parser_creation() {
let parser = create_test_parser();
assert_eq!(parser.workspace_version, "0.1.0");
assert_eq!(parser.rust_version, "1.85.0");
}
// Two fenced blocks are found in order; only the Rust one is runnable.
#[test]
fn test_code_block_extraction() {
let parser = create_test_parser();
let content = r#"
# Example
Here's some Rust code:
```rust
fn main() {
println!("Hello, world!");
}
```
And some TOML:
```toml
[dependencies]
serde = "1.0"
```
"#;
let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
assert_eq!(result.code_examples.len(), 2);
let rust_example = &result.code_examples[0];
assert_eq!(rust_example.language, "rust");
assert!(rust_example.is_runnable);
assert!(rust_example.content.contains("println!"));
let toml_example = &result.code_examples[1];
assert_eq!(toml_example.language, "toml");
assert!(!toml_example.is_runnable);
}
// `adk_*::...` paths are split into crate name and full item path.
#[test]
fn test_api_reference_extraction() {
let parser = create_test_parser();
let content = r#"
Use `adk_core::Agent` for creating agents.
The `adk_model::Llm::generate` method is useful.
"#;
let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
assert_eq!(result.api_references.len(), 2);
let first_ref = &result.api_references[0];
assert_eq!(first_ref.crate_name, "adk_core");
assert_eq!(first_ref.item_path, "adk_core::Agent");
let second_ref = &result.api_references[1];
assert_eq!(second_ref.crate_name, "adk_model");
assert_eq!(second_ref.item_path, "adk_model::Llm::generate");
}
// Version assignments inside a TOML block are picked up; only non-emptiness
// of the result is asserted.
#[test]
fn test_version_reference_extraction() {
let parser = create_test_parser();
let content = r#"
```toml
[dependencies]
adk-core = { version = "0.1.0" }
serde = { version = "1.0.195" }
[package]
rust-version = "1.85.0"
```
"#;
let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
assert!(!result.version_references.is_empty());
}
// Markdown links yield their label and target; a `./` target is relative.
#[test]
fn test_internal_link_extraction() {
let parser = create_test_parser();
let content = r#"
See the [Getting Started](./getting-started.md) guide.
Check out [API Reference](../api/index.md) for details.
"#;
let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
assert_eq!(result.internal_links.len(), 2);
let first_link = &result.internal_links[0];
assert_eq!(first_link.text, "Getting Started");
assert_eq!(first_link.target, "./getting-started.md");
assert!(first_link.is_relative);
}
// A `features = [...]` assignment produces at least one feature mention.
#[test]
fn test_feature_mention_extraction() {
let parser = create_test_parser();
let content = r#"
```toml
[dependencies]
adk-core = { version = "0.1.0", features = ["async"] }
```
Enable the `cuda` feature for GPU acceleration.
"#;
let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
assert!(!result.feature_mentions.is_empty());
}
// Fence attributes are recorded, and `ignore` / `no_run` make a block
// non-runnable.
#[test]
fn test_code_attributes_parsing() {
let parser = create_test_parser();
let content = r#"
```rust,ignore
// This code is ignored
fn ignored_example() {}
```
```rust,no_run
// This code doesn't run
fn no_run_example() {}
```
"#;
let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
assert_eq!(result.code_examples.len(), 2);
let ignored_example = &result.code_examples[0];
assert!(!ignored_example.is_runnable);
assert!(ignored_example.attributes.contains(&"ignore".to_string()));
let no_run_example = &result.code_examples[1];
assert!(!no_run_example.is_runnable);
assert!(no_run_example.attributes.contains(&"no_run".to_string()));
}
// The TOML block is filtered out, leaving three Rust examples; the ignored
// block and the comment-only block are not runnable.
#[test]
fn test_rust_example_extraction() {
let parser = create_test_parser();
let content = r#"
```rust
fn main() {
println!("This should be runnable");
}
```
```rust,ignore
fn ignored() {}
```
```toml
[dependencies]
serde = "1.0"
```
```rust
// Just a comment
```
"#;
let rust_examples = parser.extract_rust_examples(content).unwrap();
assert_eq!(rust_examples.len(), 3);
assert!(rust_examples[0].is_runnable);
assert!(rust_examples[0].content.contains("main"));
assert!(!rust_examples[1].is_runnable);
assert!(!rust_examples[2].is_runnable);
}
// Only the TOML and YAML blocks count as configuration examples.
#[test]
fn test_configuration_example_extraction() {
let parser = create_test_parser();
let content = r#"
```toml
[dependencies]
adk-core = "0.1.0"
```
```yaml
version: "3.8"
services:
app:
image: rust:latest
```
```rust
fn main() {}
```
"#;
let config_examples = parser.extract_configuration_examples(content).unwrap();
assert_eq!(config_examples.len(), 2);
assert_eq!(config_examples[0].language, "toml");
assert_eq!(config_examples[1].language, "yaml");
}
// A multi-entry feature list yields feature mentions, and the surrounding
// TOML block is returned as the single configuration example.
#[test]
fn test_enhanced_feature_detection() {
let parser = create_test_parser();
let content = r#"
Enable the `cuda` feature for GPU acceleration:
```toml
[dependencies]
adk-mistralrs = { version = "0.1.0", features = ["cuda", "flash-attn"] }
```
You can also use the `async` feature with adk-core.
"#;
let result = parser.parse_content(&PathBuf::from("test.md"), content).unwrap();
assert!(!result.feature_mentions.is_empty());
let config_examples = parser.extract_configuration_examples(content).unwrap();
assert_eq!(config_examples.len(), 1);
assert!(config_examples[0].content.contains("features"));
}
}