use anyhow::Result;
use regex::Regex;
use std::path::Path;
use crate::markdown::frontmatter::FrontmatterParser;
/// Pairs a source file with a path that is referenced from it.
///
/// The type name indicates it is meant for reporting references whose target
/// does not exist; nothing in this type itself checks the filesystem.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MissingReference {
    /// File in which the reference appears.
    pub source_file: String,
    /// The path exactly as it was referenced.
    pub referenced_path: String,
}

impl MissingReference {
    /// Builds a `MissingReference` from its two owned components.
    #[must_use]
    pub fn new(source_file: String, referenced_path: String) -> Self {
        MissingReference { source_file, referenced_path }
    }
}
/// Extracts references to Markdown (`.md`) files from `content`.
///
/// Frontmatter and fenced code blocks are blanked out first, then two passes
/// run over the remaining text:
///
/// 1. Markdown links `[text](target)` whose target ends with `.md`.
/// 2. Bare paths ending in `.md` that are delimited on both sides by
///    whitespace, a quote (`"`, `'`), a backtick, or the start/end of the text.
///
/// Every candidate must also pass [`is_valid_file_reference`]. The result keeps
/// the order of first appearance (links first, then bare paths) with duplicates
/// removed.
#[must_use]
pub fn extract_file_references(content: &str) -> Vec<String> {
    let content_without_frontmatter = remove_frontmatter(content);
    let content_without_code = remove_code_blocks(&content_without_frontmatter);
    let text = content_without_code.as_str();
    let mut references = Vec::new();

    // Both patterns are constants; if one ever failed to compile, that pass is
    // simply skipped (behaviour kept from the original implementation).
    if let Ok(link_regex) = Regex::new(r"\[([^\]]+)\]\(([^)]+)\)") {
        for target in link_regex.captures_iter(text).filter_map(|cap| cap.get(2)) {
            let path_str = target.as_str();
            if path_str.ends_with(".md") && is_valid_file_reference(path_str) {
                references.push(path_str.to_string());
            }
        }
    }

    if let Ok(path_regex) = Regex::new(r#"(?:^|\s|["'`])([./a-zA-Z_][\w./-]*\.md)(?:\s|["'`]|$)"#) {
        // The pattern consumes the delimiter that follows a path. A plain
        // `captures_iter` therefore skips the second of two paths separated by
        // a single delimiter ("a/x.md b/y.md"), because that delimiter is no
        // longer available as the leading delimiter of the next match.
        // Resuming the search right after the captured path (not after the
        // whole match) lets one delimiter serve both neighbours.
        let mut search_from = 0;
        while let Some(cap) = path_regex.captures_at(text, search_from) {
            match cap.get(1) {
                Some(path) => {
                    let path_str = path.as_str();
                    if is_valid_file_reference(path_str) {
                        references.push(path_str.to_string());
                    }
                    // Group 1 is never empty, so this always advances.
                    search_from = path.end();
                }
                // Group 1 is mandatory in the pattern; stop defensively.
                None => break,
            }
        }
    }

    // Order-preserving de-duplication: `insert` returns false for repeats.
    let mut seen = std::collections::HashSet::new();
    references.retain(|r| seen.insert(r.clone()));
    references
}
/// Returns `content` with its frontmatter stripped.
///
/// Delegates entirely to `FrontmatterParser::strip_frontmatter` on a freshly
/// constructed parser.
fn remove_frontmatter(content: &str) -> String {
    FrontmatterParser::new().strip_frontmatter(content)
}
/// Blanks out fenced code blocks, replacing their content with spaces.
///
/// Every character of the input produces exactly one character of output, so
/// the character count is preserved:
///
/// * A run of three or more backticks toggles the "inside a fence" state and is
///   itself replaced by spaces.
/// * Inside a fence every character — including newlines and shorter backtick
///   runs — becomes a space.
/// * Outside a fence, text and inline-code backticks (runs of one or two) are
///   copied through unchanged.
///
/// Fences are recognised anywhere in the text, not only at the start of a line,
/// and the lengths of opening and closing fences are not compared.
fn remove_code_blocks(content: &str) -> String {
    let mut result = String::with_capacity(content.len());
    let mut in_code_block = false;
    let mut chars = content.chars().peekable();

    while let Some(ch) = chars.next() {
        if ch != '`' {
            result.push(if in_code_block { ' ' } else { ch });
            continue;
        }

        // Measure the whole run of consecutive backticks.
        let mut backtick_count = 1;
        while chars.next_if_eq(&'`').is_some() {
            backtick_count += 1;
        }

        let is_fence = backtick_count >= 3;
        if is_fence {
            in_code_block = !in_code_block;
        }

        // Fence markers are always blanked. Shorter runs are kept only when
        // they sit outside a fenced block; previously they leaked into the
        // output even from inside a block.
        let fill = if is_fence || in_code_block { ' ' } else { '`' };
        result.extend(std::iter::repeat(fill).take(backtick_count));
    }

    result
}
/// Decides whether `path` looks like a relative file reference.
///
/// After trimming surrounding whitespace, the path is accepted only if all of
/// the following hold:
///
/// * it is not empty,
/// * it does not contain `://` (URL-like),
/// * it does not start with `/` (absolute) or `#` (anchor),
/// * it contains at least one `.` and at least one `/`.
#[must_use]
pub fn is_valid_file_reference(path: &str) -> bool {
    let candidate = path.trim();

    let disqualified = candidate.is_empty()
        || candidate.contains("://")
        || candidate.starts_with('/')
        || candidate.starts_with('#');

    !disqualified && candidate.contains('.') && candidate.contains('/')
}
/// Returns the subset of `references` that do not exist under `project_dir`.
///
/// Each reference is joined onto `project_dir` and checked with
/// `Path::exists`; those that fail the check are returned, cloned, in their
/// original order.
///
/// # Errors
///
/// This implementation never produces an `Err`; the `Result` is part of the
/// signature only.
pub fn validate_file_references(references: &[String], project_dir: &Path) -> Result<Vec<String>> {
    let missing: Vec<String> = references
        .iter()
        .filter(|reference| !project_dir.join(reference.as_str()).exists())
        .cloned()
        .collect();
    Ok(missing)
}
#[cfg(test)]
mod tests {
// Unit tests for reference extraction, validation, and the text-cleaning helpers.
// Fixture text lives in raw strings, so whitespace inside them is significant.
use super::*;
use std::fs;
use tempfile::tempdir;
// Markdown link targets ending in `.md` are extracted, one entry per link.
#[test]
fn test_extract_markdown_links() {
let content = r#"
Check the [documentation](./docs/guide.md) for more info.
Also see [examples](../examples/demo.md).
"#;
let refs = extract_file_references(content);
assert_eq!(refs.len(), 2);
assert!(refs.contains(&"./docs/guide.md".to_string()));
assert!(refs.contains(&"../examples/demo.md".to_string()));
}
// Backtick-quoted bare paths are extracted, including dot-directory prefixes.
#[test]
fn test_extract_direct_file_paths() {
let content = r#"
See `.agpm/snippets/example.md` for the implementation.
Check `./docs/overview.md` and `.claude/agents/test.md`.
"#;
let refs = extract_file_references(content);
assert!(refs.contains(&".agpm/snippets/example.md".to_string()));
assert!(refs.contains(&".claude/agents/test.md".to_string()));
assert!(refs.contains(&"./docs/overview.md".to_string()));
}
// Neither a linked URL nor a bare URL yields a reference.
#[test]
fn test_skip_urls() {
let content = r#"
Visit [GitHub](https://github.com/user/repo) for source.
Or check http://example.com/page.html.
"#;
let refs = extract_file_references(content);
assert_eq!(refs.len(), 0);
}
// Paths inside a fenced block are ignored; paths around the block are still found.
#[test]
fn test_skip_code_blocks() {
let content = r#"
Normal reference: `.agpm/snippets/real.md`
```bash
# This should be skipped: `.agpm/snippets/code.md`
cat .agpm/snippets/example.md
```
Another real reference: `docs/guide.md`
"#;
let refs = extract_file_references(content);
assert!(refs.contains(&".agpm/snippets/real.md".to_string()));
assert!(refs.contains(&"docs/guide.md".to_string()));
assert!(!refs.iter().any(|r| r.contains("code.md")));
}
// Inline code (single backticks) is not treated as a code block.
#[test]
fn test_inline_code_path_extraction() {
let content = "Check `.agpm/real.md` for details.";
let refs = extract_file_references(content);
assert!(refs.contains(&".agpm/real.md".to_string()));
}
// The same path mentioned twice is reported once.
#[test]
fn test_deduplication() {
let content = r#"
See `.agpm/snippets/example.md` for details.
Also check `.agpm/snippets/example.md` again.
"#;
let refs = extract_file_references(content);
assert_eq!(refs.len(), 1);
}
// Relative paths with an extension pass; URLs, absolute paths, anchors,
// empty strings and extension-less names are rejected.
#[test]
fn test_is_valid_file_reference() {
assert!(is_valid_file_reference("./docs/guide.md"));
assert!(is_valid_file_reference(".agpm/snippets/file.md"));
assert!(is_valid_file_reference("../parent/file.json"));
assert!(!is_valid_file_reference("https://example.com"));
assert!(!is_valid_file_reference("http://test.com/file.md"));
assert!(!is_valid_file_reference("/absolute/path.md"));
assert!(!is_valid_file_reference("#anchor"));
assert!(!is_valid_file_reference(""));
assert!(!is_valid_file_reference("no-extension"));
}
// Against a temp directory holding one real file, only the two references
// without a backing file are reported as missing.
#[test]
fn test_validate_file_references() -> Result<()> {
let temp_dir = tempdir()?;
let project_dir = temp_dir.path();
let existing_dir = project_dir.join(".agpm").join("snippets");
fs::create_dir_all(&existing_dir)?;
fs::write(existing_dir.join("existing.md"), "content")?;
let references = vec![
".agpm/snippets/existing.md".to_string(),
".agpm/snippets/missing.md".to_string(),
"nonexistent/file.md".to_string(),
];
let missing = validate_file_references(&references, project_dir)?;
assert_eq!(missing.len(), 2);
assert!(missing.contains(&".agpm/snippets/missing.md".to_string()));
assert!(missing.contains(&"nonexistent/file.md".to_string()));
assert!(!missing.contains(&".agpm/snippets/existing.md".to_string()));
Ok(())
}
// Text outside the fence survives; the path inside the fence must not
// remain as a whitespace-delimited word.
#[test]
fn test_remove_code_blocks() {
let content = r#"
Normal text with `.agpm/file.md`
```rust
let path = ".agpm/in_code.md";
```
More normal text `.agpm/another.md`
"#;
let cleaned = remove_code_blocks(content);
assert!(cleaned.contains(".agpm/file.md"));
assert!(cleaned.contains(".agpm/another.md"));
assert!(
!cleaned.contains("in_code.md")
|| cleaned.split_whitespace().all(|word| !word.contains("in_code.md"))
);
}
// Frontmatter keys and paths are removed while the document body is kept.
#[test]
fn test_remove_frontmatter() {
let content = r#"---
dependencies:
agents:
- path: agents/helper.md
snippets:
- path: snippets/utils.md
---
# Main Content
See [documentation](./docs/guide.md) for details.
"#;
let cleaned = remove_frontmatter(content);
assert!(!cleaned.contains("dependencies:"));
assert!(!cleaned.contains("agents/helper.md"));
assert!(!cleaned.contains("snippets/utils.md"));
assert!(cleaned.contains("# Main Content"));
assert!(cleaned.contains("./docs/guide.md"));
}
// Paths declared in frontmatter are not reported; body references are.
#[test]
fn test_extract_with_frontmatter_dependencies() {
let content = r#"---
dependencies:
agents:
- path: agents/helper.md
version: v1.0.0
snippets:
- path: .agpm/snippets/utils.md
---
# Command
See [real reference](./docs/guide.md) for details.
Check `.claude/agents/example.md` for the implementation.
"#;
let refs = extract_file_references(content);
assert!(refs.contains(&"./docs/guide.md".to_string()));
assert!(refs.contains(&".claude/agents/example.md".to_string()));
assert!(!refs.contains(&"agents/helper.md".to_string()));
assert!(!refs.contains(&".agpm/snippets/utils.md".to_string()));
}
// End-to-end mix: links and backticked paths are found; the fenced block,
// the URL, and a bare file name without a directory are all ignored.
#[test]
fn test_complex_markdown_with_mixed_references() {
let content = r#"
# Documentation
See the [main guide](./docs/guide.md) for details.
## Implementation
The core logic is in `.agpm/snippets/core.md` file.
```rust
// This code reference should be ignored
let path = ".agpm/snippets/ignored.md";
```
Also check:
- [Examples](../examples/demo.md)
- External: https://github.com/user/repo
- `.claude/agents/helper.md`
Inline code like `example.md` should be skipped.
"#;
let refs = extract_file_references(content);
assert!(refs.contains(&"./docs/guide.md".to_string()));
assert!(refs.contains(&".agpm/snippets/core.md".to_string()));
assert!(refs.contains(&"../examples/demo.md".to_string()));
assert!(refs.contains(&".claude/agents/helper.md".to_string()));
assert!(!refs.iter().any(|r| r.contains("github.com")));
assert!(!refs.iter().any(|r| r.contains("ignored.md")));
assert!(!refs.contains(&"example.md".to_string())); }
}