agpm_cli/metadata/
extractor.rs

1//! Extract dependency metadata from resource files.
2//!
3//! This module handles the extraction of transitive dependency information
4//! from resource files. Supports YAML frontmatter in Markdown files and
5//! JSON fields in JSON configuration files.
6
7use anyhow::{Context, Result};
8use serde_json::Value as JsonValue;
9use std::path::Path;
10
11use crate::manifest::DependencyMetadata;
12
/// Metadata extractor for resource files.
///
/// This is a stateless unit type: it only namespaces the extraction
/// functions, which are all associated functions taking the file content
/// as an argument.
///
/// Extracts dependency information embedded in resource files:
/// - Markdown files (.md): YAML frontmatter between `---` delimiters
/// - JSON files (.json): `dependencies` field in the JSON structure
/// - Other files: No dependencies supported
#[derive(Debug, Clone, Copy, Default)]
pub struct MetadataExtractor;
20
21impl MetadataExtractor {
22    /// Extract dependency metadata from a file's content.
23    ///
24    /// # Arguments
25    /// * `path` - Path to the file (used to determine file type)
26    /// * `content` - Content of the file
27    ///
28    /// # Returns
29    /// * `DependencyMetadata` - Extracted metadata (may be empty)
30    pub fn extract(path: &Path, content: &str) -> Result<DependencyMetadata> {
31        let extension = path.extension().and_then(|s| s.to_str()).unwrap_or("");
32
33        match extension {
34            "md" => Self::extract_markdown_frontmatter(content),
35            "json" => Self::extract_json_field(content),
36            _ => {
37                // Scripts and other files don't support embedded dependencies
38                Ok(DependencyMetadata::default())
39            }
40        }
41    }
42
43    /// Extract YAML frontmatter from Markdown content.
44    ///
45    /// Looks for content between `---` delimiters at the start of the file.
46    fn extract_markdown_frontmatter(content: &str) -> Result<DependencyMetadata> {
47        // Check if content starts with frontmatter delimiter
48        if !content.starts_with("---\n") && !content.starts_with("---\r\n") {
49            return Ok(DependencyMetadata::default());
50        }
51
52        // Find the end of frontmatter
53        let search_start = if content.starts_with("---\n") {
54            4
55        } else {
56            5
57        };
58
59        let end_pattern = if content.contains("\r\n") {
60            "\r\n---\r\n"
61        } else {
62            "\n---\n"
63        };
64
65        if let Some(end_pos) = content[search_start..].find(end_pattern) {
66            let frontmatter = &content[search_start..search_start + end_pos];
67
68            // Parse YAML frontmatter
69            match serde_yaml::from_str::<DependencyMetadata>(frontmatter) {
70                Ok(metadata) => Ok(metadata),
71                Err(e) => {
72                    // Log warning but don't fail - malformed frontmatter is not fatal
73                    tracing::warn!("Warning: Unable to parse YAML frontmatter: {}", e);
74                    Ok(DependencyMetadata::default())
75                }
76            }
77        } else {
78            // No closing delimiter found
79            Ok(DependencyMetadata::default())
80        }
81    }
82
83    /// Extract dependencies field from JSON content.
84    ///
85    /// Looks for a `dependencies` field in the top-level JSON object.
86    fn extract_json_field(content: &str) -> Result<DependencyMetadata> {
87        let json: JsonValue =
88            serde_json::from_str(content).with_context(|| "Failed to parse JSON content")?;
89
90        if let Some(deps) = json.get("dependencies") {
91            // The dependencies field should match our expected structure
92            let dependencies = serde_json::from_value(deps.clone())
93                .with_context(|| "Failed to parse dependencies field")?;
94
95            Ok(DependencyMetadata {
96                dependencies: Some(dependencies),
97            })
98        } else {
99            Ok(DependencyMetadata::default())
100        }
101    }
102
103    /// Extract metadata from file content without knowing the file type.
104    ///
105    /// Tries to detect the format automatically.
106    pub fn extract_auto(content: &str) -> Result<DependencyMetadata> {
107        // Try YAML frontmatter first (for Markdown)
108        if (content.starts_with("---\n") || content.starts_with("---\r\n"))
109            && let Ok(metadata) = Self::extract_markdown_frontmatter(content)
110            && metadata.has_dependencies()
111        {
112            return Ok(metadata);
113        }
114
115        // Try JSON format
116        if content.trim_start().starts_with('{')
117            && let Ok(metadata) = Self::extract_json_field(content)
118            && metadata.has_dependencies()
119        {
120            return Ok(metadata);
121        }
122
123        // No metadata found
124        Ok(DependencyMetadata::default())
125    }
126}
127
#[cfg(test)]
mod tests {
    use super::*;

    /// Frontmatter listing two resource types is parsed with paths and versions intact.
    #[test]
    fn test_extract_markdown_frontmatter() {
        let markdown = r#"---
dependencies:
  agents:
    - path: agents/helper.md
      version: v1.0.0
    - path: agents/reviewer.md
  snippets:
    - path: snippets/utils.md
---

# My Command

This is the command documentation."#;

        let extracted = MetadataExtractor::extract(Path::new("command.md"), markdown).unwrap();
        assert!(extracted.has_dependencies());

        let by_type = extracted.dependencies.unwrap();
        let agents = &by_type["agents"];
        assert_eq!(agents.len(), 2);
        assert_eq!(by_type["snippets"].len(), 1);
        assert_eq!(agents[0].path, "agents/helper.md");
        assert_eq!(agents[0].version, Some("v1.0.0".to_string()));
    }

    /// A Markdown file that does not open with `---` has no dependencies.
    #[test]
    fn test_extract_markdown_no_frontmatter() {
        let markdown = r#"# My Command

This is a command without frontmatter."#;

        let extracted = MetadataExtractor::extract(Path::new("command.md"), markdown).unwrap();

        assert!(!extracted.has_dependencies());
    }

    /// The top-level `dependencies` object of a JSON file is extracted.
    #[test]
    fn test_extract_json_dependencies() {
        let document = r#"{
  "events": ["UserPromptSubmit"],
  "type": "command",
  "command": ".claude/agpm/scripts/test.js",
  "dependencies": {
    "scripts": [
      { "path": "scripts/test-runner.sh", "version": "v1.0.0" },
      { "path": "scripts/validator.py" }
    ],
    "agents": [
      { "path": "agents/code-analyzer.md", "version": "~1.2.0" }
    ]
  }
}"#;

        let extracted = MetadataExtractor::extract(Path::new("hook.json"), document).unwrap();
        assert!(extracted.has_dependencies());

        let by_type = extracted.dependencies.unwrap();
        let scripts = &by_type["scripts"];
        assert_eq!(scripts.len(), 2);
        assert_eq!(by_type["agents"].len(), 1);
        assert_eq!(scripts[0].path, "scripts/test-runner.sh");
        assert_eq!(scripts[0].version, Some("v1.0.0".to_string()));
    }

    /// A JSON file without a `dependencies` key yields empty metadata.
    #[test]
    fn test_extract_json_no_dependencies() {
        let document = r#"{
  "command": "npx",
  "args": ["-y", "@modelcontextprotocol/server-github"]
}"#;

        let extracted = MetadataExtractor::extract(Path::new("mcp.json"), document).unwrap();

        assert!(!extracted.has_dependencies());
    }

    /// Files with an unrecognised extension never carry dependencies.
    #[test]
    fn test_extract_script_file() {
        let script = r#"#!/bin/bash
echo "This is a script file"
# Scripts don't support dependencies"#;

        let extracted = MetadataExtractor::extract(Path::new("script.sh"), script).unwrap();

        assert!(!extracted.has_dependencies());
    }

    /// Auto-detection recognises Markdown frontmatter.
    #[test]
    fn test_extract_auto_markdown() {
        let markdown = r#"---
dependencies:
  agents:
    - path: agents/test.md
---

# Content"#;

        let extracted = MetadataExtractor::extract_auto(markdown).unwrap();

        assert!(extracted.has_dependencies());
        assert_eq!(extracted.dependency_count(), 1);
    }

    /// Auto-detection recognises a JSON document.
    #[test]
    fn test_extract_auto_json() {
        let document = r#"{
  "dependencies": {
    "snippets": [
      { "path": "snippets/test.md" }
    ]
  }
}"#;

        let extracted = MetadataExtractor::extract_auto(document).unwrap();

        assert!(extracted.has_dependencies());
        assert_eq!(extracted.dependency_count(), 1);
    }

    /// CRLF-terminated frontmatter is parsed like its LF counterpart.
    #[test]
    fn test_windows_line_endings() {
        let markdown = "---\r\ndependencies:\r\n  agents:\r\n    - path: agents/test.md\r\n---\r\n\r\n# Content";

        let extracted = MetadataExtractor::extract(Path::new("command.md"), markdown).unwrap();
        assert!(extracted.has_dependencies());

        let by_type = extracted.dependencies.unwrap();
        let agents = &by_type["agents"];
        assert_eq!(agents.len(), 1);
        assert_eq!(agents[0].path, "agents/test.md");
    }

    /// A `dependencies:` key with no value parses but reports no dependencies.
    #[test]
    fn test_empty_dependencies() {
        let markdown = r#"---
dependencies:
---

# Content"#;

        let extracted = MetadataExtractor::extract(Path::new("command.md"), markdown).unwrap();

        // Should parse successfully but have no dependencies
        assert!(!extracted.has_dependencies());
    }

    /// Invalid YAML is tolerated: the call succeeds and the metadata is empty.
    #[test]
    fn test_malformed_yaml() {
        let markdown = r#"---
dependencies:
  agents:
    - path: agents/test.md
    version: missing dash
---

# Content"#;

        let outcome = MetadataExtractor::extract(Path::new("command.md"), markdown);

        // Should succeed but return empty metadata (with warning logged)
        assert!(outcome.is_ok());
        assert!(outcome.unwrap().dependencies.is_none());
    }
}