Skip to main content

cc_audit/parser/
mod.rs

1//! Content parsing layer (L4).
2//!
3//! This module provides parsers for different file formats:
4//! - Markdown (SKILL.md, CLAUDE.md, commands)
5//! - JSON (mcp.json, package.json, settings.json)
6//! - YAML (docker-compose.yml, subagent configs)
7//! - TOML (Cargo.toml, pyproject.toml)
8//! - Dockerfile
9//!
10//! Each parser implements the `ContentParser` trait and extracts
11//! structured data for the detection engine (L5).
12
13pub mod dockerfile;
14pub mod frontmatter;
15pub mod json;
16pub mod markdown;
17pub mod toml;
18pub mod traits;
19pub mod yaml;
20
21// Re-exports for convenience
22pub use dockerfile::DockerfileParser;
23pub use frontmatter::FrontmatterParser;
24pub use json::JsonParser;
25pub use markdown::MarkdownParser;
26pub use toml::TomlParser;
27pub use traits::{ContentParser, ContentType, ParsedContent};
28pub use yaml::YamlParser;
29
30use crate::error::Result;
31
32/// Registry of all available parsers.
33pub struct ParserRegistry {
34    parsers: Vec<Box<dyn ContentParser>>,
35}
36
37impl ParserRegistry {
38    /// Create a new registry with all default parsers.
39    pub fn new() -> Self {
40        Self {
41            parsers: vec![
42                Box::new(MarkdownParser::new()),
43                Box::new(JsonParser::new()),
44                Box::new(YamlParser::new()),
45                Box::new(TomlParser::new()),
46                Box::new(DockerfileParser::new()),
47            ],
48        }
49    }
50
51    /// Find a parser that can handle the given path.
52    pub fn find_parser(&self, path: &str) -> Option<&dyn ContentParser> {
53        self.parsers
54            .iter()
55            .find(|p| p.can_parse(path))
56            .map(|p| p.as_ref())
57    }
58
59    /// Parse content using the appropriate parser.
60    pub fn parse(&self, content: &str, path: &str) -> Result<ParsedContent> {
61        if let Some(parser) = self.find_parser(path) {
62            parser.parse(content, path)
63        } else {
64            // Default to plain text
65            Ok(ParsedContent::new(
66                ContentType::PlainText,
67                content.to_string(),
68                path.to_string(),
69            ))
70        }
71    }
72}
73
74impl Default for ParserRegistry {
75    fn default() -> Self {
76        Self::new()
77    }
78}
79
#[cfg(test)]
mod tests {
    use super::*;

    /// Each supported file kind must resolve to some registered parser.
    #[test]
    fn test_registry_find_parser() {
        let registry = ParserRegistry::new();
        let known_paths = [
            "test.md",
            "config.json",
            "docker-compose.yml",
            "Cargo.toml",
            "Dockerfile",
        ];

        for &path in &known_paths {
            assert!(
                registry.find_parser(path).is_some(),
                "no parser registered for {}",
                path
            );
        }
    }

    /// Paths no parser claims fall back to plain text.
    #[test]
    fn test_registry_parse_unknown() {
        let parsed = ParserRegistry::new()
            .parse("content", "unknown.xyz")
            .unwrap();

        assert_eq!(parsed.content_type, ContentType::PlainText);
    }

    /// Markdown input with a frontmatter block is routed to the Markdown parser.
    #[test]
    fn test_registry_parse_markdown() {
        let source = "---\nname: test\n---\n# Content";
        let parsed = ParserRegistry::new().parse(source, "SKILL.md").unwrap();

        assert_eq!(parsed.content_type, ContentType::Markdown);
        assert!(parsed.frontmatter.is_some());
    }

    /// JSON input is routed to the JSON parser and yields structured data.
    #[test]
    fn test_registry_parse_json() {
        let source = r#"{"name": "test"}"#;
        let parsed = ParserRegistry::new().parse(source, "config.json").unwrap();

        assert_eq!(parsed.content_type, ContentType::Json);
        assert!(parsed.structured_data.is_some());
    }
}