Skip to main content

cc_audit/parser/
traits.rs

//! Parser traits for the content parsing layer (L4).
2
3use crate::error::Result;
4use serde::{Deserialize, Serialize};
5
6/// The type of content detected in a file.
7#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
8pub enum ContentType {
9    /// Markdown document (SKILL.md, CLAUDE.md, commands, etc.)
10    Markdown,
11    /// JSON configuration (mcp.json, package.json, etc.)
12    Json,
13    /// YAML configuration (docker-compose.yml, subagent configs)
14    Yaml,
15    /// TOML configuration (Cargo.toml, pyproject.toml)
16    Toml,
17    /// Dockerfile
18    Dockerfile,
19    /// Plain text
20    PlainText,
21    /// Unknown/binary content
22    Unknown,
23}
24
25impl ContentType {
26    /// Detect content type from file extension.
27    pub fn from_extension(ext: &str) -> Self {
28        match ext.to_lowercase().as_str() {
29            "md" | "markdown" => Self::Markdown,
30            "json" => Self::Json,
31            "yml" | "yaml" => Self::Yaml,
32            "toml" => Self::Toml,
33            "dockerfile" => Self::Dockerfile,
34            "txt" | "text" => Self::PlainText,
35            _ => Self::Unknown,
36        }
37    }
38
39    /// Detect content type from filename.
40    pub fn from_filename(filename: &str) -> Self {
41        let lower = filename.to_lowercase();
42
43        // Special filenames
44        if lower == "dockerfile" || lower.starts_with("dockerfile.") {
45            return Self::Dockerfile;
46        }
47
48        // Check extension
49        if let Some(ext) = filename.rsplit('.').next() {
50            let content_type = Self::from_extension(ext);
51            if content_type != Self::Unknown {
52                return content_type;
53            }
54        }
55
56        Self::Unknown
57    }
58}
59
60/// Parsed content from a file.
61#[derive(Debug, Clone)]
62pub struct ParsedContent {
63    /// The detected content type.
64    pub content_type: ContentType,
65    /// The raw file content.
66    pub raw_content: String,
67    /// Parsed structured data (if applicable).
68    pub structured_data: Option<serde_json::Value>,
69    /// Extracted frontmatter (for markdown files).
70    pub frontmatter: Option<String>,
71    /// Source file path.
72    pub source_path: String,
73}
74
75impl ParsedContent {
76    /// Create a new ParsedContent with minimal data.
77    pub fn new(content_type: ContentType, raw_content: String, source_path: String) -> Self {
78        Self {
79            content_type,
80            raw_content,
81            structured_data: None,
82            frontmatter: None,
83            source_path,
84        }
85    }
86
87    /// Add structured data to the parsed content.
88    pub fn with_structured_data(mut self, data: serde_json::Value) -> Self {
89        self.structured_data = Some(data);
90        self
91    }
92
93    /// Add frontmatter to the parsed content.
94    pub fn with_frontmatter(mut self, frontmatter: String) -> Self {
95        self.frontmatter = Some(frontmatter);
96        self
97    }
98}
99
100/// Trait for content parsers (L4).
101///
102/// Each parser handles a specific file format and extracts
103/// structured data for the detection engine.
104pub trait ContentParser: Send + Sync {
105    /// Parse the file content.
106    fn parse(&self, content: &str, path: &str) -> Result<ParsedContent>;
107
108    /// Get the file extensions this parser supports.
109    fn supported_extensions(&self) -> &[&str];
110
111    /// Check if this parser can handle the given file.
112    fn can_parse(&self, path: &str) -> bool {
113        let path_lower = path.to_lowercase();
114        self.supported_extensions()
115            .iter()
116            .any(|ext| path_lower.ends_with(ext))
117    }
118}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// Minimal parser used to exercise the provided `ContentParser::can_parse`.
    struct DotMarkdownParser;

    impl ContentParser for DotMarkdownParser {
        fn parse(&self, content: &str, path: &str) -> crate::error::Result<ParsedContent> {
            Ok(ParsedContent::new(
                ContentType::Markdown,
                content.to_string(),
                path.to_string(),
            ))
        }

        fn supported_extensions(&self) -> &[&str] {
            &[".md", ".markdown"]
        }
    }

    #[test]
    fn test_content_type_from_extension() {
        let cases = [
            ("md", ContentType::Markdown),
            ("markdown", ContentType::Markdown),
            ("json", ContentType::Json),
            ("yml", ContentType::Yaml),
            ("yaml", ContentType::Yaml),
            ("toml", ContentType::Toml),
            ("dockerfile", ContentType::Dockerfile),
            ("txt", ContentType::PlainText),
            ("text", ContentType::PlainText),
            ("exe", ContentType::Unknown),
            ("", ContentType::Unknown),
        ];

        for (ext, expected) in cases.iter() {
            assert_eq!(
                ContentType::from_extension(ext),
                *expected,
                "extension {:?}",
                ext
            );
        }
    }

    #[test]
    fn test_content_type_from_extension_ignores_case() {
        assert_eq!(ContentType::from_extension("MD"), ContentType::Markdown);
        assert_eq!(ContentType::from_extension("Json"), ContentType::Json);
        assert_eq!(ContentType::from_extension("YAML"), ContentType::Yaml);
    }

    #[test]
    fn test_content_type_from_filename() {
        let cases = [
            ("SKILL.md", ContentType::Markdown),
            ("README.MD", ContentType::Markdown),
            ("package.json", ContentType::Json),
            ("Cargo.toml", ContentType::Toml),
            ("Dockerfile", ContentType::Dockerfile),
            ("DOCKERFILE", ContentType::Dockerfile),
            ("Dockerfile.prod", ContentType::Dockerfile),
            ("docker-compose.yml", ContentType::Yaml),
            ("archive.tar.gz", ContentType::Unknown),
            ("LICENSE", ContentType::Unknown),
        ];

        for (name, expected) in cases.iter() {
            assert_eq!(
                ContentType::from_filename(name),
                *expected,
                "filename {:?}",
                name
            );
        }
    }

    #[test]
    fn test_parsed_content_new_defaults() {
        let content = ParsedContent::new(
            ContentType::Json,
            "{}".to_string(),
            "mcp.json".to_string(),
        );

        assert_eq!(content.content_type, ContentType::Json);
        assert_eq!(content.raw_content, "{}");
        assert_eq!(content.source_path, "mcp.json");
        assert!(content.structured_data.is_none());
        assert!(content.frontmatter.is_none());
    }

    #[test]
    fn test_parsed_content_builder() {
        let content = ParsedContent::new(
            ContentType::Markdown,
            "# Test".to_string(),
            "test.md".to_string(),
        )
        .with_frontmatter("name: test".to_string())
        .with_structured_data(serde_json::Value::Bool(true));

        assert_eq!(content.content_type, ContentType::Markdown);
        assert_eq!(content.raw_content, "# Test");
        assert_eq!(content.source_path, "test.md");
        assert_eq!(content.frontmatter, Some("name: test".to_string()));
        assert_eq!(
            content.structured_data,
            Some(serde_json::Value::Bool(true))
        );
    }

    #[test]
    fn test_default_can_parse() {
        let parser = DotMarkdownParser;

        assert!(parser.can_parse("SKILL.md"));
        assert!(parser.can_parse("docs/README.MARKDOWN"));
        assert!(!parser.can_parse("package.json"));
        assert!(!parser.can_parse(""));
    }

    #[test]
    fn test_parse_reports_source_path() {
        let parsed = DotMarkdownParser
            .parse("# Title", "notes.md")
            .expect("the stub parser never fails");

        assert_eq!(parsed.content_type, ContentType::Markdown);
        assert_eq!(parsed.raw_content, "# Title");
        assert_eq!(parsed.source_path, "notes.md");
    }
}