use crate::core::{Content, ExtractionResult, Metadata, Result};
use crate::parsers::Parser;
use crate::utils::security::{validate_file_size, FileSizeLimits};
use cssparser::{Parser as CssParserImpl, ParserInput, Token};
/// Stateless parser for `text/css` input.
///
/// Its `Parser` implementation returns the stylesheet text together with
/// `rule_count`, `selector_count` and (when present) `imports` metadata.
pub struct CssParser;
impl CssParser {
    /// Collects coarse statistics about a stylesheet.
    ///
    /// Returns `(rule_count, selector_count, imports)`:
    ///
    /// * `rule_count` - number of top-level `{ ... }` blocks. An at-rule with a
    ///   block (e.g. `@media`) counts as one rule; rules nested inside it are
    ///   not visited.
    /// * `selector_count` - number of comma-separated entries in the preludes
    ///   that precede those blocks (plus one for a dangling prelude at end of
    ///   input, if any).
    /// * `imports` - targets of `@import` rules, in source order.
    ///
    /// The scan is a token-level heuristic, not a full CSS parse; malformed
    /// input never fails, it just yields best-effort counts.
    fn analyze_css(css_content: &str) -> (usize, usize, Vec<String>) {
        let mut rule_count = 0;
        let mut selector_count = 0;
        let mut imports = Vec::new();
        let mut input = ParserInput::new(css_content);
        let mut parser = CssParserImpl::new(&mut input);
        // True while the selector currently being read has produced at least
        // one significant token and has not been counted yet.
        let mut in_selector = false;

        loop {
            let token = match parser.next_including_whitespace_and_comments() {
                Ok(t) => t,
                Err(_) => break,
            };
            match token {
                Token::AtKeyword(keyword) => {
                    if keyword.eq_ignore_ascii_case("import") {
                        if let Ok(import_url) = Self::extract_import_url(&mut parser) {
                            imports.push(import_url);
                        }
                    }
                }
                // cssparser skips the contents of a block on the next call and
                // never reports the matching `}`, so every block token seen by
                // this loop is a top-level block. No depth tracking is needed
                // (or possible) here.
                Token::CurlyBracketBlock => {
                    rule_count += 1;
                    if in_selector {
                        selector_count += 1;
                        in_selector = false;
                    }
                }
                // A comma closes the current selector; clearing the flag keeps
                // a trailing comma from being counted twice.
                Token::Comma => {
                    if in_selector {
                        selector_count += 1;
                        in_selector = false;
                    }
                }
                // A semicolon ends a block-less at-rule such as
                // `@import "a.css" screen;`. Whatever identifiers preceded it
                // were not selectors.
                Token::Semicolon => {
                    in_selector = false;
                }
                // Tokens that can start or continue a selector. `#id` is
                // tokenized as `IDHash` (`Hash` only covers non-identifier
                // hashes), and `*` / `[attr]` selectors contain no identifier
                // token at this level.
                Token::Ident(_)
                | Token::Hash(_)
                | Token::IDHash(_)
                | Token::Delim('.')
                | Token::Delim('*')
                | Token::SquareBracketBlock => {
                    in_selector = true;
                }
                _ => {}
            }
        }

        // A prelude that was never followed by a block still counts once.
        if in_selector {
            selector_count += 1;
        }
        (rule_count, selector_count, imports)
    }

    /// Reads the target of an `@import` rule.
    ///
    /// Must be called right after the `@import` at-keyword has been consumed.
    /// Accepts a quoted string, an unquoted `url(...)` token, or the
    /// `url("...")` function form; leading whitespace and comments are
    /// skipped. Returns `Err(())` when the next token is none of these.
    fn extract_import_url(parser: &mut CssParserImpl) -> std::result::Result<String, ()> {
        // `expect_url_or_string` descends into the `url(` function block.
        // Pulling further tokens from `parser` directly would skip the
        // function's arguments instead of yielding them.
        parser
            .expect_url_or_string()
            .map(|target| target.to_string())
            .map_err(|_| ())
    }
}
impl Parser for CssParser {
fn name(&self) -> &str {
"CssParser"
}
fn supported_types(&self) -> &[&str] {
&["text/css"]
}
fn parse(&self, data: &[u8], mime_type: &str) -> Result<ExtractionResult> {
validate_file_size(data, FileSizeLimits::CSS, "CSS")?;
let content = String::from_utf8_lossy(data).to_string();
let (rule_count, selector_count, imports) = Self::analyze_css(&content);
let mut metadata = Metadata::new();
metadata.insert(
"rule_count".to_string(),
crate::core::MetadataValue::Number(rule_count as i64),
);
metadata.insert(
"selector_count".to_string(),
crate::core::MetadataValue::Number(selector_count as i64),
);
if !imports.is_empty() {
metadata.insert(
"imports".to_string(),
crate::core::MetadataValue::Text(imports.join(", ")),
);
}
Ok(ExtractionResult {
mime_type: mime_type.to_string(),
content: Content::Text(content),
metadata,
detection_confidence: 0.6,
})
}
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::core::MetadataValue;

    /// Runs the parser over `css` as `text/css`, panicking on failure.
    fn parse_css(css: &str) -> ExtractionResult {
        CssParser.parse(css.as_bytes(), "text/css").unwrap()
    }

    #[test]
    fn test_css_parser_name() {
        assert_eq!(CssParser.name(), "CssParser");
    }

    #[test]
    fn test_css_parser_supported_types() {
        assert_eq!(CssParser.supported_types(), &["text/css"]);
    }

    #[test]
    fn test_css_parser_basic() {
        let result = parse_css(
            r#"
body {
margin: 0;
padding: 0;
}
h1 {
color: blue;
}
"#,
        );

        // The original text must be passed through as the content.
        if let Content::Text(text) = &result.content {
            assert!(text.contains("body"));
            assert!(text.contains("margin"));
        } else {
            panic!("Expected text content");
        }

        for key in ["rule_count", "selector_count"] {
            assert!(result.metadata.get(key).is_some());
        }
    }

    #[test]
    fn test_css_parser_with_imports() {
        let result = parse_css(
            r#"
@import url("base.css");
@import "theme.css";
body {
margin: 0;
}
"#,
        );

        match result.metadata.get("imports") {
            Some(MetadataValue::Text(imports)) => {
                assert!(imports.contains("base.css") || imports.contains("theme.css"));
            }
            _ => panic!("Expected imports in metadata"),
        }
    }

    #[test]
    fn test_css_parser_counts() {
        let result = parse_css(
            r#"
body { margin: 0; }
h1, h2, h3 { color: blue; }
.container { width: 100%; }
"#,
        );

        match result.metadata.get("rule_count") {
            Some(MetadataValue::Number(count)) => {
                assert!(*count > 0, "Should have at least one rule");
            }
            _ => panic!("Expected rule_count in metadata"),
        }
        match result.metadata.get("selector_count") {
            Some(MetadataValue::Number(count)) => {
                assert!(*count > 0, "Should have at least one selector");
            }
            _ => panic!("Expected selector_count in metadata"),
        }
    }

    #[test]
    fn test_css_parser_empty() {
        let result = parse_css("");
        assert_eq!(result.mime_type, "text/css");
        if let Content::Text(text) = result.content {
            assert_eq!(text, "");
        } else {
            panic!("Expected text content");
        }
    }
}