oxipdf-html 0.1.0

HTML+CSS → StyledTree adapter for the oxipdf PDF engine
Documentation
//! CSS property parsing and cascade resolution.
//!
//! Parses inline `style=""` attributes and `<style>` block declarations,
//! then maps CSS properties to `ResolvedStyle` fields.

mod properties;
mod values;

pub(crate) use properties::apply_declarations;

// ---------------------------------------------------------------------------
// CSS rule types
// ---------------------------------------------------------------------------

/// A single CSS declaration (property: value with optional !important).
///
/// Instances are built by `parse_declarations`, one per `property: value`
/// pair that has both a non-empty property and a non-empty value.
#[derive(Debug, Clone)]
pub(crate) struct Declaration {
    /// Property name, trimmed and lowercased by `parse_declarations`.
    pub property: String,
    /// Property value, trimmed, with a trailing `!important` flag removed.
    pub value: String,
    /// `true` when the declaration carried an `!important` flag.
    pub important: bool,
}

/// A parsed CSS rule: selector string + declarations.
///
/// `parse_stylesheet` emits one `CssRule` per comma-separated selector, so a
/// rule written as `h1, h2 { … }` becomes two `CssRule`s carrying the same
/// declarations.
#[derive(Debug, Clone)]
pub(crate) struct CssRule {
    /// A single selector (never a comma-separated list), trimmed.
    pub selector: String,
    /// Declarations from the rule's `{ … }` block, in source order.
    pub declarations: Vec<Declaration>,
    /// Specificity of `selector`, as returned by `compute_specificity`.
    pub specificity: Specificity,
}

/// CSS specificity: (inline, id, class, element).
///
/// The derived `Ord` compares fields in declaration order, so `inline`
/// outranks `id`, which outranks `class`, which outranks `element`.
/// Reordering the fields would silently change cascade ordering.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Default)]
pub(crate) struct Specificity {
    /// Always 0 for selectors scored by `compute_specificity`.
    /// NOTE(review): presumably set non-zero for inline `style=""`
    /// declarations by the cascade code — confirm against the caller.
    pub inline: u16,
    /// Number of ID-level components (`#id`).
    pub id: u16,
    /// Number of class-level components (classes, attribute selectors,
    /// pseudo-classes).
    pub class: u16,
    /// Number of element-level components (type selectors; `*` is not counted).
    pub element: u16,
}

// ---------------------------------------------------------------------------
// Parse <style> blocks into rules
// ---------------------------------------------------------------------------

/// Parse CSS text from a `<style>` block into a list of rules.
///
/// Each comma-separated selector of a rule becomes its own [`CssRule`] with a
/// copy of the rule's declarations, in source order.
///
/// Handling of constructs other than plain style rules:
///
/// * Comments are removed everywhere before parsing, including inside
///   selectors and declaration blocks.
/// * Statement at-rules terminated by `;` (`@import …;`, `@charset …;`) are
///   skipped.
/// * Blocks that contain nested blocks (`@media`, `@supports`, `@keyframes`,
///   nested rules) are skipped as a whole, using balanced-brace matching, so
///   they cannot corrupt the rules that follow them.
/// * Flat at-rule blocks (`@page { … }`, `@font-face { … }`) are still emitted
///   as a rule whose selector is the at-rule prelude.
/// * A block with an empty selector is skipped.
/// * Parsing stops at the first unterminated block.
///
/// Braces and comment markers inside quoted strings are not recognised as
/// string content.
pub(crate) fn parse_stylesheet(css: &str) -> Vec<CssRule> {
    // Comments may contain braces and may sit between declarations, so drop
    // them before looking for block boundaries.
    let stripped = strip_css_comments(css);
    let css = stripped.as_str();

    let mut rules = Vec::new();
    let mut pos = 0;

    while pos < css.len() {
        pos = skip_ws_and_comments(css, pos);
        if pos >= css.len() {
            break;
        }

        let rest = &css[pos..];
        let brace_start = match rest.find('{') {
            Some(i) => pos + i,
            None => break,
        };

        // `@import …;` and friends end at a semicolon and own no block; without
        // this check they would be glued onto the next rule's selector.
        if rest.starts_with('@') {
            if let Some(semi) = rest.find(';') {
                if pos + semi < brace_start {
                    pos += semi + 1;
                    continue;
                }
            }
        }

        let (brace_end, has_nested_block) = match find_block_end(css, brace_start) {
            Some(found) => found,
            None => break,
        };
        let selector_text = css[pos..brace_start].trim();
        pos = brace_end + 1;

        if selector_text.is_empty() || has_nested_block {
            continue;
        }

        // Parse the block once; every selector in the list shares the result.
        let declarations = parse_declarations(&css[brace_start + 1..brace_end]);
        for sel in selector_text.split(',') {
            let sel = sel.trim();
            if sel.is_empty() {
                continue;
            }
            rules.push(CssRule {
                selector: sel.to_string(),
                declarations: declarations.clone(),
                specificity: compute_specificity(sel),
            });
        }
    }

    rules
}

/// Locate the `}` matching the `{` at byte offset `open`.
///
/// Returns the offset of the closing brace and whether any nested `{` was
/// seen on the way, or `None` when the block is never closed.
fn find_block_end(css: &str, open: usize) -> Option<(usize, bool)> {
    let mut depth = 0usize;
    let mut nested = false;
    for (offset, byte) in css.as_bytes()[open..].iter().copied().enumerate() {
        match byte {
            b'{' => {
                depth += 1;
                nested |= depth > 1;
            }
            b'}' => {
                // `depth` is at least 1 here: the scan starts on a `{` and
                // returns as soon as the count drops back to zero.
                depth -= 1;
                if depth == 0 {
                    return Some((open + offset, nested));
                }
            }
            _ => {}
        }
    }
    None
}

/// Return `css` with every `/* … */` comment removed.
///
/// An unterminated comment swallows the rest of the input.
fn strip_css_comments(css: &str) -> String {
    let mut out = String::with_capacity(css.len());
    let mut rest = css;
    while let Some(open) = rest.find("/*") {
        out.push_str(&rest[..open]);
        match rest[open + 2..].find("*/") {
            Some(close) => rest = &rest[open + 2 + close + 2..],
            None => return out,
        }
    }
    out.push_str(rest);
    out
}

/// Advance `pos` past any run of ASCII whitespace and `/* … */` comments.
///
/// Returns the byte offset of the first character that is neither, or the
/// input length when nothing else remains. An unterminated comment consumes
/// the rest of the input. A `pos` at or beyond the end is returned unchanged.
fn skip_ws_and_comments(css: &str, mut pos: usize) -> usize {
    let bytes = css.as_bytes();
    loop {
        match bytes.get(pos) {
            Some(byte) if byte.is_ascii_whitespace() => pos += 1,
            Some(&b'/') if bytes.get(pos + 1) == Some(&b'*') => {
                let body_start = pos + 2;
                pos = match css[body_start..].find("*/") {
                    // Resume right after the closing `*/`.
                    Some(close) => body_start + close + 2,
                    None => bytes.len(),
                };
            }
            _ => return pos,
        }
    }
}

/// Compute specificity for a single (non-comma-separated) selector string.
///
/// The selector is split into compound selectors at whitespace and the
/// combinators `>`, `+`, `~`, and each compound into simple-selector tokens:
///
/// * `#id` counts towards `id`;
/// * `.class`, `[attr]` and `:pseudo-class` count towards `class`;
/// * type selectors and pseudo-elements (`::before`, plus the legacy
///   single-colon forms `:before`, `:after`, `:first-line`, `:first-letter`)
///   count towards `element`;
/// * the universal selector `*` counts for nothing.
///
/// Text inside `[...]` and `(...)` is never treated as a combinator or token
/// boundary, so `a[href="x.y"]` and `li:nth-child(2n + 1)` each contribute
/// exactly one class-level component. A functional pseudo-class such as
/// `:not(...)` is counted as one class-level component regardless of its
/// argument, which is an approximation of the CSS rules.
///
/// `inline` is always 0 in the returned value. Counters saturate at
/// `u16::MAX` instead of overflowing.
pub(crate) fn compute_specificity(selector: &str) -> Specificity {
    let mut id = 0u16;
    let mut class = 0u16;
    let mut element = 0u16;

    for compound in split_compound_selectors(selector) {
        for token in split_selector_tokens(compound) {
            if token.starts_with('#') {
                id = id.saturating_add(1);
            } else if is_pseudo_element(token) {
                element = element.saturating_add(1);
            } else if token.starts_with(['.', '[', ':']) {
                class = class.saturating_add(1);
            } else if !token.is_empty() && token != "*" {
                element = element.saturating_add(1);
            }
        }
    }

    Specificity {
        inline: 0,
        id,
        class,
        element,
    }
}

/// Whether `token` is a pseudo-element: `::name`, or one of the four
/// pseudo-elements that CSS also accepts with a single colon.
fn is_pseudo_element(token: &str) -> bool {
    const LEGACY: [&str; 4] = [":before", ":after", ":first-line", ":first-letter"];
    token.starts_with("::") || LEGACY.iter().any(|name| token.eq_ignore_ascii_case(name))
}

/// Split a complex selector into compound selectors at whitespace and the
/// combinators `>`, `+`, `~`, ignoring any that occur inside `[...]` or `(...)`.
fn split_compound_selectors(selector: &str) -> Vec<&str> {
    let mut compounds = Vec::new();
    let mut depth = 0usize;
    let mut start = 0;

    for (i, c) in selector.char_indices() {
        match c {
            '[' | '(' => depth += 1,
            ']' | ')' => depth = depth.saturating_sub(1),
            sep if depth == 0 && (sep.is_whitespace() || matches!(sep, '>' | '+' | '~')) => {
                if i > start {
                    compounds.push(&selector[start..i]);
                }
                start = i + sep.len_utf8();
            }
            _ => {}
        }
    }
    if start < selector.len() {
        compounds.push(&selector[start..]);
    }
    compounds
}

/// Split "div.class#id" into ["div", ".class", "#id"].
///
/// A new token starts at each `.`, `#`, `[` or `:` that lies outside `[...]`
/// and `(...)`. The second colon of `::` stays with the first, so
/// `p::before` yields `["p", "::before"]`.
fn split_selector_tokens(s: &str) -> Vec<&str> {
    let mut tokens = Vec::new();
    let mut start = 0;
    let mut depth = 0usize;
    let bytes = s.as_bytes();

    for (i, &b) in bytes.iter().enumerate() {
        let second_colon = b == b':' && i > 0 && bytes[i - 1] == b':';
        let starts_token =
            depth == 0 && matches!(b, b'.' | b'#' | b'[' | b':') && !second_colon;
        // Every split point is an ASCII byte, hence a valid char boundary.
        if starts_token && i > start {
            tokens.push(&s[start..i]);
            start = i;
        }
        match b {
            b'[' | b'(' => depth += 1,
            b']' | b')' => depth = depth.saturating_sub(1),
            _ => {}
        }
    }
    if start < bytes.len() {
        tokens.push(&s[start..]);
    }
    tokens
}

// ---------------------------------------------------------------------------
// Parse declaration blocks
// ---------------------------------------------------------------------------

/// Parse "property: value; property: value !important;" into declarations.
///
/// Declarations are separated by `;`, except where the semicolon sits inside
/// parentheses or a quoted string — so values such as
/// `url(data:image/png;base64,…)` or `"a;b"` stay intact. Each declaration is
/// split at its first `:`; the property name is trimmed and lowercased, the
/// value is trimmed and has a trailing `!important` flag moved into
/// [`Declaration::important`]. Entries without a `:`, or with an empty
/// property or value, are skipped.
pub(crate) fn parse_declarations(text: &str) -> Vec<Declaration> {
    let mut decls = Vec::new();
    for decl in split_declaration_list(text) {
        if let Some((prop, val)) = decl.split_once(':') {
            let prop = prop.trim().to_lowercase();
            let val = val.trim();
            if prop.is_empty() || val.is_empty() {
                continue;
            }
            let (value, important) = strip_important(val);
            decls.push(Declaration {
                property: prop,
                value,
                important,
            });
        }
    }
    decls
}

/// Split a declaration list at every `;` that is outside parentheses and
/// outside quoted strings.
///
/// A backslash escapes the following character. An unescaped newline ends an
/// open string, so one stray quote cannot swallow the rest of the list.
fn split_declaration_list(text: &str) -> Vec<&str> {
    let mut segments = Vec::new();
    let mut start = 0;
    let mut paren_depth = 0usize;
    let mut quote: Option<char> = None;
    let mut escaped = false;

    for (i, c) in text.char_indices() {
        if escaped {
            escaped = false;
            continue;
        }
        match c {
            '\\' => escaped = true,
            '"' | '\'' => match quote {
                None => quote = Some(c),
                Some(open) if open == c => quote = None,
                // The other quote character inside a string is plain content.
                Some(_) => {}
            },
            '\n' if quote.is_some() => quote = None,
            _ if quote.is_some() => {}
            '(' => paren_depth += 1,
            ')' => paren_depth = paren_depth.saturating_sub(1),
            ';' if paren_depth == 0 => {
                segments.push(&text[start..i]);
                start = i + 1;
            }
            _ => {}
        }
    }
    segments.push(&text[start..]);
    segments
}

/// Strip `!important` suffix from a CSS value, returning the clean value
/// and whether the flag was present.
///
/// The keyword is matched ASCII case-insensitively (`!IMPORTANT` works), and
/// whitespace is allowed between `!` and the keyword and before the `!`.
/// When the flag is absent, or when stripping it would leave an empty value,
/// the trimmed input is returned unchanged with `false`.
fn strip_important(val: &str) -> (String, bool) {
    const KEYWORD: &str = "important";

    let trimmed = val.trim();
    let before_keyword = trimmed
        .len()
        .checked_sub(KEYWORD.len())
        .and_then(|at| {
            // `get` returns `None` when `at` is not a char boundary, which
            // also rules out a match against multi-byte text.
            let tail = trimmed.get(at..)?;
            if tail.eq_ignore_ascii_case(KEYWORD) {
                Some(trimmed[..at].trim_end())
            } else {
                None
            }
        });

    if let Some(clean) = before_keyword.and_then(|prefix| prefix.strip_suffix('!')) {
        let clean = clean.trim_end();
        if !clean.is_empty() {
            return (clean.to_string(), true);
        }
    }
    (trimmed.to_string(), false)
}

// Unit tests for stylesheet/declaration parsing and for applying parsed
// declarations to a `ResolvedStyle`.
#[cfg(test)]
mod tests {
    use super::*;
    use oxipdf_ir::style::ResolvedStyle;

    /// Specificity of a type selector, a class, an ID, and a compound
    /// selector combining all three.
    #[test]
    fn specificity_calculation() {
        assert_eq!(
            compute_specificity("p"),
            Specificity {
                inline: 0,
                id: 0,
                class: 0,
                element: 1,
            }
        );
        assert_eq!(
            compute_specificity(".foo"),
            Specificity {
                inline: 0,
                id: 0,
                class: 1,
                element: 0,
            }
        );
        assert_eq!(
            compute_specificity("#bar"),
            Specificity {
                inline: 0,
                id: 1,
                class: 0,
                element: 0,
            }
        );
        assert_eq!(
            compute_specificity("div.foo#bar"),
            Specificity {
                inline: 0,
                id: 1,
                class: 1,
                element: 1,
            }
        );
    }

    /// Two consecutive rules produce two `CssRule`s; only the first rule's
    /// selector and declaration count are checked.
    #[test]
    fn parse_stylesheet_basic() {
        let css = "p { color: red; font-size: 14pt; } h1 { font-weight: bold; }";
        let rules = parse_stylesheet(css);
        assert_eq!(rules.len(), 2);
        assert_eq!(rules[0].selector, "p");
        assert_eq!(rules[0].declarations.len(), 2);
    }

    /// A comma-separated selector list expands to one rule per selector.
    #[test]
    fn parse_comma_selectors() {
        let css = "h1, h2, h3 { font-weight: bold; }";
        let rules = parse_stylesheet(css);
        assert_eq!(rules.len(), 3);
    }

    /// `font-size: 14pt` and `font-weight: bold` reach the typography fields.
    /// The `color` declaration is applied too but not asserted on.
    #[test]
    fn apply_declarations_typography() {
        let mut style = ResolvedStyle::default();
        let decls = parse_declarations("font-size: 14pt; font-weight: bold; color: #ff0000");
        apply_declarations(&mut style, &decls);
        assert!((style.typography.font_size.get() - 14.0).abs() < 0.01);
        assert_eq!(style.typography.font_weight, 700);
    }

    /// The four-value `margin` shorthand sets length margins; only the
    /// variant of the top and left margins is checked, not their values.
    #[test]
    fn apply_margin_shorthand_four_values() {
        let mut style = ResolvedStyle::default();
        let decls = parse_declarations("margin: 10px 20px 30px 40px");
        apply_declarations(&mut style, &decls);
        assert!(matches!(
            style.layout.margin_top,
            oxipdf_ir::Dimension::Length(_)
        ));
        assert!(matches!(
            style.layout.margin_left,
            oxipdf_ir::Dimension::Length(_)
        ));
    }

    /// `!important` is recorded per declaration and does not leak into a
    /// neighbouring declaration.
    #[test]
    fn important_flag_detected() {
        let decls = parse_declarations("color: red !important; font-size: 14pt");
        assert_eq!(decls.len(), 2);
        assert!(decls[0].important, "color should be !important");
        assert_eq!(decls[0].value, "red");
        assert!(!decls[1].important, "font-size should not be !important");
    }

    /// The `!important` suffix is removed from the stored value.
    #[test]
    fn important_stripped_from_value() {
        let decls = parse_declarations("margin: 10px !important");
        assert_eq!(decls.len(), 1);
        assert_eq!(decls[0].value, "10px");
        assert!(decls[0].important);
    }
}