oxipdf-html 0.1.0

HTML+CSS → StyledTree adapter for the oxipdf PDF engine
Documentation
//! Stylesheet collection from `<link>` and `<style>` elements.

use std::path::{Path, PathBuf};

use scraper::{Html, Selector};

use crate::css::{self, CssRule};

/// Collect CSS rules from `<link rel="stylesheet" href="...">` elements.
///
/// `rel` is interpreted as a whitespace-separated, ASCII case-insensitive
/// token list, so values such as `rel="preload stylesheet"` are recognised.
/// Links that also carry the `alternate` token are skipped, because alternate
/// stylesheets are not applied by default.
///
/// Resolves relative `href` paths against `base_dir`. Absolute paths are
/// used directly. Links with a missing or empty `href`, hrefs that
/// `resolve_css_path` declines to resolve, and files that fail to load are
/// silently skipped.
pub(crate) fn collect_link_stylesheets(document: &Html, base_dir: Option<&Path>) -> Vec<CssRule> {
    let link_sel = Selector::parse("link").expect("'link' is a valid CSS selector");
    let mut rules = Vec::new();
    for link_el in document.select(&link_sel) {
        let element = link_el.value();

        // Scan the individual link-type tokens rather than comparing the raw
        // attribute value, which would miss multi-token or padded values.
        let rel = element.attr("rel").unwrap_or_default();
        let mut is_stylesheet = false;
        let mut is_alternate = false;
        for token in rel.split_ascii_whitespace() {
            if token.eq_ignore_ascii_case("stylesheet") {
                is_stylesheet = true;
            } else if token.eq_ignore_ascii_case("alternate") {
                is_alternate = true;
            }
        }
        if !is_stylesheet || is_alternate {
            continue;
        }

        let href = match element.attr("href") {
            Some(h) if !h.is_empty() => h,
            _ => continue,
        };

        // Best-effort loading: an unresolvable or unreadable stylesheet must
        // not abort collection of the remaining ones.
        if let Some(path) = resolve_css_path(href, base_dir) {
            if let Ok(css_text) = std::fs::read_to_string(&path) {
                rules.extend(css::parse_stylesheet(&css_text));
            }
        }
    }
    rules
}

/// Resolve a CSS `href` to a filesystem path.
///
/// Returns `None` for network URLs (`http://`, `https://`, or
/// protocol-relative `//…`) so that callers never attempt I/O on them. URL
/// schemes are case-insensitive, so `HTTP://…` and `Https://…` are rejected
/// as well.
///
/// An absolute `href` is returned as-is. A relative `href` is joined onto
/// `base_dir`; when no `base_dir` is given it cannot be resolved and the
/// result is `None`.
pub(crate) fn resolve_css_path(href: &str, base_dir: Option<&Path>) -> Option<PathBuf> {
    // Compare bytes instead of slicing the `&str`, so a multi-byte character
    // straddling the prefix length can never cause a char-boundary panic.
    let has_scheme = |scheme_prefix: &str| {
        href.as_bytes()
            .get(..scheme_prefix.len())
            .is_some_and(|head| head.eq_ignore_ascii_case(scheme_prefix.as_bytes()))
    };

    // Skip network URLs — no I/O.
    if href.starts_with("//") || has_scheme("http://") || has_scheme("https://") {
        return None;
    }

    let path = Path::new(href);
    if path.is_absolute() {
        return Some(path.to_path_buf());
    }

    // Relative path needs base_dir.
    base_dir.map(|dir| dir.join(path))
}

/// Gather the CSS rules declared inline in the document's `<style>` elements.
///
/// Every `<style>` element matched by the selector is visited in the order
/// `select` yields it; its text nodes are concatenated into one string, parsed
/// with `css::parse_stylesheet`, and the resulting rules are appended to the
/// returned vector.
pub(crate) fn collect_style_rules(document: &Html) -> Vec<CssRule> {
    let selector = Selector::parse("style").expect("'style' is a valid CSS selector");
    let mut collected = Vec::new();
    document.select(&selector).for_each(|style_node| {
        // A `<style>` element may hold several text nodes; join them first.
        let source: String = style_node.text().collect();
        collected.extend(css::parse_stylesheet(&source));
    });
    collected
}