oxipdf-html 0.1.0

HTML+CSS → StyledTree adapter for the oxipdf PDF engine
Documentation
//! Data URI parsing and base64 decoding utilities.
//!
//! No network I/O — only `data:` URIs are supported per the engine's
//! no-network-I/O-in-render-pipeline rule.

use oxipdf_ir::node::ImageFormat;

/// Parse a `data:` URI into raw bytes + ImageFormat.
///
/// The URI is split into a header (`<mime>[;param]*`) and a payload at the
/// first `,`. The scheme, the MIME type and the `base64` flag are all matched
/// ASCII-case-insensitively. Only `image/png`, `image/jpeg` / `image/jpg` and
/// `image/webp` are accepted.
///
/// The payload is percent-decoded first; when the header carries a `base64`
/// parameter the result is then base64-decoded, otherwise the percent-decoded
/// bytes are returned as-is.
///
/// Returns `None` when the input is not a `data:` URI, has no `,` separator,
/// names an unsupported MIME type, or carries an undecodable base64 payload.
pub(crate) fn parse_data_uri(uri: &str) -> Option<(Vec<u8>, ImageFormat)> {
    // URI schemes are case-insensitive, so `DATA:` must be accepted as well.
    // `get` also guards against short inputs and non-char-boundary splits.
    let (scheme, rest) = (uri.get(..5)?, uri.get(5..)?);
    if !scheme.eq_ignore_ascii_case("data:") {
        return None;
    }
    let (header, payload) = rest.split_once(',')?;

    let mut params = header.split(';');
    let mime = params
        .next()
        .unwrap_or_default()
        .trim()
        .to_ascii_lowercase();
    // Only a standalone `base64` parameter counts; a substring match would
    // misfire on headers such as `image/png;name=base64.png`.
    let is_base64 = params.any(|param| param.trim().eq_ignore_ascii_case("base64"));

    let format = match mime.as_str() {
        "image/png" => ImageFormat::Png,
        "image/jpeg" | "image/jpg" => ImageFormat::Jpeg,
        "image/webp" => ImageFormat::Webp,
        _ => return None,
    };

    let decoded = percent_decode_payload(payload);
    let bytes = if is_base64 {
        // Bytes that are not valid UTF-8 cannot be base64 symbols either.
        base64_decode(std::str::from_utf8(&decoded).ok()?)?
    } else {
        decoded
    };

    Some((bytes, format))
}

/// Decode `%XX` escapes in a data URI payload.
///
/// A `%` that is not followed by two hexadecimal digits is kept verbatim, as
/// are all other bytes.
fn percent_decode_payload(payload: &str) -> Vec<u8> {
    fn hex_val(b: u8) -> Option<u8> {
        char::from(b)
            .to_digit(16)
            .and_then(|digit| u8::try_from(digit).ok())
    }

    let mut out = Vec::with_capacity(payload.len());
    let mut rest = payload.as_bytes();
    while let Some((&first, tail)) = rest.split_first() {
        if first == b'%' {
            if let [hi, lo, after @ ..] = tail {
                if let (Some(hi), Some(lo)) = (hex_val(*hi), hex_val(*lo)) {
                    out.push((hi << 4) | lo);
                    rest = after;
                    continue;
                }
            }
        }
        out.push(first);
        rest = tail;
    }
    out
}

/// Minimal base64 decoder (no external dependency).
///
/// Decodes the standard alphabet (`A-Z`, `a-z`, `0-9`, `+`, `/`). ASCII
/// whitespace anywhere in the input is ignored. Trailing `=` padding is
/// optional, but when present it must be the very end of the input and at
/// most two characters long.
///
/// Returns `None` when the input contains a byte outside the alphabet, has
/// anything other than `=` after the first `=`, has more than two padding
/// characters, or ends in a lone symbol (6 bits cannot form a whole byte).
/// Unused low bits of a final partial group are discarded.
fn base64_decode(input: &str) -> Option<Vec<u8>> {
    let symbols: Vec<u8> = input
        .bytes()
        .filter(|b| !b.is_ascii_whitespace())
        .collect();

    // Everything from the first `=` onwards must be padding and nothing else;
    // otherwise data after the padding would be silently dropped or decoded
    // as if it were a second stream.
    let data_len = symbols
        .iter()
        .position(|&b| b == b'=')
        .unwrap_or(symbols.len());
    let (data, padding) = symbols.split_at(data_len);
    if padding.len() > 2 || padding.iter().any(|&b| b != b'=') {
        return None;
    }
    // A dangling single symbol is truncated input, not a valid tail.
    if data.len() % 4 == 1 {
        return None;
    }

    let mut out = Vec::with_capacity(data.len() * 3 / 4);
    for group in data.chunks(4) {
        let mut sextets = [0u8; 4];
        for (slot, &symbol) in sextets.iter_mut().zip(group) {
            *slot = b64_val(symbol)?;
        }
        let [a, b, c, d] = sextets;
        let bytes = [(a << 2) | (b >> 4), (b << 4) | (c >> 2), (c << 6) | d];
        // `n` symbols carry `n - 1` whole bytes; `n >= 2` is guaranteed by
        // the length check above.
        out.extend_from_slice(&bytes[..group.len() - 1]);
    }

    Some(out)
}

/// Map one base64 symbol to its 6-bit value.
///
/// Returns `None` for any byte outside the standard alphabet, including the
/// padding character `=`.
fn b64_val(b: u8) -> Option<u8> {
    match b {
        b'A'..=b'Z' => Some(b - b'A'),
        b'a'..=b'z' => Some(b - b'a' + 26),
        b'0'..=b'9' => Some(b - b'0' + 52),
        b'+' => Some(62),
        b'/' => Some(63),
        _ => None,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A padded base64 string decodes to the ASCII text it encodes.
    #[test]
    fn base64_decode_basic() {
        let expected: &[u8] = b"Hello";
        assert_eq!(base64_decode("SGVsbG8=").as_deref(), Some(expected));
    }

    /// A base64 `image/png` data URI parses and is tagged as PNG.
    #[test]
    fn data_uri_png() {
        let format = parse_data_uri("data:image/png;base64,iVBORw0KGgo=").map(|(_, format)| format);
        assert_eq!(format, Some(ImageFormat::Png));
    }
}