use crate::{Error, Result};
use super::{
FlowDocument, FlowOptions, InlineSpan, Margins,
html_tokenizer::{HtmlNode, parse_html},
};
/// Settings accepted by `render_html_to_pdf`.
///
/// Apart from `font_bytes`, every field is copied unchanged into the
/// `FlowOptions` used to build the underlying `FlowDocument`.
pub struct HtmlRenderOptions {
/// Raw bytes of the font handed to `FlowDocument::new`. Must be non-empty;
/// `render_html_to_pdf` rejects an empty buffer and asks for a TTF/OTF font.
pub font_bytes: Vec<u8>,
/// Page dimensions as a pair of floats; the default is `(595.0, 842.0)`.
/// NOTE(review): presumably (width, height) in PDF points (A4) — confirm.
pub page_size: (f32, f32),
/// Page margins; the default is `Margins::a4_standard()`.
pub margins: Margins,
/// Body text size forwarded to `FlowOptions::body_font_size` (default `11.0`).
pub body_font_size: f32,
/// Line-height factor forwarded to `FlowOptions::line_height_factor` (default `1.4`).
pub line_height_factor: f32,
/// Forwarded to `FlowOptions::max_pages` (default `2000`).
/// NOTE(review): presumably an upper bound on generated pages — confirm in `FlowDocument`.
pub max_pages: u32,
}
impl Default for HtmlRenderOptions {
    /// Builds the default options: no font, a `(595.0, 842.0)` page with
    /// `Margins::a4_standard()`, 11.0 body text at a 1.4 line-height factor,
    /// and a 2000-page limit value.
    ///
    /// The font buffer starts empty, so callers must supply `font_bytes`
    /// before rendering.
    fn default() -> Self {
        let page_size = (595.0, 842.0);
        let margins = Margins::a4_standard();
        Self {
            font_bytes: Vec::new(),
            page_size,
            margins,
            body_font_size: 11.0,
            line_height_factor: 1.4,
            max_pages: 2000,
        }
    }
}
pub fn render_html_to_pdf(html: &str, options: HtmlRenderOptions) -> Result<Vec<u8>> {
if options.font_bytes.is_empty() {
return Err(Error::InvalidInput(
"HtmlRenderOptions.font_bytes must be set to a valid TTF/OTF font".into(),
));
}
let flow_opts = FlowOptions {
page_size: options.page_size,
margins: options.margins,
body_font_size: options.body_font_size,
line_height_factor: options.line_height_factor,
max_pages: options.max_pages,
..FlowOptions::default()
};
let mut flow = FlowDocument::new(options.font_bytes, flow_opts)?;
let document = parse_html(html);
for child in document.children() {
walk_iterative(child, &mut flow)?;
}
flow.render()
}
/// A unit of pending work on the traversal stack used by `walk_iterative`.
enum WalkStep<'a> {
    /// Process this node (scheduling its children if it is a container).
    Visit(&'a HtmlNode),
    /// Emit a page break. Scheduled *beneath* a container's children so that
    /// it fires only once the whole subtree has been processed.
    PageBreak,
}

/// Walks the subtree rooted at `root` in document order without recursion,
/// pushing block-level content into `flow`.
///
/// An explicit stack is used instead of recursion so traversal depth is not
/// bounded by the call stack.
///
/// # Errors
///
/// Propagates the first error returned by any `FlowDocument` push.
fn walk_iterative<'a>(root: &'a HtmlNode, flow: &mut FlowDocument) -> Result<()> {
    let mut stack: Vec<WalkStep<'a>> = vec![WalkStep::Visit(root)];
    while let Some(step) = stack.pop() {
        match step {
            WalkStep::Visit(elem) => process_one(elem, flow, &mut stack)?,
            WalkStep::PageBreak => {
                flow.push_page_break()?;
            }
        }
    }
    Ok(())
}

/// Handles a single node popped from the traversal stack.
///
/// * `h1`–`h6`, `p`, `table`, `ul` and `ol` are converted to flow content
///   directly; their children are not visited individually.
/// * `head`, `script`, `style`, `meta`, `link`, `title` and `noscript` are
///   skipped together with their subtrees.
/// * Any other element is treated as a container: its children are pushed on
///   `stack` in reverse so they are popped in document order.
/// * Nodes without a tag name are ignored.
///
/// When `has_page_break(elem)` is true a page break follows the element. For
/// containers the break is deferred via `WalkStep::PageBreak` so that it is
/// emitted after the children have been rendered rather than before them.
fn process_one<'a>(
    elem: &'a HtmlNode,
    flow: &mut FlowDocument,
    stack: &mut Vec<WalkStep<'a>>,
) -> Result<()> {
    let tag = match elem.tag_name() {
        Some(t) => t,
        None => return Ok(()),
    };

    match tag {
        "h1" | "h2" | "h3" | "h4" | "h5" | "h6" => {
            // The tag is ASCII, so slicing off the leading `h` is safe.
            let level: u8 = tag[1..].parse().unwrap_or(1);
            let text = collect_text(elem);
            if !text.trim().is_empty() {
                flow.push_heading(text.trim(), level)?;
            }
        }
        "p" => {
            let spans = collect_inline_spans(elem);
            let has_content = spans.iter().any(|s| !s.text.trim().is_empty());
            if has_content {
                flow.push_paragraph_styled(&spans)?;
            }
        }
        "table" => {
            process_table(elem, flow)?;
        }
        "ul" => {
            process_list(elem, flow, false)?;
        }
        "ol" => {
            process_list(elem, flow, true)?;
        }
        "head" | "script" | "style" | "meta" | "link" | "title" | "noscript" => {}
        _ => {
            // Container: the children are processed on later iterations, so a
            // page break requested on this element must wait for them. It is
            // pushed first, which places it below the children on the stack.
            if has_page_break(elem) {
                stack.push(WalkStep::PageBreak);
            }
            let children: Vec<&HtmlNode> = elem.children().collect();
            stack.extend(children.into_iter().rev().map(WalkStep::Visit));
            return Ok(());
        }
    }

    // Non-container elements have been fully emitted (or skipped) above, so
    // the break can follow immediately.
    if has_page_break(elem) {
        flow.push_page_break()?;
    }
    Ok(())
}
/// Returns the text of `elem` as produced by its `text_content()` method.
///
/// Callers in this file trim the result themselves; no trimming happens here.
fn collect_text(elem: &HtmlNode) -> String {
elem.text_content()
}
/// Collects the styled text runs inside `elem` (typically a `<p>`).
///
/// Spans start out non-bold, non-italic and black (`[0.0; 3]`); nested
/// elements adjust those attributes via `collect_inline_spans_inner`.
///
/// The result is trimmed as a whole: whitespace-only spans at either end are
/// removed, leading whitespace is stripped from the first remaining span and
/// trailing whitespace from the last one. Interior whitespace is preserved.
fn collect_inline_spans(elem: &HtmlNode) -> Vec<InlineSpan> {
    let mut spans: Vec<InlineSpan> = Vec::new();
    collect_inline_spans_inner(elem, false, false, [0.0; 3], &mut spans);

    // Remove whitespace-only spans from both ends *before* trimming.
    // Otherwise the edge span would be trimmed to nothing and dropped while
    // the span that takes its place keeps its own leading/trailing blanks.
    let leading_blank = spans
        .iter()
        .take_while(|s| s.text.trim().is_empty())
        .count();
    spans.drain(..leading_blank);
    while matches!(spans.last(), Some(s) if s.text.trim().is_empty()) {
        spans.pop();
    }

    if let Some(first) = spans.first_mut() {
        first.text = first.text.trim_start().to_owned();
    }
    if let Some(last) = spans.last_mut() {
        last.text = last.text.trim_end().to_owned();
    }

    spans.retain(|s| !s.text.is_empty());
    spans
}
/// Appends the text runs found under `elem` to `out`, in document order.
///
/// `parent_bold`, `parent_italic` and `parent_color` describe the style in
/// effect at `elem`. `strong`/`b` switch bold on, `em`/`i` switch italic on,
/// and the colour is resolved by `inherited_color`. Child elements named
/// `script`, `style` or `head` are skipped together with their subtrees.
/// Nodes without a tag name contribute a span when `as_text()` yields
/// non-empty text.
///
/// The traversal uses an explicit stack of `(node, bold, italic, colour)`
/// entries rather than recursion.
fn collect_inline_spans_inner(
    elem: &HtmlNode,
    parent_bold: bool,
    parent_italic: bool,
    parent_color: [f32; 3],
    out: &mut Vec<InlineSpan>,
) {
    let mut pending = vec![(elem, parent_bold, parent_italic, parent_color)];

    while let Some((node, bold_above, italic_above, color_above)) = pending.pop() {
        if let Some(tag) = node.tag_name() {
            let bold = bold_above || matches!(tag, "strong" | "b");
            let italic = italic_above || matches!(tag, "em" | "i");
            let color = inherited_color(node, tag, color_above);

            let kids: Vec<&HtmlNode> = node
                .children()
                .filter(|kid| {
                    !matches!(
                        kid.tag_name(),
                        Some("script") | Some("style") | Some("head")
                    )
                })
                .collect();
            // Reverse so the first child ends up on top of the stack and is
            // therefore handled first.
            pending.extend(kids.into_iter().rev().map(|kid| (kid, bold, italic, color)));
        } else if let Some(text) = node.as_text() {
            if !text.is_empty() {
                out.push(InlineSpan {
                    text: text.to_string(),
                    bold: bold_above,
                    italic: italic_above,
                    color: color_above.into(),
                });
            }
        }
    }
}
/// Resolves the text colour for `elem`.
///
/// Anchors (`a`) always get the fixed link colour `[0.0, 0.0, 0.8]`. Any
/// other element uses the colour parsed from its `style` attribute when
/// `parse_css_color` recognises one, and otherwise keeps `parent_color`.
fn inherited_color(elem: &HtmlNode, tag: &str, parent_color: [f32; 3]) -> [f32; 3] {
    match tag {
        "a" => [0.0, 0.0, 0.8],
        _ => elem
            .attr("style")
            .and_then(|css| parse_css_color(&css))
            .unwrap_or(parent_color),
    }
}
/// Extracts the text colour from an inline CSS `style` string.
///
/// The string is split into `;`-separated declarations and only those whose
/// property name is exactly `color` (case-insensitive, surrounding
/// whitespace ignored) are considered, so `background-color`, `border-color`
/// and similar properties are never mistaken for the text colour. The first
/// `color` declaration with a supported value wins.
///
/// Supported values are `#rrggbb`, `#rgb` and `rgb(r, g, b)` with numeric
/// channels on a 0–255 scale. Returns the colour as `[r, g, b]` with each
/// channel in `0.0..=1.0`, or `None` when no supported colour is present.
fn parse_css_color(style: &str) -> Option<[f32; 3]> {
    let lower = style.to_ascii_lowercase();
    lower.split(';').find_map(|declaration| {
        let mut halves = declaration.splitn(2, ':');
        let property = halves.next()?.trim();
        let value = halves.next()?.trim();
        if property != "color" {
            return None;
        }
        parse_css_color_value(value)
    })
}

/// Parses a single lower-cased CSS colour value (`#rrggbb`, `#rgb` or
/// `rgb(r, g, b)`) into normalised RGB channels.
fn parse_css_color_value(value: &str) -> Option<[f32; 3]> {
    if let Some(rest) = value.strip_prefix('#') {
        // Only the leading run of hex digits counts; anything after it
        // (e.g. ` !important`) is ignored.
        let digits = rest.split(|c: char| !c.is_ascii_hexdigit()).next()?;
        let channel = |text: &str| -> Option<u8> { u8::from_str_radix(text, 16).ok() };
        return match digits.len() {
            6 => Some([
                f32::from(channel(&digits[0..2])?) / 255.0,
                f32::from(channel(&digits[2..4])?) / 255.0,
                f32::from(channel(&digits[4..6])?) / 255.0,
            ]),
            // `#abc` is shorthand for `#aabbcc`; doubling a hex digit `d`
            // equals `d * 17`, which never exceeds 255.
            3 => Some([
                f32::from(channel(&digits[0..1])? * 17) / 255.0,
                f32::from(channel(&digits[1..2])? * 17) / 255.0,
                f32::from(channel(&digits[2..3])? * 17) / 255.0,
            ]),
            _ => None,
        };
    }

    if let Some(inner) = value.strip_prefix("rgb(") {
        let inner = inner.split(')').next()?;
        let parts: Vec<&str> = inner.split(',').collect();
        if parts.len() == 3 {
            let channel = |text: &str| -> Option<f32> {
                let raw = text.trim().parse::<f32>().ok()?;
                // `f32::from_str` accepts "nan" and "inf"; neither is a colour.
                if !raw.is_finite() {
                    return None;
                }
                // Out-of-range channels are clamped rather than passed through.
                Some((raw / 255.0).max(0.0).min(1.0))
            };
            return Some([channel(parts[0])?, channel(parts[1])?, channel(parts[2])?]);
        }
    }

    None
}
/// Reports whether `elem` asks for a page break after itself.
///
/// This is the case when its inline `style` contains
/// `page-break-after: always` or when its `class` list contains the class
/// `page-break`. The style check ignores ASCII case and any whitespace, so
/// forms such as `PAGE-BREAK-AFTER:always` or `page-break-after :  always`
/// are recognised as well. The class comparison is exact.
fn has_page_break(elem: &HtmlNode) -> bool {
    let style = elem.attr("style").unwrap_or_default();
    let class = elem.attr("class").unwrap_or_default();

    // Normalise once so a single substring test covers every spacing and
    // casing variant of the declaration.
    let compact_style: String = style
        .chars()
        .filter(|c| !c.is_whitespace())
        .map(|c| c.to_ascii_lowercase())
        .collect();

    compact_style.contains("page-break-after:always")
        || class.split_whitespace().any(|c| c == "page-break")
}
/// Gathers the `<tr>` elements of `table` in document order.
///
/// Rows may be direct children of the table or direct children of a
/// `tbody`, `thead` or `tfoot` section; anything else is ignored.
fn table_rows(table: &HtmlNode) -> Vec<&HtmlNode> {
    let mut found = Vec::new();
    for section in table.children() {
        let name = section.tag_name();
        if name == Some("tr") {
            found.push(section);
        } else if matches!(name, Some("tbody") | Some("thead") | Some("tfoot")) {
            found.extend(
                section
                    .children()
                    .filter(|row| row.tag_name() == Some("tr")),
            );
        }
    }
    found
}
/// Emits `table` as a two-column key/value table.
///
/// For every row the trimmed text of the first `th`/`td` cell becomes the
/// key and that of the second cell the value (empty when the row has only
/// one cell). Further cells are ignored and rows without cells are skipped.
/// Nothing is pushed when no row yields a pair.
///
/// # Errors
///
/// Propagates the error from `FlowDocument::push_key_value_table`.
fn process_table(table: &HtmlNode, flow: &mut FlowDocument) -> Result<()> {
    let mut pairs: Vec<(String, String)> = Vec::new();

    for row in table_rows(table) {
        let mut cells = row
            .children()
            .filter(|cell| matches!(cell.tag_name(), Some("th") | Some("td")))
            .map(|cell| collect_text(cell).trim().to_owned());

        if let Some(key) = cells.next() {
            let value = cells.next().unwrap_or_default();
            pairs.push((key, value));
        }
    }

    if pairs.is_empty() {
        return Ok(());
    }

    let borrowed: Vec<(&str, &str)> = pairs
        .iter()
        .map(|(key, value)| (key.as_str(), value.as_str()))
        .collect();
    flow.push_key_value_table(&borrowed)
}
/// Emits the direct `<li>` children of `list` as a list in `flow`.
///
/// Each item is the trimmed text of its `<li>`; items that are empty after
/// trimming are dropped, and nothing is pushed when no item remains.
/// `ordered` is forwarded to `FlowDocument::push_list`.
///
/// # Errors
///
/// Propagates the error from `FlowDocument::push_list`.
fn process_list(list: &HtmlNode, flow: &mut FlowDocument, ordered: bool) -> Result<()> {
    let mut entries: Vec<String> = Vec::new();
    for node in list.children() {
        if node.tag_name() != Some("li") {
            continue;
        }
        let raw = collect_text(node);
        let text = raw.trim();
        if !text.is_empty() {
            entries.push(text.to_owned());
        }
    }

    if entries.is_empty() {
        return Ok(());
    }

    let borrowed: Vec<&str> = entries.iter().map(|entry| entry.as_str()).collect();
    flow.push_list(&borrowed, ordered)
}