html-cat 0.1.0

HTML5 parser: tokenizer + tree builder producing a Document tree of Element/Text/Comment nodes. No mut, no Rc/Arc, no interior mutability, no panics, exhaustive matches. First sub-crate of a Servo-replacement webview runtime targeting Tauri.
//! Tree builder: token stream → [`Document`].
//!
//! Simplified HTML5 insertion-mode algorithm.  Handles the common
//! patterns: optional doctype, auto-insertion of `html`/`body` (a
//! `head` element is never synthesized), stack-based open-element
//! tracking, void elements, and raw-text contexts.  Foreign content,
//! template, and the adoption agency algorithm are deferred to v0.2.

use crate::attr::Attributes;
use crate::error::Error;
use crate::node::{Comment, Doctype, Document, Element, Node, Text, is_void_element};
use crate::span::{Position, Span};
use crate::token::Token;

/// Build a [`Document`] from a token stream.
///
/// # Errors
///
/// Currently never returns `Err`; the signature is `Result` so future
/// strict-mode callers can opt into surfaced errors.
pub fn build(tokens: Vec<Token>) -> Result<Document, Error> {
    let initial = BuilderState::new();
    let final_state = drive(tokens, 0, initial);
    Ok(finalize(final_state))
}

/// Accumulated tree-builder state, passed by value through every token
/// handler and returned in updated form.
#[derive(Debug, Clone)]
struct BuilderState {
    /// Doctype recorded by `with_doctype`, if a doctype token has been seen.
    doctype: Option<Doctype>,
    /// Stack of elements that are still open; the last entry is the innermost.
    open_elements: Vec<PartialElement>,
    /// Finished root element, stored by `finish_top` when an element closes
    /// with nothing left below it on the stack.
    root_built: Option<Element>,
    /// Start of the document span; set by `new` and not reassigned in the
    /// code visible here.
    document_span_start: Position,
    /// End position taken from the span of the most recently applied token.
    document_span_end: Position,
}

/// An element that has been opened but not yet closed; it is converted into
/// an `Element` by `finish_top`.
#[derive(Debug, Clone)]
struct PartialElement {
    /// Tag name from the start tag, or `html`/`body` for synthesized wrappers.
    name: String,
    /// Attributes from the start tag (empty for synthesized wrappers).
    attributes: Attributes,
    /// Self-closing flag from the start tag, forwarded to `Element::new`.
    self_closing: bool,
    /// Child nodes collected so far, in the order they were appended.
    children: Vec<Node>,
    /// Start position of the opening tag; becomes the start of the element span.
    span_start: Position,
}

impl BuilderState {
    /// Empty state: no doctype, nothing open, no finished root, and both
    /// document span endpoints at `Position::new(1, 1, 0)`.
    fn new() -> Self {
        let origin = Position::new(1, 1, 0);
        Self {
            document_span_start: origin,
            document_span_end: origin,
            root_built: None,
            open_elements: Vec::new(),
            doctype: None,
        }
    }
}

fn drive(tokens: Vec<Token>, idx: usize, state: BuilderState) -> BuilderState {
    tokens.get(idx).cloned().map_or(state.clone(), |token| {
        let next_state = consume_token(state, token);
        drive(tokens, idx + 1, next_state)
    })
}

fn consume_token(state: BuilderState, token: Token) -> BuilderState {
    match token {
        Token::Doctype {
            name,
            public_id,
            system_id,
            span,
        } => with_doctype(state, name, public_id, system_id, span),
        Token::StartTag {
            name,
            attributes,
            self_closing,
            span,
        } => open_element(state, name, attributes, self_closing, span),
        Token::EndTag { name, span } => close_element(state, &name, span),
        Token::Text { content, span } => {
            append_node_to_top(state, &Node::Text(Text::new(content, span)), span)
        }
        Token::Comment { text, span } => {
            append_node_to_top(state, &Node::Comment(Comment::new(text, span)), span)
        }
        Token::Eof { span } => with_eof(state, span),
    }
}

/// Record a doctype token and advance the document end to the token's end.
///
/// Any doctype already stored in `state` is replaced.
fn with_doctype(
    state: BuilderState,
    name: String,
    public_id: Option<String>,
    system_id: Option<String>,
    span: Span,
) -> BuilderState {
    let recorded = Doctype::new(name, public_id, system_id, span);
    BuilderState {
        document_span_end: span.end(),
        doctype: Some(recorded),
        ..state
    }
}

/// Handle a start tag: make sure a root context exists, push the new element
/// onto the open stack, and close it straight away when it is void or
/// self-closing.
///
/// NOTE(review): a self-closing flag on a non-void element closes it
/// immediately here, whereas the HTML Standard ignores the flag for such
/// elements — confirm this is intended.
fn open_element(
    state: BuilderState,
    name: String,
    attributes: Attributes,
    self_closing: bool,
    span: Span,
) -> BuilderState {
    let rooted = ensure_root_context(state, &name, span);
    // Decided up front because `name` is moved into the partial element below.
    let closes_immediately = self_closing || is_void_element(&name);
    let opened = PartialElement {
        span_start: span.start(),
        children: Vec::new(),
        self_closing,
        attributes,
        name,
    };
    let stack: Vec<PartialElement> = rooted
        .open_elements
        .into_iter()
        .chain(std::iter::once(opened))
        .collect();
    let pushed = BuilderState {
        document_span_end: span.end(),
        open_elements: stack,
        ..rooted
    };
    if !closes_immediately {
        return pushed;
    }
    close_top(pushed, span)
}

/// Synthesize wrapper elements when the first content arrives without an
/// explicit `<html>`.
///
/// Acts only when nothing is open, no root has been built, and `incoming` is
/// not `html`; otherwise `state` is returned untouched.  When it acts:
///
/// * `head` or `body` incoming: the stack becomes `[html]`;
/// * anything else: the stack becomes `[html, body]`.
///
/// A `head` element is never synthesized, and head-only elements are not
/// distinguished from body content.  Synthesized wrappers are positioned at
/// the start of `span`.
fn ensure_root_context(state: BuilderState, incoming: &str, span: Span) -> BuilderState {
    let needs_root = state.open_elements.is_empty() && state.root_built.is_none();
    if !needs_root || incoming == "html" {
        return state;
    }
    // Zero-width span at the start of the triggering token.
    let zero_width = Span::new(span.start(), span.start());
    let synthetic = |tag: &str| PartialElement {
        name: tag.to_owned(),
        attributes: Attributes::new(),
        self_closing: false,
        children: Vec::new(),
        span_start: zero_width.start(),
    };
    let wrappers = match incoming {
        "head" | "body" => vec![synthetic("html")],
        _ => vec![synthetic("html"), synthetic("body")],
    };
    BuilderState {
        open_elements: wrappers,
        ..state
    }
}

/// Handle an end tag: close the innermost open element whose name equals the
/// ASCII-lowercased `name`, together with everything opened inside it.
///
/// An end tag that matches no open element leaves `state` unchanged.
fn close_element(state: BuilderState, name: &str, span: Span) -> BuilderState {
    let wanted = name.to_ascii_lowercase();
    // Search from the top of the stack so the innermost match wins.
    let innermost = state
        .open_elements
        .iter()
        .rposition(|open| open.name == wanted);
    match innermost {
        Some(found) => close_down_to(state, found, span),
        None => state,
    }
}

/// Close open elements from the top of the stack down to and including the
/// one at `target_idx`.
///
/// `close_top` is always invoked at least once, even when `target_idx` is at
/// or beyond the top of the stack.
fn close_down_to(state: BuilderState, target_idx: usize, span: Span) -> BuilderState {
    let depth = state.open_elements.len();
    // Each `close_top` removes one entry, so `depth - target_idx` calls reach
    // and include the target.
    let closes = depth.saturating_sub(target_idx).max(1);
    (0..closes).fold(state, |current, _| close_top(current, span))
}

fn close_top(state: BuilderState, span: Span) -> BuilderState {
    if let Some(top) = state.open_elements.last().cloned() {
        let drop_idx = state.open_elements.len().saturating_sub(1);
        let rest: Vec<PartialElement> =
            state.open_elements.iter().take(drop_idx).cloned().collect();
        finish_top(state, top, rest, span)
    } else {
        state
    }
}

fn finish_top(
    state: BuilderState,
    top: PartialElement,
    rest: Vec<PartialElement>,
    span: Span,
) -> BuilderState {
    let element_span = Span::new(top.span_start, span.end());
    let finished = Element::new(
        top.name,
        top.attributes,
        top.children,
        top.self_closing,
        element_span,
    );
    if rest.is_empty() {
        BuilderState {
            open_elements: rest,
            root_built: Some(finished),
            document_span_end: span.end(),
            ..state
        }
    } else {
        let updated_parent_idx = rest.len() - 1;
        let updated_open: Vec<PartialElement> = rest
            .into_iter()
            .enumerate()
            .map(|(i, partial)| {
                if i == updated_parent_idx {
                    PartialElement {
                        children: partial
                            .children
                            .into_iter()
                            .chain(std::iter::once(Node::Element(finished.clone())))
                            .collect(),
                        ..partial
                    }
                } else {
                    partial
                }
            })
            .collect();
        BuilderState {
            open_elements: updated_open,
            document_span_end: span.end(),
            ..state
        }
    }
}

/// Append a copy of `node` to the children of the innermost open element and
/// advance `document_span_end` to `span.end()`.
///
/// A root context is synthesized first when nothing has been opened or built
/// yet.  If the stack is still empty afterwards (a root has already been
/// finished), the node is dropped and the state is returned as is.
fn append_node_to_top(state: BuilderState, node: &Node, span: Span) -> BuilderState {
    let rooted = ensure_root_context_for_text(state, span);
    let depth = rooted.open_elements.len();
    if depth == 0 {
        return rooted;
    }
    // Pair every open element with its 1-based level; only the deepest one
    // (level == depth) receives the node.
    let grown: Vec<PartialElement> = rooted
        .open_elements
        .into_iter()
        .zip(1..=depth)
        .map(|(open, level)| {
            if level < depth {
                open
            } else {
                PartialElement {
                    children: open
                        .children
                        .into_iter()
                        .chain(std::iter::once(node.clone()))
                        .collect(),
                    ..open
                }
            }
        })
        .collect();
    BuilderState {
        document_span_end: span.end(),
        open_elements: grown,
        ..rooted
    }
}

/// Make sure a text or comment node has an element to attach to.
///
/// When something is already open, or a root has already been built, the
/// state is returned unchanged.  Otherwise `ensure_root_context` is invoked
/// as if a `body` tag were incoming, which synthesizes `html` only — the
/// node therefore ends up directly under `html`.
fn ensure_root_context_for_text(state: BuilderState, span: Span) -> BuilderState {
    let has_context = !state.open_elements.is_empty() || state.root_built.is_some();
    if has_context {
        state
    } else {
        ensure_root_context(state, "body", span)
    }
}

/// Handle end of input: stamp the document end with the EOF position, then
/// close every element that is still open.
fn with_eof(state: BuilderState, span: Span) -> BuilderState {
    let stamped = BuilderState {
        document_span_end: span.end(),
        ..state
    };
    close_all_open(stamped, span)
}

/// Close every open element, innermost first, until the stack is empty.
fn close_all_open(state: BuilderState, span: Span) -> BuilderState {
    // Each `close_top` removes exactly one entry, so one call per open
    // element drains the stack.
    let pending = state.open_elements.len();
    (0..pending).fold(state, |current, _| close_top(current, span))
}

/// Convert the final builder state into a [`Document`].
///
/// The document span runs from `document_span_start` to `document_span_end`.
/// When no root element was built, an empty `html` element covering that
/// same span is used as the root.
fn finalize(state: BuilderState) -> Document {
    let whole = Span::new(state.document_span_start, state.document_span_end);
    let root = match state.root_built {
        Some(built) => built,
        None => Element::new("html", Attributes::new(), Vec::new(), false, whole),
    };
    Document::new(state.doctype, root, whole)
}