1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
//! Stateful HTML renderer and public render entry point.
//!
//! The renderer struct lives here, while specialized child modules implement output
//! helpers, block and inline visitors, URL rewriting, callouts, and code block details.
//! This keeps the public constructor/render path visible without forcing unrelated
//! rendering rules into one large file.
mod blocks;
mod callout;
mod code_block;
mod incremental;
mod inlines;
mod links;
mod visit;
mod write;
use ox_content_ast::{Document, Node};
use rustc_hash::FxHashMap;
use super::autolink::FirstByteIndex;
use super::escape::{write_escaped_into, write_url_escaped_into};
use super::options::HtmlRendererOptions;
use super::toc::{collect_inline_toc_entries, scan_document_for_render, InlineTocEntry};
use crate::render::{RenderResult, Renderer};
/// Stateful HTML renderer for Markdown AST documents.
///
/// A renderer instance owns reusable buffers for heading IDs, inline table-of-contents
/// entries, and autolink scanning. Reusing the same instance across renders avoids a
/// set of hot-path allocations while keeping the public API as simple as
/// [`HtmlRenderer::render`].
pub struct HtmlRenderer {
/// Rendering options supplied at construction time. `render()` reads
/// `toc_max_depth`, `autolink_urls`, and `autolink_patterns` from here.
options: HtmlRendererOptions,
/// Buffer the HTML is appended to. It is cleared at `render()` entry and
/// moved out to the caller (via `mem::take`) when the render finishes.
output: String,
/// Per-render map from heading id to a counter. Cleared and re-reserved
/// from the scanned heading count at `render()` entry.
/// NOTE(review): presumably used to de-duplicate repeated heading ids —
/// confirm against the heading visitor.
heading_id_counts: FxHashMap<String, usize>,
/// Entries emitted by `render_inline_toc`. Cleared on every render and
/// only repopulated when the document contains a `[[toc]]` marker.
toc_entries: Vec<InlineTocEntry>,
/// Whether the document being rendered contains at least one
/// `[[toc]]` directive paragraph. Cached at `render()` entry so each
/// `visit_paragraph` can skip the marker check entirely when no
/// directive exists (the common case). Kept separate from
/// `toc_entries.is_empty()` because a document may have a marker
/// AND zero entries (no headings, or all filtered by `toc_max_depth`)
/// — in that case we still need to suppress the literal `[[toc]]`
/// text from the output.
document_has_toc_marker: bool,
/// Reusable scratch buffer for the raw concatenated heading text in
/// `heading_id`. A long-lived buffer avoids paying for a fresh
/// `String` allocation per heading — `slugify_heading` previously
/// allocated one `text` String per call.
heading_text_scratch: String,
/// Reusable scratch buffer for the slugified id. The final id String
/// that ends up in `heading_id_counts` is cloned out of here on
/// vacant inserts; the buffer itself stays around across renders.
heading_slug_scratch: String,
/// Suppresses URL auto-linking while we're already inside an `<a>` so
/// the builtin can't nest anchors. Tracked manually rather than via
/// the AST because `visit_text` can be reached through many parents
/// (paragraphs, headings, emphasis, …) and only the link case needs
/// to mask it out.
in_link: bool,
/// First-byte skip index for the autolink scanner. It depends only on
/// `options.autolink_patterns`, which is immutable for the duration of a
/// render, so it is built once at `render()` entry and reused for every
/// text node instead of being rebuilt per node (the prior behaviour zeroed
/// and filled a 256-byte table on the hottest inline path). `None` when
/// autolinking is disabled or there are no patterns.
autolink_index: Option<FirstByteIndex>,
}
impl HtmlRenderer {
/// Creates a renderer configured with the default [`HtmlRendererOptions`].
///
/// Shorthand for [`HtmlRenderer::with_options`] with `HtmlRendererOptions::new()`.
#[must_use]
pub fn new() -> Self {
    let default_options = HtmlRendererOptions::new();
    Self::with_options(default_options)
}
/// Creates a renderer that uses the given `options`.
///
/// Every buffer starts out empty; only the two heading scratch strings are
/// given an initial capacity up front.
#[must_use]
pub fn with_options(options: HtmlRendererOptions) -> Self {
    // A typical heading is well under 64 characters, so sizing the scratch
    // buffers now saves the first heading a grow-from-zero reallocation.
    // The memory cost is negligible: both buffers live as long as the
    // renderer does anyway.
    const HEADING_SCRATCH_CAPACITY: usize = 64;
    let heading_text_scratch = String::with_capacity(HEADING_SCRATCH_CAPACITY);
    let heading_slug_scratch = String::with_capacity(HEADING_SCRATCH_CAPACITY);

    Self {
        options,
        output: String::new(),
        heading_id_counts: FxHashMap::default(),
        toc_entries: Vec::new(),
        document_has_toc_marker: false,
        heading_text_scratch,
        heading_slug_scratch,
        in_link: false,
        autolink_index: None,
    }
}
/// Renders `document` to an HTML string.
///
/// Per-render state (output buffer, TOC entries, heading-id map, TOC-marker
/// flag, autolink index) is reset or rebuilt on entry, so one renderer can be
/// reused for many documents. The returned `String` is moved out of the
/// internal output buffer, which is left empty.
#[must_use]
pub fn render(&mut self, document: &Document<'_>) -> String {
    crate::profile_span!("renderer::render");
    self.output.clear();

    // Renderer setup is intentionally split into a cheap structural scan
    // and the expensive optional work. TOC collection walks every heading
    // and allocates a slug per entry, which used to fire on every render
    // regardless of whether a `[[toc]]` directive existed. The scan below
    // records only booleans/counts, so documents without TOC markers skip
    // all TOC allocation while the heading count still lets us reserve the
    // unique-id map once.
    self.toc_entries.clear();
    let document_scan = scan_document_for_render(document);
    self.document_has_toc_marker = document_scan.has_toc_marker;
    if self.document_has_toc_marker {
        collect_inline_toc_entries(document, self.options.toc_max_depth, &mut self.toc_entries);
    }
    self.heading_id_counts.clear();
    self.heading_id_counts.reserve(document_scan.heading_count);

    // Build the autolink first-byte index once per render. It depends only
    // on the immutable pattern list, not on the text node being rendered,
    // so reusing it avoids rebuilding a 256-byte table on every inline
    // text visit.
    self.autolink_index =
        if self.options.autolink_urls && !self.options.autolink_patterns.is_empty() {
            Some(FirstByteIndex::from_patterns(&self.options.autolink_patterns))
        } else {
            None
        };

    // HTML output is typically 2×–3× the markdown source (every
    // `**bold**` becomes `<strong>...</strong>` etc.) so the prior
    // 1.5× estimate kept undersizing the buffer and forcing 1–2
    // power-of-two reallocs per render on docs >32 KB. 2× hits the
    // realistic mean for the bundled corpora (rust-book / vite /
    // vue / typescript-handbook all land between 1.8× and 2.6×).
    let estimated_len = (document.span.len() as usize).saturating_mul(2);
    // `String::reserve` is relative to the current *length*, not the current
    // capacity. The buffer was cleared above (length 0), so asking for the
    // full estimate guarantees `capacity >= estimated_len` and is a no-op
    // when the buffer is already large enough. Passing the difference
    // `estimated_len - capacity` would silently under-reserve whenever some
    // capacity is already present.
    self.output.reserve(estimated_len);

    self.render_document(document);
    std::mem::take(&mut self.output)
}
/// Renders every top-level child of `document`, in order, into the output
/// buffer by handing each one to [`Self::render_node`].
pub(in crate::html::renderer) fn render_document(&mut self, document: &Document<'_>) {
    let top_level_nodes = &document.children;
    for top_level in top_level_nodes {
        self.render_node(top_level);
    }
}
/// Dispatches one AST node to the `render_*` method for its variant.
///
/// The match has no wildcard arm on purpose: adding a new `Node` variant
/// fails to compile here until it is given a renderer (or an explicit no-op).
#[inline]
pub(in crate::html::renderer) fn render_node(&mut self, node: &Node<'_>) {
    match node {
        Node::Paragraph(paragraph) => self.render_paragraph(paragraph),
        Node::Heading(heading) => self.render_heading(heading),
        Node::ThematicBreak(rule) => self.render_thematic_break(rule),
        Node::BlockQuote(quote) => self.render_block_quote(quote),
        Node::List(list) => self.render_list(list),
        Node::ListItem(item) => self.render_list_item(item),
        Node::CodeBlock(code) => self.render_code_block(code),
        Node::Html(html) => self.render_html(html),
        Node::Table(table) => self.render_table(table),
        Node::Text(text) => self.render_text(text),
        Node::Emphasis(emphasis) => self.render_emphasis(emphasis),
        Node::Strong(strong) => self.render_strong(strong),
        Node::InlineCode(code) => self.render_inline_code(code),
        Node::Break(line_break) => self.render_break(line_break),
        Node::Link(link) => self.render_link(link),
        Node::Image(image) => self.render_image(image),
        Node::Delete(deleted) => self.render_delete(deleted),
        Node::FootnoteReference(reference) => self.render_footnote_reference(reference),
        Node::FootnoteDefinition(definition) => self.render_footnote_definition(definition),
        // Definition nodes write nothing to the output at this point.
        Node::Definition(_) => {}
    }
}
pub(in crate::html::renderer) fn render_inline_toc(&mut self) {
use std::fmt::Write as _;
if self.toc_entries.is_empty() {
return;
}
self.write("<nav class=\"ox-toc\" aria-label=\"Table of contents\">\n<ul>\n");
for entry in &self.toc_entries {
self.output.push_str("<li class=\"ox-toc__item ox-toc__item--depth-");
let _ = write!(self.output, "{}", entry.depth);
self.output.push_str("\"><a href=\"#");
write_url_escaped_into(&mut self.output, &entry.id);
self.output.push_str("\">");
write_escaped_into(&mut self.output, &entry.text);
self.output.push_str("</a></li>\n");
}
self.write("</ul>\n</nav>\n");
}
}
impl Default for HtmlRenderer {
fn default() -> Self {
Self::new()
}
}
impl Renderer for HtmlRenderer {
type Output = String;
fn render(&mut self, document: &Document<'_>) -> RenderResult<Self::Output> {
Ok(self.render(document))
}
}