Skip to main content

docspec_html_writer/
lib.rs

1#![forbid(unsafe_code)]
2
3//! Streaming HTML5 writer for `DocSpec` events.
4
5use docspec_core::{Event, EventSink, Result};
6use html5ever::serialize::{HtmlSerializer, SerializeOpts, Serializer as _};
7use html5ever::{local_name, ns, LocalName, QualName};
8use std::io::Write;
9
10/// A streaming HTML5 writer for `DocSpec` events.
11///
12/// Writes HTML5 markup directly to the underlying `Write` as events arrive.
13/// Implements [`EventSink`] for integration with the `DocSpec` pipeline.
14///
15/// # Type Parameters
16///
17/// * `W` - Any type implementing [`Write`]
18pub struct HtmlWriter<W: Write> {
19    finished: bool,
20    in_paragraph: bool,
21    serializer: HtmlSerializer<W>,
22    started: bool,
23}
24
25impl<W: Write> HtmlWriter<W> {
26    fn close(&mut self, local: LocalName) -> Result<()> {
27        let name = QualName::new(None, ns!(html), local);
28        self.serializer.end_elem(name)?;
29        Ok(())
30    }
31
32    /// Creates a new `HtmlWriter` that writes to the given writer.
33    #[inline]
34    #[must_use]
35    pub fn new(writer: W) -> Self {
36        Self {
37            serializer: HtmlSerializer::new(writer, SerializeOpts::default()),
38            started: false,
39            finished: false,
40            in_paragraph: false,
41        }
42    }
43
44    fn open(&mut self, local: LocalName) -> Result<()> {
45        let name = QualName::new(None, ns!(html), local);
46        self.serializer
47            .start_elem(name, core::iter::empty::<(&QualName, &str)>())?;
48        Ok(())
49    }
50}
51
52impl<W: Write> EventSink for HtmlWriter<W> {
53    #[inline]
54    fn finish(mut self) -> Result<()> {
55        self.serializer.writer.flush()?;
56        Ok(())
57    }
58
59    #[inline]
60    fn handle_event(&mut self, event: Event) -> Result<()> {
61        match event {
62            Event::StartDocument { .. } => {
63                if !self.started && !self.finished {
64                    self.open(local_name!("html"))?;
65                    self.open(local_name!("body"))?;
66                    self.started = true;
67                }
68            }
69            Event::EndDocument => {
70                if self.started && !self.finished {
71                    if self.in_paragraph {
72                        self.close(local_name!("p"))?;
73                        self.in_paragraph = false;
74                    }
75                    self.close(local_name!("body"))?;
76                    self.close(local_name!("html"))?;
77                    self.finished = true;
78                }
79            }
80            Event::StartParagraph { .. } => {
81                if self.started && !self.finished && !self.in_paragraph {
82                    self.open(local_name!("p"))?;
83                    self.in_paragraph = true;
84                }
85            }
86            Event::EndParagraph => {
87                if self.in_paragraph {
88                    self.close(local_name!("p"))?;
89                    self.in_paragraph = false;
90                }
91            }
92            Event::Text { content } if self.in_paragraph => {
93                self.serializer.write_text(&content)?;
94            }
95            _ => {
96                // HTML writer drops all inline styles (StartTextStyle, EndTextStyle) per documented contract
97            }
98        }
99        Ok(())
100    }
101}
102
#[cfg(test)]
mod tests {
    // Tests unwrap freely, and one test both asserts and returns `Result`.
    #![allow(clippy::panic_in_result_fn, clippy::unwrap_used)]

    use super::HtmlWriter;
    use docspec_core::{Event, EventSink as _, Result};

    /// Runs `events` through a fresh `HtmlWriter` backed by an in-memory
    /// buffer and asserts that the bytes written equal `expected`.
    ///
    /// Any error from `handle_event` or `finish` fails the test immediately
    /// instead of being discarded, so a writer failure is reported as such
    /// rather than as an output mismatch.
    fn assert_output(events: impl IntoIterator<Item = Event>, expected: &str) {
        let mut buf: Vec<u8> = Vec::new();
        let mut writer = HtmlWriter::new(&mut buf);
        for event in events {
            writer.handle_event(event).unwrap();
        }
        writer.finish().unwrap();
        let output = String::from_utf8(buf).unwrap();
        assert_eq!(output, expected);
    }

    #[test]
    fn autoclose_paragraph_on_enddocument() {
        assert_output(
            [
                start_document(),
                start_paragraph(),
                text("oops"),
                Event::EndDocument,
            ],
            "<html><body><p>oops</p></body></html>",
        );
    }

    #[test]
    fn double_start_document_is_noop() {
        assert_output(
            [start_document(), start_document(), Event::EndDocument],
            "<html><body></body></html>",
        );
    }

    #[test]
    fn empty_document_exact_output() {
        assert_output(
            [start_document(), Event::EndDocument],
            "<html><body></body></html>",
        );
    }

    #[test]
    fn end_paragraph_without_start() {
        assert_output(
            [start_document(), Event::EndParagraph, Event::EndDocument],
            "<html><body></body></html>",
        );
    }

    #[test]
    fn escapes_special_chars() {
        assert_output(
            [
                start_document(),
                start_paragraph(),
                text("a & b < c > d"),
                Event::EndParagraph,
                Event::EndDocument,
            ],
            "<html><body><p>a &amp; b &lt; c &gt; d</p></body></html>",
        );
    }

    #[test]
    fn finish_after_normal_document_succeeds() -> Result<()> {
        let mut buf: Vec<u8> = Vec::new();
        let mut writer = HtmlWriter::new(&mut buf);
        writer.handle_event(start_document())?;
        writer.handle_event(start_paragraph())?;
        writer.handle_event(text("hello"))?;
        writer.handle_event(Event::EndParagraph)?;
        writer.handle_event(Event::EndDocument)?;
        writer.finish()
    }

    #[test]
    fn ignored_events_no_effect() {
        assert_output(
            [
                start_document(),
                Event::StartHeading { level: 1, id: None },
                Event::EndHeading,
                start_paragraph(),
                text("x"),
                Event::EndParagraph,
                Event::ThematicBreak { id: None },
                Event::EndDocument,
            ],
            "<html><body><p>x</p></body></html>",
        );
    }

    #[test]
    fn paragraph_with_text() {
        assert_output(
            [
                start_document(),
                start_paragraph(),
                text("hello"),
                Event::EndParagraph,
                Event::EndDocument,
            ],
            "<html><body><p>hello</p></body></html>",
        );
    }

    /// A `StartDocument` event with every optional field unset.
    fn start_document() -> Event {
        Event::StartDocument {
            id: None,
            language: None,
            metadata: None,
        }
    }

    /// A `StartParagraph` event with every optional field unset.
    fn start_paragraph() -> Event {
        Event::StartParagraph {
            alignment: None,
            id: None,
        }
    }

    #[test]
    fn start_paragraph_while_in_paragraph() {
        assert_output(
            [
                start_document(),
                start_paragraph(),
                start_paragraph(),
                text("x"),
                Event::EndParagraph,
                Event::EndDocument,
            ],
            "<html><body><p>x</p></body></html>",
        );
    }

    /// A `Text` event carrying an owned copy of `content`.
    fn text(content: &str) -> Event {
        Event::Text {
            content: content.to_owned(),
        }
    }

    #[test]
    fn text_outside_paragraph_ignored() {
        assert_output(
            [start_document(), text("ignored"), Event::EndDocument],
            "<html><body></body></html>",
        );
    }
}