Skip to main content

docspec_html_writer/
lib.rs

1#![forbid(unsafe_code)]
2
3//! Streaming HTML5 writer for `DocSpec` events.
4
5use docspec_core::{Event, EventSink, Result};
6use html5ever::serialize::{HtmlSerializer, SerializeOpts, Serializer as _};
7use html5ever::{local_name, ns, LocalName, QualName};
8use std::io::Write;
9
10/// A streaming HTML5 writer for `DocSpec` events.
11///
12/// Writes HTML5 markup directly to the underlying `Write` as events arrive.
13/// Implements [`EventSink`] for integration with the `DocSpec` pipeline.
14///
15/// # Type Parameters
16///
17/// * `W` - Any type implementing [`Write`]
18pub struct HtmlWriter<W: Write> {
19    finished: bool,
20    in_paragraph: bool,
21    serializer: HtmlSerializer<W>,
22    started: bool,
23}
24
25impl<W: Write> HtmlWriter<W> {
26    fn close(&mut self, local: LocalName) -> Result<()> {
27        let name = QualName::new(None, ns!(html), local);
28        self.serializer.end_elem(name)?;
29        Ok(())
30    }
31
32    /// Creates a new `HtmlWriter` that writes to the given writer.
33    #[inline]
34    #[must_use]
35    pub fn new(writer: W) -> Self {
36        Self {
37            serializer: HtmlSerializer::new(writer, SerializeOpts::default()),
38            started: false,
39            finished: false,
40            in_paragraph: false,
41        }
42    }
43
44    fn open(&mut self, local: LocalName) -> Result<()> {
45        let name = QualName::new(None, ns!(html), local);
46        self.serializer
47            .start_elem(name, core::iter::empty::<(&QualName, &str)>())?;
48        Ok(())
49    }
50}
51
52impl<W: Write> EventSink for HtmlWriter<W> {
53    #[inline]
54    fn finish(mut self) -> Result<()> {
55        self.serializer.writer.flush()?;
56        Ok(())
57    }
58
59    #[inline]
60    fn handle_event(&mut self, event: Event) -> Result<()> {
61        match event {
62            Event::StartDocument { .. } => {
63                if !self.started && !self.finished {
64                    self.open(local_name!("html"))?;
65                    self.open(local_name!("body"))?;
66                    self.started = true;
67                }
68            }
69            Event::EndDocument => {
70                if self.started && !self.finished {
71                    if self.in_paragraph {
72                        self.close(local_name!("p"))?;
73                        self.in_paragraph = false;
74                    }
75                    self.close(local_name!("body"))?;
76                    self.close(local_name!("html"))?;
77                    self.finished = true;
78                }
79            }
80            Event::StartParagraph { .. } => {
81                if self.started && !self.finished && !self.in_paragraph {
82                    self.open(local_name!("p"))?;
83                    self.in_paragraph = true;
84                }
85            }
86            Event::EndParagraph => {
87                if self.in_paragraph {
88                    self.close(local_name!("p"))?;
89                    self.in_paragraph = false;
90                }
91            }
92            Event::Text { content } if self.in_paragraph => {
93                self.serializer.write_text(&content)?;
94            }
95            _ => {
96                // HTML writer drops all inline styles (StartTextStyle, EndTextStyle) per documented contract
97            }
98        }
99        Ok(())
100    }
101}
102
#[cfg(test)]
mod tests {
    #![allow(clippy::panic_in_result_fn, clippy::unwrap_used)]

    use super::HtmlWriter;
    use docspec_core::{Event, EventSink as _, Result};

    /// Drives a fresh `HtmlWriter` with `events` and checks the exact bytes
    /// it produced against `expected`.
    fn assert_output(events: impl IntoIterator<Item = Event>, expected: &str) {
        let mut sink: Vec<u8> = Vec::new();
        // The outcome of the drive is deliberately not inspected; only the
        // bytes that reached `sink` are compared.
        let _r = docspec_test_utils::try_drive(HtmlWriter::new(&mut sink), events);
        assert_eq!(String::from_utf8(sink).unwrap(), expected);
    }

    #[test]
    fn autoclose_paragraph_on_enddocument() {
        // No EndParagraph: EndDocument must close the paragraph itself.
        assert_output(
            [
                start_document(),
                start_paragraph(),
                text("oops"),
                Event::EndDocument,
            ],
            "<html><body><p>oops</p></body></html>",
        );
    }

    #[test]
    fn double_start_document_is_noop() {
        assert_output(
            [start_document(), start_document(), Event::EndDocument],
            "<html><body></body></html>",
        );
    }

    #[test]
    fn empty_document_exact_output() {
        assert_output(
            [start_document(), Event::EndDocument],
            "<html><body></body></html>",
        );
    }

    #[test]
    fn end_paragraph_without_start() {
        assert_output(
            [start_document(), Event::EndParagraph, Event::EndDocument],
            "<html><body></body></html>",
        );
    }

    #[test]
    fn escapes_special_chars() {
        assert_output(
            [
                start_document(),
                start_paragraph(),
                text("a & b < c > d"),
                Event::EndParagraph,
                Event::EndDocument,
            ],
            "<html><body><p>a &amp; b &lt; c &gt; d</p></body></html>",
        );
    }

    #[test]
    fn finish_after_normal_document_succeeds() -> Result<()> {
        let mut buf: Vec<u8> = Vec::new();
        let mut writer = HtmlWriter::new(&mut buf);
        let events = [
            start_document(),
            start_paragraph(),
            text("hello"),
            Event::EndParagraph,
            Event::EndDocument,
        ];
        for event in events {
            writer.handle_event(event)?;
        }
        writer.finish()
    }

    #[test]
    fn ignored_events_no_effect() {
        // Headings and thematic breaks are not rendered by this writer.
        assert_output(
            [
                start_document(),
                Event::StartHeading { level: 1, id: None },
                Event::EndHeading,
                start_paragraph(),
                text("x"),
                Event::EndParagraph,
                Event::ThematicBreak { id: None },
                Event::EndDocument,
            ],
            "<html><body><p>x</p></body></html>",
        );
    }

    #[test]
    fn paragraph_with_text() {
        assert_output(
            [
                start_document(),
                start_paragraph(),
                text("hello"),
                Event::EndParagraph,
                Event::EndDocument,
            ],
            "<html><body><p>hello</p></body></html>",
        );
    }

    /// `StartDocument` event with every optional field left unset.
    fn start_document() -> Event {
        Event::StartDocument {
            id: None,
            language: None,
            metadata: None,
        }
    }

    /// `StartParagraph` event with every optional field left unset.
    fn start_paragraph() -> Event {
        Event::StartParagraph {
            alignment: None,
            id: None,
        }
    }

    #[test]
    fn start_paragraph_while_in_paragraph() {
        // The second StartParagraph must not open a nested <p>.
        assert_output(
            [
                start_document(),
                start_paragraph(),
                start_paragraph(),
                text("x"),
                Event::EndParagraph,
                Event::EndDocument,
            ],
            "<html><body><p>x</p></body></html>",
        );
    }

    /// `Text` event carrying an owned copy of `content`.
    fn text(content: &str) -> Event {
        Event::Text {
            content: content.to_string(),
        }
    }

    #[test]
    fn text_outside_paragraph_ignored() {
        assert_output(
            [start_document(), text("ignored"), Event::EndDocument],
            "<html><body></body></html>",
        );
    }
}