subplot/
html.rs

1//! A representation of HTML using Rust types.
2
3#![deny(missing_docs)]
4
5use html_escape::{encode_double_quoted_attribute, encode_text};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::io::Write;
10use std::path::{Path, PathBuf};
11use tracing::{debug, trace};
12
13use crate::blockattr::{BlockAttr, BlockAttrError};
14
/// Document type declaration that `HtmlPage::serialize` puts on the
/// first line of its output, ahead of the `html` element.
const DOCTYPE: &str = "<!DOCTYPE html>";
16
/// An HTML page, consisting of a head and a body.
///
/// Serializing a page produces a doctype line followed by a single
/// `html` element that holds the head and the body.
#[derive(Debug)]
pub struct HtmlPage {
    /// The element placed first inside the `html` element.
    head: Element,
    /// The element that carries the visible content of the page.
    body: Element,
}
23
24impl Default for HtmlPage {
25    fn default() -> Self {
26        Self {
27            head: Element::new(ElementTag::Head),
28            body: Element::new(ElementTag::Body),
29        }
30    }
31}
32
33impl HtmlPage {
34    /// Create a new HTML page from a head and a body element.
35    pub fn new(head: Element, body: Element) -> Self {
36        Self { head, body }
37    }
38
39    /// Return the page's head element.
40    pub fn head(&self) -> &Element {
41        &self.head
42    }
43
44    /// Return the page's body element.
45    pub fn body(&self) -> &Element {
46        &self.body
47    }
48
49    /// Try to serialize an HTML page into HTML text.
50    pub fn serialize(&self) -> Result<String, HtmlError> {
51        let mut html = Element::new(ElementTag::Html);
52        html.push_child(Content::Elt(self.head.clone()));
53        let mut body = Element::new(ElementTag::Body);
54        body.push_child(Content::Elt(self.body.clone()));
55        html.push_child(Content::Elt(body));
56        let html = html.serialize()?;
57        Ok(format!("{}\n{}", DOCTYPE, html))
58    }
59
60    /// Try to write an HTML page as text into a file.
61    pub fn write(&self, filename: &Path) -> Result<(), HtmlError> {
62        if let Some(parent) = filename.parent() {
63            trace!("parent: {}", parent.display());
64            if !parent.exists() {
65                debug!("creating directory {}", parent.display());
66                std::fs::create_dir_all(parent)
67                    .map_err(|e| HtmlError::CreateDir(parent.into(), e))?;
68            }
69        }
70
71        trace!("writing HTML: {}", filename.display());
72        let mut f = std::fs::File::create(filename)
73            .map_err(|e| HtmlError::CreateFile(filename.into(), e))?;
74        let html = self.serialize()?;
75        f.write_all(html.as_bytes())
76            .map_err(|e| HtmlError::FileWrite(filename.into(), e))?;
77        Ok(())
78    }
79}
80
81/// Return text of a sequence of contents as a string.
82pub fn as_plain_text(content: &[Content]) -> String {
83    fn as_helper(buf: &mut String, c: &Content) {
84        match c {
85            Content::Text(s) => buf.push_str(s),
86            Content::Html(s) => buf.push_str(s),
87            Content::Elt(e) => {
88                for child in e.children() {
89                    as_helper(buf, child);
90                }
91            }
92        }
93    }
94
95    let mut buf = String::new();
96    for c in content {
97        as_helper(&mut buf, c);
98    }
99    buf
100}
101
/// An HTML element.
///
/// An element has a tag, a list of attributes, and child content. It
/// may also record where in a source file it comes from.
#[derive(Debug, Clone)]
pub struct Element {
    /// Source location, if one has been set.
    loc: Option<Location>,
    /// Which HTML element this is.
    tag: ElementTag,
    /// Attributes in the order they were added. Several entries may
    /// share a name.
    attrs: Vec<Attribute>,
    /// Child content, in the order it was added.
    children: Vec<Content>,
}
110
111impl Element {
112    /// Create a new element.
113    pub fn new(tag: ElementTag) -> Self {
114        Self {
115            loc: None,
116            tag,
117            attrs: vec![],
118            children: vec![],
119        }
120    }
121
122    /// Add location to an element.
123    pub fn with_location(mut self, loc: Location) -> Self {
124        self.loc = Some(loc);
125        self
126    }
127
128    /// Set location.
129    pub fn set_location(&mut self, loc: Location) {
130        self.loc = Some(loc);
131    }
132
133    /// Get location.
134    pub fn location(&self) -> Location {
135        if let Some(loc) = &self.loc {
136            loc.clone()
137        } else {
138            Location::unknown()
139        }
140    }
141
142    /// Set the block attributes for an element.
143    pub fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) {
144        for block_attr in block_attrs {
145            let attr = Attribute::from(block_attr);
146            self.attrs.push(attr);
147        }
148    }
149
150    /// Add a new attribute. If an attribute with the same name
151    /// already exists, append to its value.
152    pub fn push_attribute(&mut self, attr: Attribute) {
153        self.attrs.push(attr);
154    }
155
156    /// Add a new attribute. If an attribute of the same name already
157    /// exists on the element, remove it first.
158    pub fn push_unique_attribute(&mut self, attr: Attribute) {
159        for (i, a) in self.attrs.iter().enumerate() {
160            if a.name == attr.name {
161                self.attrs.remove(i);
162                break;
163            }
164        }
165
166        self.attrs.push(attr);
167    }
168
169    /// Drop all attributes with a given name.
170    pub fn drop_attributes(&mut self, unwanted: &[&str]) {
171        for uw in unwanted {
172            self.attrs.retain(|a| a.name() != *uw);
173        }
174    }
175
176    /// Append a new child to the element.
177    pub fn push_child(&mut self, child: Content) {
178        self.children.push(child);
179    }
180
181    /// Return an element's tag.
182    pub fn tag(&self) -> ElementTag {
183        self.tag
184    }
185
186    /// All attributes.
187    pub fn all_attrs(&self) -> &[Attribute] {
188        &self.attrs
189    }
190
191    /// Return value of a named attribute, if any.
192    pub fn attr(&self, name: &str) -> Option<&Attribute> {
193        self.attrs.iter().find(|a| a.name() == name)
194    }
195
196    /// Has an attribute with a specific value?
197    pub fn has_attr(&self, name: &str, wanted: &str) -> bool {
198        self.attrs
199            .iter()
200            .filter(|a| a.name() == name && a.value() == Some(wanted))
201            .count()
202            > 0
203    }
204
205    /// Return the concatenated text content of direct children,
206    /// ignoring any elements.
207    pub fn content(&self) -> String {
208        let mut buf = String::new();
209        for child in self.children() {
210            buf.push_str(&child.content());
211        }
212        buf
213    }
214
215    /// Return all the children of an element.
216    pub fn children(&self) -> &[Content] {
217        &self.children
218    }
219
220    /// Find first descendant element with that has an attribute with
221    /// a specific value. Not necessarily direct child. Search is
222    /// element first, then recursively the element's children that
223    /// are elements themselves.
224    pub fn find_descendant(&self, name: &str, value: &str) -> Option<&Self> {
225        if self.has_attr(name, value) {
226            return Some(self);
227        }
228
229        for child in self.children() {
230            if let Content::Elt(e) = child {
231                if let Some(it) = e.find_descendant(name, value) {
232                    return Some(it);
233                }
234            }
235        }
236
237        None
238    }
239
240    /// Try to add an alt attribute to an img element.
241    pub fn fix_up_img_alt(&mut self) {
242        if self.tag == ElementTag::Img {
243            if !self.attrs.iter().any(|a| a.name() == "alt") {
244                let alt = as_plain_text(self.children());
245                self.push_attribute(Attribute::new("alt", &alt));
246                self.children.clear();
247            }
248        } else {
249            for child in self.children.iter_mut() {
250                if let Content::Elt(kid) = child {
251                    kid.fix_up_img_alt();
252                }
253            }
254        }
255    }
256
257    /// Serialize an element into HTML text.
258    pub fn serialize(&self) -> Result<String, HtmlError> {
259        let mut buf = String::new();
260        self.serialize_to_buf_without_added_newlines(&mut buf)
261            .map_err(HtmlError::Format)?;
262        Ok(buf)
263    }
264
265    fn serialize_to_buf_without_added_newlines(
266        &self,
267        buf: &mut String,
268    ) -> Result<(), std::fmt::Error> {
269        if self.tag.can_self_close() && self.children.is_empty() {
270            write!(buf, "<{}", self.tag.name())?;
271            self.serialize_attrs_to_buf(buf)?;
272            write!(buf, "/>")?;
273        } else {
274            write!(buf, "<{}", self.tag.name())?;
275            self.serialize_attrs_to_buf(buf)?;
276            write!(buf, ">")?;
277            for c in self.children() {
278                match c {
279                    Content::Text(s) => buf.push_str(&encode_text(s)),
280                    Content::Elt(e) => e.serialize_to_buf_adding_block_newline(buf)?,
281                    Content::Html(s) => buf.push_str(s),
282                }
283            }
284            write!(buf, "</{}>", self.tag.name())?;
285        }
286        Ok(())
287    }
288
289    fn serialize_to_buf_adding_block_newline(
290        &self,
291        buf: &mut String,
292    ) -> Result<(), std::fmt::Error> {
293        if self.tag.is_block() {
294            writeln!(buf)?;
295        }
296        self.serialize_to_buf_without_added_newlines(buf)
297    }
298
299    fn serialize_attrs_to_buf(&self, buf: &mut String) -> Result<(), std::fmt::Error> {
300        let mut attrs = Attributes::default();
301        for attr in self.attrs.iter() {
302            attrs.push(attr);
303        }
304
305        for (name, value) in attrs.iter() {
306            write!(buf, " {}", name)?;
307            if !value.is_empty() {
308                write!(buf, "=\"{}\"", encode_double_quoted_attribute(value))?;
309            }
310        }
311        Ok(())
312    }
313}
314
/// The tag of an HTML element.
///
/// Only the HTML elements listed here can be represented.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
// NOTE(review): presumably the variants are left undocumented because
// each one is named after the HTML tag it stands for -- confirm.
#[allow(missing_docs)]
pub enum ElementTag {
    Html,
    Head,
    Meta,
    Body,
    Div,
    H1,
    H2,
    H3,
    H4,
    H5,
    H6,
    P,
    Ol,
    Ul,
    Li,
    Link,
    Blockquote,
    Pre,
    Em,
    Strong,
    Del,
    A,
    Img,
    Table,
    Title,
    Th,
    Tr,
    Td,
    Br,
    Hr,
    Code,
    Span,
    Style,
}
353
354impl ElementTag {
355    /// Name of the tag.
356    pub fn name(&self) -> &str {
357        match self {
358            Self::Html => "html",
359            Self::Head => "head",
360            Self::Meta => "meta",
361            Self::Body => "body",
362            Self::Div => "div",
363            Self::H1 => "h1",
364            Self::H2 => "h2",
365            Self::H3 => "h3",
366            Self::H4 => "h4",
367            Self::H5 => "h5",
368            Self::H6 => "h6",
369            Self::P => "p",
370            Self::Ol => "ol",
371            Self::Ul => "ul",
372            Self::Li => "li",
373            Self::Link => "link",
374            Self::Blockquote => "blockquote",
375            Self::Pre => "pre",
376            Self::Em => "em",
377            Self::Strong => "strong",
378            Self::Del => "del",
379            Self::A => "a",
380            Self::Img => "img",
381            Self::Table => "table",
382            Self::Th => "th",
383            Self::Title => "title",
384            Self::Tr => "tr",
385            Self::Td => "td",
386            Self::Br => "br",
387            Self::Hr => "hr",
388            Self::Code => "code",
389            Self::Span => "span",
390            Self::Style => "style",
391        }
392    }
393
394    fn is_block(&self) -> bool {
395        matches!(
396            self,
397            Self::Html
398                | Self::Head
399                | Self::Meta
400                | Self::Body
401                | Self::Div
402                | Self::H1
403                | Self::H2
404                | Self::H3
405                | Self::H4
406                | Self::H5
407                | Self::H6
408                | Self::P
409                | Self::Ol
410                | Self::Ul
411                | Self::Li
412                | Self::Blockquote
413                | Self::Table
414                | Self::Th
415                | Self::Tr
416                | Self::Br
417                | Self::Hr
418        )
419    }
420
421    fn can_self_close(&self) -> bool {
422        matches!(
423            self,
424            Self::Br | Self::Hr | Self::Img | Self::Link | Self::Meta
425        )
426    }
427}
428
#[cfg(test)]
mod test_tag {
    use super::ElementTag;

    /// Tags that may be written in the self-closing form.
    #[test]
    fn can_self_close() {
        let tags = [
            ElementTag::Br,
            ElementTag::Hr,
            ElementTag::Img,
            ElementTag::Link,
            ElementTag::Meta,
        ];
        for tag in tags.iter() {
            assert!(tag.can_self_close(), "{:?} should self-close", tag);
        }
    }

    /// Tags that always need a separate closing tag.
    #[test]
    fn cannot_self_close() {
        let tags = [
            ElementTag::Html,
            ElementTag::Head,
            ElementTag::Body,
            ElementTag::Div,
            ElementTag::H1,
            ElementTag::H2,
            ElementTag::H3,
            ElementTag::H4,
            ElementTag::H5,
            ElementTag::H6,
            ElementTag::P,
            ElementTag::Ol,
            ElementTag::Ul,
            ElementTag::Li,
            ElementTag::Blockquote,
            ElementTag::Pre,
            ElementTag::Em,
            ElementTag::Strong,
            ElementTag::Del,
            ElementTag::A,
            ElementTag::Table,
            ElementTag::Title,
            ElementTag::Th,
            ElementTag::Tr,
            ElementTag::Td,
            ElementTag::Code,
            ElementTag::Span,
            ElementTag::Style,
        ];
        for tag in tags.iter() {
            assert!(!tag.can_self_close(), "{:?} should not self-close", tag);
        }
    }
}
474
/// The attributes of one element, keyed by attribute name.
///
/// An ordered map is used so that iteration visits the names in
/// sorted order. With a hash map the order of attributes in the
/// serialized HTML would differ from one run to the next.
#[derive(Debug, Default, Clone)]
struct Attributes {
    /// Attribute name mapped to its value; an empty string stands
    /// for an attribute without a value.
    attrs: std::collections::BTreeMap<String, String>,
}
479
480impl Attributes {
481    fn push(&mut self, attr: &Attribute) {
482        if let Some(new_value) = attr.value() {
483            if let Some(old_value) = self.attrs.get_mut(attr.name()) {
484                assert!(!old_value.is_empty());
485                old_value.push(' ');
486                old_value.push_str(new_value);
487            } else {
488                self.attrs.insert(attr.name().into(), new_value.into());
489            }
490        } else {
491            assert!(!self.attrs.contains_key(attr.name()));
492            self.attrs.insert(attr.name().into(), "".into());
493        }
494    }
495
496    fn iter(&self) -> impl Iterator<Item = (&String, &String)> {
497        self.attrs.iter()
498    }
499}
500
/// An attribute of an HTML element.
#[derive(Clone, Debug)]
pub struct Attribute {
    /// Name of the attribute.
    name: String,
    /// Value of the attribute; `None` for an attribute that has a
    /// name only.
    value: Option<String>,
}
507
508impl Attribute {
509    /// Create a new element attribute.
510    pub fn new(name: &str, value: &str) -> Self {
511        Self {
512            name: name.into(),
513            value: Some(value.into()),
514        }
515    }
516
517    /// Return the name of the attribute.
518    pub fn name(&self) -> &str {
519        &self.name
520    }
521
522    /// Return the value of the attribute, if any.
523    pub fn value(&self) -> Option<&str> {
524        self.value.as_deref()
525    }
526}
527
528impl From<BlockAttr> for Attribute {
529    fn from(block_attr: BlockAttr) -> Self {
530        match block_attr {
531            BlockAttr::Id(v) => Self::new("id", &v),
532            BlockAttr::Class(v) => Self::new("class", &v),
533            BlockAttr::KeyValue(k, v) => Self::new(&k, &v),
534        }
535    }
536}
537
/// Content in HTML: what an element can have as a child.
#[derive(Clone, Debug)]
pub enum Content {
    /// Arbitrary text. It is escaped when an element is serialized.
    Text(String),

    /// An HTML element.
    Elt(Element),

    /// Arbitrary HTML text. It is copied into the output as it is,
    /// without escaping, when an element is serialized.
    Html(String),
}
550
551impl Content {
552    fn content(&self) -> String {
553        match self {
554            Self::Text(s) => s.clone(),
555            Self::Elt(e) => e.content(),
556            Self::Html(h) => h.clone(),
557        }
558    }
559}
560
/// Location of element in source file.
///
/// NOTE(review): the `untagged` representation means serde does not
/// record which variant a value is -- confirm that `Unknown`
/// round-trips as intended in the formats this is used with.
#[derive(Debug, Clone, Eq, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum Location {
    /// A known location.
    Known {
        /// Name of file.
        filename: PathBuf,
        /// Line in file.
        line: usize,
        /// Column in line.
        col: usize,
    },
    /// An unknown location.
    Unknown,
}
577
578impl Location {
579    /// Create a new location.
580    pub fn new(filename: &Path, line: usize, col: usize) -> Self {
581        Self::Known {
582            filename: filename.into(),
583            line,
584            col,
585        }
586    }
587
588    /// Create an unknown location.
589    pub fn unknown() -> Self {
590        Self::Unknown
591    }
592
593    /// Report name of source file from where this element comes from.
594    pub fn filename(&self) -> &Path {
595        if let Self::Known {
596            filename,
597            line: _,
598            col: _,
599        } = self
600        {
601            filename
602        } else {
603            Path::new("")
604        }
605    }
606
607    /// Report row and column in source where this element comes from.
608    pub fn rowcol(&self) -> (usize, usize) {
609        if let Self::Known {
610            filename: _,
611            line,
612            col,
613        } = self
614        {
615            (*line, *col)
616        } else {
617            (0, 0)
618        }
619    }
620}
621
622impl std::fmt::Display for Location {
623    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
624        if let Self::Known {
625            filename,
626            line,
627            col,
628        } = self
629        {
630            write!(f, "{}:{}:{}", filename.display(), line, col)
631        } else {
632            write!(f, "(unknown location)")
633        }
634    }
635}
636
/// Errors from the `html` module.
///
/// The variants that carry a `#[source]` field expose the underlying
/// error as their source.
#[derive(Debug, thiserror::Error)]
pub enum HtmlError {
    /// Failed to create a directory. Carries the path of the
    /// directory and the I/O error.
    #[error("failed to create directory {0}")]
    CreateDir(PathBuf, #[source] std::io::Error),

    /// Failed to create a file. Carries the path of the file and the
    /// I/O error.
    #[error("failed to create file {0}")]
    CreateFile(PathBuf, #[source] std::io::Error),

    /// Failed to write to a file. Carries the path of the file and
    /// the I/O error.
    #[error("failed to write to file {0}")]
    FileWrite(PathBuf, #[source] std::io::Error),

    /// Input contains an attempt to use a definition list in
    /// Markdown. Carries the location of the attempt.
    #[error("{0}: attempt to use definition lists in Markdown")]
    DefinitionList(Location),

    /// String formatting error. This is likely a programming error.
    #[error("string formatting error: {0}")]
    Format(#[source] std::fmt::Error),

    /// Math is not supported in Subplot.
    #[error("math markup used in markdown")]
    Math,

    /// Metadata blocks are not supported in Subplot.
    #[error("metadata block use in markdown")]
    Metadata,

    /// Error generating a table of contents entry. The message of
    /// the wrapped error is shown as is.
    #[error(transparent)]
    ToC(#[from] crate::toc::ToCError),

    /// Error parsing fenced code block attributes.
    #[error("failed to parse fenced code block attributes")]
    BlockAttr(#[source] BlockAttrError),
}
676}