1#![deny(missing_docs)]
4
5use html_escape::{encode_double_quoted_attribute, encode_text};
6use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::fmt::Write as _;
9use std::io::Write;
10use std::path::{Path, PathBuf};
11use tracing::{debug, trace};
12
13use crate::blockattr::{BlockAttr, BlockAttrError};
14
/// Document type declaration written as the first line of a serialized page.
const DOCTYPE: &str = "<!DOCTYPE html>";
16
/// An HTML page, made up of a head element and a body element.
#[derive(Debug)]
pub struct HtmlPage {
    /// Element serialized as the page's head.
    head: Element,
    /// Element holding the page content; serialization nests it inside a
    /// `body` element.
    body: Element,
}
23
24impl Default for HtmlPage {
25 fn default() -> Self {
26 Self {
27 head: Element::new(ElementTag::Head),
28 body: Element::new(ElementTag::Body),
29 }
30 }
31}
32
33impl HtmlPage {
34 pub fn new(head: Element, body: Element) -> Self {
36 Self { head, body }
37 }
38
39 pub fn head(&self) -> &Element {
41 &self.head
42 }
43
44 pub fn body(&self) -> &Element {
46 &self.body
47 }
48
49 pub fn serialize(&self) -> Result<String, HtmlError> {
51 let mut html = Element::new(ElementTag::Html);
52 html.push_child(Content::Elt(self.head.clone()));
53 let mut body = Element::new(ElementTag::Body);
54 body.push_child(Content::Elt(self.body.clone()));
55 html.push_child(Content::Elt(body));
56 let html = html.serialize()?;
57 Ok(format!("{}\n{}", DOCTYPE, html))
58 }
59
60 pub fn write(&self, filename: &Path) -> Result<(), HtmlError> {
62 if let Some(parent) = filename.parent() {
63 trace!("parent: {}", parent.display());
64 if !parent.exists() {
65 debug!("creating directory {}", parent.display());
66 std::fs::create_dir_all(parent)
67 .map_err(|e| HtmlError::CreateDir(parent.into(), e))?;
68 }
69 }
70
71 trace!("writing HTML: {}", filename.display());
72 let mut f = std::fs::File::create(filename)
73 .map_err(|e| HtmlError::CreateFile(filename.into(), e))?;
74 let html = self.serialize()?;
75 f.write_all(html.as_bytes())
76 .map_err(|e| HtmlError::FileWrite(filename.into(), e))?;
77 Ok(())
78 }
79}
80
81pub fn as_plain_text(content: &[Content]) -> String {
83 fn as_helper(buf: &mut String, c: &Content) {
84 match c {
85 Content::Text(s) => buf.push_str(s),
86 Content::Html(s) => buf.push_str(s),
87 Content::Elt(e) => {
88 for child in e.children() {
89 as_helper(buf, child);
90 }
91 }
92 }
93 }
94
95 let mut buf = String::new();
96 for c in content {
97 as_helper(&mut buf, c);
98 }
99 buf
100}
101
/// An HTML element: a tag with attributes and child content.
#[derive(Debug, Clone)]
pub struct Element {
    /// Source location of the element, if one has been set.
    loc: Option<Location>,
    /// The element's tag.
    tag: ElementTag,
    /// Attributes in the order they were added.
    attrs: Vec<Attribute>,
    /// Child content in the order it was added.
    children: Vec<Content>,
}
110
111impl Element {
112 pub fn new(tag: ElementTag) -> Self {
114 Self {
115 loc: None,
116 tag,
117 attrs: vec![],
118 children: vec![],
119 }
120 }
121
122 pub fn with_location(mut self, loc: Location) -> Self {
124 self.loc = Some(loc);
125 self
126 }
127
128 pub fn set_location(&mut self, loc: Location) {
130 self.loc = Some(loc);
131 }
132
133 pub fn location(&self) -> Location {
135 if let Some(loc) = &self.loc {
136 loc.clone()
137 } else {
138 Location::unknown()
139 }
140 }
141
142 pub fn set_block_attributes(&mut self, block_attrs: Vec<BlockAttr>) {
144 for block_attr in block_attrs {
145 let attr = Attribute::from(block_attr);
146 self.attrs.push(attr);
147 }
148 }
149
150 pub fn push_attribute(&mut self, attr: Attribute) {
153 self.attrs.push(attr);
154 }
155
156 pub fn push_unique_attribute(&mut self, attr: Attribute) {
159 for (i, a) in self.attrs.iter().enumerate() {
160 if a.name == attr.name {
161 self.attrs.remove(i);
162 break;
163 }
164 }
165
166 self.attrs.push(attr);
167 }
168
169 pub fn drop_attributes(&mut self, unwanted: &[&str]) {
171 for uw in unwanted {
172 self.attrs.retain(|a| a.name() != *uw);
173 }
174 }
175
176 pub fn push_child(&mut self, child: Content) {
178 self.children.push(child);
179 }
180
181 pub fn tag(&self) -> ElementTag {
183 self.tag
184 }
185
186 pub fn all_attrs(&self) -> &[Attribute] {
188 &self.attrs
189 }
190
191 pub fn attr(&self, name: &str) -> Option<&Attribute> {
193 self.attrs.iter().find(|a| a.name() == name)
194 }
195
196 pub fn has_attr(&self, name: &str, wanted: &str) -> bool {
198 self.attrs
199 .iter()
200 .filter(|a| a.name() == name && a.value() == Some(wanted))
201 .count()
202 > 0
203 }
204
205 pub fn content(&self) -> String {
208 let mut buf = String::new();
209 for child in self.children() {
210 buf.push_str(&child.content());
211 }
212 buf
213 }
214
215 pub fn children(&self) -> &[Content] {
217 &self.children
218 }
219
220 pub fn find_descendant(&self, name: &str, value: &str) -> Option<&Self> {
225 if self.has_attr(name, value) {
226 return Some(self);
227 }
228
229 for child in self.children() {
230 if let Content::Elt(e) = child {
231 if let Some(it) = e.find_descendant(name, value) {
232 return Some(it);
233 }
234 }
235 }
236
237 None
238 }
239
240 pub fn fix_up_img_alt(&mut self) {
242 if self.tag == ElementTag::Img {
243 if !self.attrs.iter().any(|a| a.name() == "alt") {
244 let alt = as_plain_text(self.children());
245 self.push_attribute(Attribute::new("alt", &alt));
246 self.children.clear();
247 }
248 } else {
249 for child in self.children.iter_mut() {
250 if let Content::Elt(kid) = child {
251 kid.fix_up_img_alt();
252 }
253 }
254 }
255 }
256
257 pub fn serialize(&self) -> Result<String, HtmlError> {
259 let mut buf = String::new();
260 self.serialize_to_buf_without_added_newlines(&mut buf)
261 .map_err(HtmlError::Format)?;
262 Ok(buf)
263 }
264
265 fn serialize_to_buf_without_added_newlines(
266 &self,
267 buf: &mut String,
268 ) -> Result<(), std::fmt::Error> {
269 if self.tag.can_self_close() && self.children.is_empty() {
270 write!(buf, "<{}", self.tag.name())?;
271 self.serialize_attrs_to_buf(buf)?;
272 write!(buf, "/>")?;
273 } else {
274 write!(buf, "<{}", self.tag.name())?;
275 self.serialize_attrs_to_buf(buf)?;
276 write!(buf, ">")?;
277 for c in self.children() {
278 match c {
279 Content::Text(s) => buf.push_str(&encode_text(s)),
280 Content::Elt(e) => e.serialize_to_buf_adding_block_newline(buf)?,
281 Content::Html(s) => buf.push_str(s),
282 }
283 }
284 write!(buf, "</{}>", self.tag.name())?;
285 }
286 Ok(())
287 }
288
289 fn serialize_to_buf_adding_block_newline(
290 &self,
291 buf: &mut String,
292 ) -> Result<(), std::fmt::Error> {
293 if self.tag.is_block() {
294 writeln!(buf)?;
295 }
296 self.serialize_to_buf_without_added_newlines(buf)
297 }
298
299 fn serialize_attrs_to_buf(&self, buf: &mut String) -> Result<(), std::fmt::Error> {
300 let mut attrs = Attributes::default();
301 for attr in self.attrs.iter() {
302 attrs.push(attr);
303 }
304
305 for (name, value) in attrs.iter() {
306 write!(buf, " {}", name)?;
307 if !value.is_empty() {
308 write!(buf, "=\"{}\"", encode_double_quoted_attribute(value))?;
309 }
310 }
311 Ok(())
312 }
313}
314
/// The tag of an HTML element.
///
/// Only the tags listed here can be represented.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
// Every variant is named after the HTML tag it stands for, so per-variant
// documentation would only repeat the name.
#[allow(missing_docs)]
pub enum ElementTag {
    Html,
    Head,
    Meta,
    Body,
    Div,
    H1,
    H2,
    H3,
    H4,
    H5,
    H6,
    P,
    Ol,
    Ul,
    Li,
    Link,
    Blockquote,
    Pre,
    Em,
    Strong,
    Del,
    A,
    Img,
    Table,
    Title,
    Th,
    Tr,
    Td,
    Br,
    Hr,
    Code,
    Span,
    Style,
}

impl ElementTag {
    /// Return the tag's name as it is written in HTML, in lower case.
    ///
    /// The name is a string literal, so the returned reference is
    /// `'static` and does not keep the tag borrowed.
    pub fn name(&self) -> &'static str {
        match self {
            Self::Html => "html",
            Self::Head => "head",
            Self::Meta => "meta",
            Self::Body => "body",
            Self::Div => "div",
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
            Self::P => "p",
            Self::Ol => "ol",
            Self::Ul => "ul",
            Self::Li => "li",
            Self::Link => "link",
            Self::Blockquote => "blockquote",
            Self::Pre => "pre",
            Self::Em => "em",
            Self::Strong => "strong",
            Self::Del => "del",
            Self::A => "a",
            Self::Img => "img",
            Self::Table => "table",
            Self::Title => "title",
            Self::Th => "th",
            Self::Tr => "tr",
            Self::Td => "td",
            Self::Br => "br",
            Self::Hr => "hr",
            Self::Code => "code",
            Self::Span => "span",
            Self::Style => "style",
        }
    }

    /// Is this tag treated as block-level when formatting output?
    ///
    /// The serializer writes a newline before a child element whose tag
    /// answers `true` here.
    fn is_block(&self) -> bool {
        matches!(
            self,
            Self::Html
                | Self::Head
                | Self::Meta
                | Self::Body
                | Self::Div
                | Self::H1
                | Self::H2
                | Self::H3
                | Self::H4
                | Self::H5
                | Self::H6
                | Self::P
                | Self::Ol
                | Self::Ul
                | Self::Li
                | Self::Blockquote
                | Self::Table
                | Self::Th
                | Self::Tr
                | Self::Br
                | Self::Hr
        )
    }

    /// May an element with this tag be written in the self-closing
    /// `<tag/>` form when it has no children?
    fn can_self_close(&self) -> bool {
        matches!(
            self,
            Self::Br | Self::Hr | Self::Img | Self::Link | Self::Meta
        )
    }
}
428
#[cfg(test)]
mod test_tag {
    use super::ElementTag;

    /// Tags that may be written in the self-closing form.
    #[test]
    fn can_self_close() {
        let self_closing = [
            ElementTag::Br,
            ElementTag::Hr,
            ElementTag::Img,
            ElementTag::Link,
            ElementTag::Meta,
        ];
        for tag in self_closing.iter() {
            assert!(tag.can_self_close());
        }
    }

    /// Tags that must always be written with an explicit closing tag.
    #[test]
    fn cannot_self_close() {
        let needs_closing_tag = [
            ElementTag::Html,
            ElementTag::Head,
            ElementTag::Body,
            ElementTag::Div,
            ElementTag::H1,
            ElementTag::H2,
            ElementTag::H3,
            ElementTag::H4,
            ElementTag::H5,
            ElementTag::H6,
            ElementTag::P,
            ElementTag::Ol,
            ElementTag::Ul,
            ElementTag::Li,
            ElementTag::Blockquote,
            ElementTag::Pre,
            ElementTag::Em,
            ElementTag::Strong,
            ElementTag::Del,
            ElementTag::A,
            ElementTag::Table,
            ElementTag::Title,
            ElementTag::Th,
            ElementTag::Tr,
            ElementTag::Td,
            ElementTag::Code,
            ElementTag::Span,
            ElementTag::Style,
        ];
        for tag in needs_closing_tag.iter() {
            assert!(!tag.can_self_close());
        }
    }
}
474
/// The attributes of one element, merged by name for serialization.
#[derive(Debug, Default, Clone)]
struct Attributes {
    /// Merged value for each attribute name. An empty string means no
    /// non-empty value has been supplied for that name.
    attrs: HashMap<String, String>,
}

impl Attributes {
    /// Merge an attribute into the collection.
    ///
    /// The first attribute with a given name registers that name. Each
    /// further non-empty value for the same name is appended to the merged
    /// value, separated by a single space. Attributes without a value, or
    /// with an empty one, only register the name. No combination of
    /// attributes makes this panic.
    fn push(&mut self, attr: &Attribute) {
        let merged = self.attrs.entry(attr.name().into()).or_default();
        match attr.value() {
            Some(new_value) if !new_value.is_empty() => {
                // Only separate from a previous value if there is one, so
                // the merged value never starts with a space.
                if !merged.is_empty() {
                    merged.push(' ');
                }
                merged.push_str(new_value);
            }
            // Nothing to append; the name is registered by now.
            Some(_) | None => {}
        }
    }

    /// Iterate over the merged attributes as (name, value) pairs, sorted
    /// by name.
    ///
    /// Sorting keeps the output independent of the hash map's arbitrary
    /// iteration order, so serializing the same element always yields the
    /// same text.
    fn iter(&self) -> impl Iterator<Item = (&String, &String)> {
        let mut pairs: Vec<(&String, &String)> = self.attrs.iter().collect();
        pairs.sort_unstable_by(|a, b| a.0.cmp(b.0));
        pairs.into_iter()
    }
}

/// A single HTML attribute: a name and an optional value.
#[derive(Clone, Debug)]
pub struct Attribute {
    /// The attribute's name.
    name: String,
    /// The attribute's value; `None` for an attribute without a value.
    value: Option<String>,
}

impl Attribute {
    /// Create an attribute with the given name and value.
    pub fn new(name: &str, value: &str) -> Self {
        Self {
            name: name.into(),
            value: Some(value.into()),
        }
    }

    /// Return the attribute's name.
    pub fn name(&self) -> &str {
        &self.name
    }

    /// Return the attribute's value, if it has one.
    pub fn value(&self) -> Option<&str> {
        self.value.as_deref()
    }
}
527
528impl From<BlockAttr> for Attribute {
529 fn from(block_attr: BlockAttr) -> Self {
530 match block_attr {
531 BlockAttr::Id(v) => Self::new("id", &v),
532 BlockAttr::Class(v) => Self::new("class", &v),
533 BlockAttr::KeyValue(k, v) => Self::new(&k, &v),
534 }
535 }
536}
537
/// One piece of content inside an element.
#[derive(Clone, Debug)]
pub enum Content {
    /// Plain text; it is escaped when the enclosing element is serialized.
    Text(String),

    /// A nested element.
    Elt(Element),

    /// Raw HTML; it is copied into the serialized output without escaping.
    Html(String),
}
550
551impl Content {
552 fn content(&self) -> String {
553 match self {
554 Self::Text(s) => s.clone(),
555 Self::Elt(e) => e.content(),
556 Self::Html(h) => h.clone(),
557 }
558 }
559}
560
/// The position in a source file that something comes from, if known.
#[derive(Debug, Clone, Eq, Serialize, Deserialize, PartialEq)]
#[serde(untagged)]
pub enum Location {
    /// A known position.
    Known {
        /// Name of the file.
        filename: PathBuf,
        /// Line number within the file.
        line: usize,
        /// Column number within the line.
        col: usize,
    },
    /// The position is not known.
    Unknown,
}
577
578impl Location {
579 pub fn new(filename: &Path, line: usize, col: usize) -> Self {
581 Self::Known {
582 filename: filename.into(),
583 line,
584 col,
585 }
586 }
587
588 pub fn unknown() -> Self {
590 Self::Unknown
591 }
592
593 pub fn filename(&self) -> &Path {
595 if let Self::Known {
596 filename,
597 line: _,
598 col: _,
599 } = self
600 {
601 filename
602 } else {
603 Path::new("")
604 }
605 }
606
607 pub fn rowcol(&self) -> (usize, usize) {
609 if let Self::Known {
610 filename: _,
611 line,
612 col,
613 } = self
614 {
615 (*line, *col)
616 } else {
617 (0, 0)
618 }
619 }
620}
621
622impl std::fmt::Display for Location {
623 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> Result<(), std::fmt::Error> {
624 if let Self::Known {
625 filename,
626 line,
627 col,
628 } = self
629 {
630 write!(f, "{}:{}:{}", filename.display(), line, col)
631 } else {
632 write!(f, "(unknown location)")
633 }
634 }
635}
636
/// Errors from building, serializing, or writing HTML.
#[derive(Debug, thiserror::Error)]
pub enum HtmlError {
    /// A directory could not be created; holds its path and the I/O error.
    #[error("failed to create directory {0}")]
    CreateDir(PathBuf, #[source] std::io::Error),

    /// A file could not be created; holds its path and the I/O error.
    #[error("failed to create file {0}")]
    CreateFile(PathBuf, #[source] std::io::Error),

    /// Writing to a file failed; holds its path and the I/O error.
    #[error("failed to write to file {0}")]
    FileWrite(PathBuf, #[source] std::io::Error),

    /// A definition list was used in Markdown, at the given location.
    #[error("{0}: attempt to use definition lists in Markdown")]
    DefinitionList(Location),

    /// Formatting text into a string failed.
    #[error("string formatting error: {0}")]
    Format(#[source] std::fmt::Error),

    /// Math markup was used in Markdown.
    #[error("math markup used in markdown")]
    Math,

    /// A metadata block was used in Markdown.
    #[error("metadata block use in markdown")]
    Metadata,

    /// An error from the table of contents module, passed through as is.
    #[error(transparent)]
    ToC(#[from] crate::toc::ToCError),

    /// The attributes of a fenced code block could not be parsed.
    #[error("failed to parse fenced code block attributes")]
    BlockAttr(#[source] BlockAttrError),
}