html2text/
lib.rs

1//! Convert HTML to text formats.
2//!
3//! This crate renders HTML into a text format, wrapped to a specified width.
4//! This can either be plain text or with extra annotations to (for example)
5//! show in a terminal which supports colours.
6//!
7//! # Examples
8//!
9//! ```rust
10//! # use html2text::from_read;
11//! let html = b"
12//!        <ul>
13//!          <li>Item one</li>
14//!          <li>Item two</li>
15//!          <li>Item three</li>
16//!        </ul>";
17//! assert_eq!(from_read(&html[..], 20).unwrap(),
18//!            "\
19//! * Item one
20//! * Item two
21//! * Item three
22//! ");
23//! ```
24//! A couple of simple demonstration programs are included as examples:
25//!
26//! ### html2text
27//!
28//! The simplest example uses `from_read` to convert HTML on stdin into plain
29//! text:
30//!
31//! ```sh
32//! $ cargo run --example html2text < foo.html
33//! [...]
34//! ```
35//!
36//! ### html2term
37//!
38//! A very simple example of using the rich interface (`from_read_rich`) for a
39//! slightly interactive console HTML viewer is provided as `html2term`.
40//!
41//! ```sh
42//! $ cargo run --example html2term foo.html
43//! [...]
44//! ```
45//!
46//! Note that this example takes the HTML file as a parameter so that it can
47//! read keys from stdin.
48//!
49
50#![deny(missing_docs)]
51
// Compile and run the code samples in README.md as doctests.
53#[cfg(doctest)]
54#[doc = include_str!("../README.md")]
55struct ReadMe;
56
57#[macro_use]
58mod macros;
59
60pub mod css;
61pub mod render;
62
/// Character-level helpers for the whitespace special cases that matter when
/// collapsing and wrapping text.
trait WhitespaceExt {
    /// Returns `true` if this character always occupies space in the output:
    /// it is either not whitespace at all, or it is a non-breaking space
    /// (U+00A0).
    fn always_takes_space(&self) -> bool;

    /// Returns `true` if a word break is allowed at this character.  This
    /// covers whitespace and the zero-width space (U+200B), but never the
    /// non-breaking space (U+00A0).
    fn is_wordbreak_point(&self) -> bool;
}

impl WhitespaceExt for char {
    fn always_takes_space(&self) -> bool {
        let ch = *self;
        ch == '\u{A0}' || !ch.is_whitespace()
    }

    fn is_wordbreak_point(&self) -> bool {
        let ch = *self;
        // The non-breaking space counts as whitespace for `is_whitespace`,
        // so it has to be ruled out first.
        if ch == '\u{00A0}' {
            return false;
        }
        ch == '\u{200b}' || ch.is_whitespace()
    }
}
92
93/// Extra methods for strings
94trait StrExt {
95    /// Trims leading/trailing whitespace expect for hard spaces.
96    fn trim_collapsible_ws(&self) -> &str;
97}
98
99impl StrExt for str {
100    fn trim_collapsible_ws(&self) -> &str {
101        self.trim_matches(|c: char| !c.always_takes_space())
102    }
103}
104
/// Text style information.
#[cfg(feature = "css_ext")]
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct TextStyle {
    /// Foreground colour of the text.
    pub fg_colour: Colour,
    /// Background colour of the text, or `None` when no background is set.
    pub bg_colour: Option<Colour>,
}

#[cfg(feature = "css_ext")]
impl TextStyle {
    /// Build a `TextStyle` with both a foreground and a background colour.
    pub fn colours(fg_colour: Colour, bg_colour: Colour) -> Self {
        Self {
            fg_colour,
            bg_colour: Some(bg_colour),
        }
    }

    /// Build a `TextStyle` with a foreground colour and no background.
    pub fn foreground(fg_colour: Colour) -> Self {
        Self {
            fg_colour,
            bg_colour: None,
        }
    }
}

/// Syntax highlighter function.
///
/// Receives the text to be highlighted and returns a list of spans; each span
/// is a sub-`str` of the input together with the style to apply to it.
#[cfg(feature = "css_ext")]
pub type SyntaxHighlighter = Box<dyn for<'a> Fn(&'a str) -> Vec<(TextStyle, &'a str)>>;
141
142use markup5ever_rcdom::Node;
143use render::text_renderer::{
144    RenderLine, RenderOptions, RichAnnotation, SubRenderer, TaggedLine, TextRenderer,
145};
146use render::{Renderer, TextDecorator, TrivialDecorator};
147
148use html5ever::driver::ParseOpts;
149use html5ever::parse_document;
150use html5ever::tree_builder::TreeBuilderOpts;
151mod markup5ever_rcdom;
152pub use html5ever::{expanded_name, local_name, namespace_url, ns};
153pub use markup5ever_rcdom::{
154    Handle,
155    NodeData::{Comment, Document, Element},
156    RcDom,
157};
158
159use std::cell::{Cell, RefCell};
160use std::cmp::{max, min};
161use std::collections::{BTreeSet, HashMap};
162#[cfg(feature = "css_ext")]
163use std::ops::Range;
164use std::rc::Rc;
165use unicode_width::UnicodeWidthStr;
166
167use std::io;
168use std::io::Write;
169use std::iter::{once, repeat};
170
/// Supported values of the CSS `white-space` property.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub(crate) enum WhiteSpace {
    /// Whitespace is not preserved and text is wrapped (the default).
    #[default]
    Normal,
    // NoWrap,
    /// Whitespace is preserved and text is not wrapped.
    Pre,
    /// Whitespace is preserved and text is wrapped.
    #[allow(unused)]
    PreWrap,
    // PreLine,
    // BreakSpaces,
}

impl WhiteSpace {
    /// Returns `true` if whitespace must be kept as written.
    pub fn preserve_whitespace(&self) -> bool {
        use WhiteSpace::*;
        match *self {
            Pre | PreWrap => true,
            Normal => false,
        }
    }

    /// Returns `true` if text may be wrapped.
    #[allow(unused)]
    pub fn do_wrap(&self) -> bool {
        use WhiteSpace::*;
        match *self {
            Pre => false,
            Normal | PreWrap => true,
        }
    }
}
198
/// An RGB colour value
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub struct Colour {
    /// Red component
    pub r: u8,
    /// Green component
    pub g: u8,
    /// Blue component
    pub b: u8,
}

impl std::fmt::Display for Colour {
    /// Formats the colour as `#rrggbb`, using lowercase hexadecimal digits.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Colour { r, g, b } = *self;
        write!(f, "#{r:02x}{g:02x}{b:02x}")
    }
}
215
/// The origin of a style declaration.
///
/// The derived ordering follows the declaration order below, i.e.
/// `None < Agent < User < Author`.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, PartialOrd)]
pub(crate) enum StyleOrigin {
    /// No origin recorded (the default).
    #[default]
    None,
    /// Presumably the built-in (user agent) styles.
    Agent,
    /// Presumably styles supplied by the user of the library.
    #[allow(unused)]
    User,
    /// Presumably styles supplied by the document's author.
    #[allow(unused)]
    Author,
}
226
/// Selector specificity.
///
/// Values are ordered lexicographically by `inline`, then `id`, then `class`,
/// then `typ` (see the `PartialOrd` implementation).
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub(crate) struct Specificity {
    /// Most significant component; set by `Specificity::inline()`.
    inline: bool,
    /// Second most significant component (presumably the number of id selectors).
    id: u16,
    /// Third component (presumably the number of class-like selectors).
    class: u16,
    /// Least significant component (presumably the number of type selectors).
    typ: u16,
}

impl Specificity {
    /// A specificity with only the `inline` flag set.
    #[cfg(feature = "css")]
    fn inline() -> Self {
        Specificity {
            inline: true,
            id: 0,
            class: 0,
            typ: 0,
        }
    }
}

impl std::ops::Add<&Specificity> for &Specificity {
    type Output = Specificity;

    /// Component-wise sum; see `AddAssign` for the exact rules.
    fn add(self, rhs: &Specificity) -> Self::Output {
        let mut total = *self;
        total += rhs;
        total
    }
}

impl std::ops::AddAssign<&Specificity> for Specificity {
    /// Adds `rhs` component-wise: `inline` is OR-ed and the counters are
    /// added.
    ///
    /// The counters saturate at `u16::MAX` rather than overflowing, so an
    /// extremely long selector cannot panic (debug builds) or wrap around to
    /// a low specificity (release builds).
    fn add_assign(&mut self, rhs: &Specificity) {
        self.inline |= rhs.inline;
        self.id = self.id.saturating_add(rhs.id);
        self.class = self.class.saturating_add(rhs.class);
        self.typ = self.typ.saturating_add(rhs.typ);
    }
}

impl PartialOrd for Specificity {
    /// Lexicographic comparison of `(inline, id, class, typ)`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let key = |s: &Specificity| (s.inline, s.id, s.class, s.typ);
        key(self).partial_cmp(&key(other))
    }
}
286
287#[derive(Clone, Copy, Debug)]
288pub(crate) struct WithSpec<T> {
289    val: Option<T>,
290    origin: StyleOrigin,
291    specificity: Specificity,
292    important: bool,
293}
294impl<T: Clone> WithSpec<T> {
295    pub(crate) fn maybe_update(
296        &mut self,
297        important: bool,
298        origin: StyleOrigin,
299        specificity: Specificity,
300        val: T,
301    ) {
302        if self.val.is_some() {
303            // We already have a value, so need to check.
304            if self.important && !important {
305                // important takes priority over not important.
306                return;
307            }
308            // importance is the same.  Next is checking the origin.
309            {
310                use StyleOrigin::*;
311                match (self.origin, origin) {
312                    (Agent, Agent) | (User, User) | (Author, Author) => {
313                        // They're the same so continue the comparison
314                    }
315                    (mine, theirs) => {
316                        if (important && theirs > mine) || (!important && mine > theirs) {
317                            return;
318                        }
319                    }
320                }
321            }
322            // We're now from the same origin an importance
323            if specificity < self.specificity {
324                return;
325            }
326        }
327        self.val = Some(val);
328        self.origin = origin;
329        self.specificity = specificity;
330        self.important = important;
331    }
332
333    pub fn val(&self) -> Option<&T> {
334        self.val.as_ref()
335    }
336}
337
338impl<T> Default for WithSpec<T> {
339    fn default() -> Self {
340        WithSpec {
341            val: None,
342            origin: StyleOrigin::None,
343            specificity: Default::default(),
344            important: false,
345        }
346    }
347}
348
349#[derive(Debug, Clone, Default)]
350pub(crate) struct ComputedStyle {
351    #[cfg(feature = "css")]
352    /// The computed foreground colour, if any
353    pub(crate) colour: WithSpec<Colour>,
354    #[cfg(feature = "css")]
355    /// The computed background colour, if any
356    pub(crate) bg_colour: WithSpec<Colour>,
357    #[cfg(feature = "css")]
358    /// If set, indicates whether `display: none` or something equivalent applies
359    pub(crate) display: WithSpec<css::Display>,
360    /// The CSS white-space property
361    pub(crate) white_space: WithSpec<WhiteSpace>,
362    /// The CSS content property
363    pub(crate) content: WithSpec<css::PseudoContent>,
364    #[cfg(feature = "css_ext")]
365    pub(crate) syntax: WithSpec<css::SyntaxInfo>,
366
367    /// The CSS content property for ::before
368    pub(crate) content_before: Option<Box<ComputedStyle>>,
369    /// The CSS content property for ::after
370    pub(crate) content_after: Option<Box<ComputedStyle>>,
371
372    /// A non-CSS flag indicating we're inside a <pre>.
373    pub(crate) internal_pre: bool,
374}
375
376impl ComputedStyle {
377    /// Return the style data inherited by children.
378    pub(crate) fn inherit(&self) -> Self {
379        // TODO: clear fields that shouldn't be inherited
380        self.clone()
381    }
382}
383
384/// Errors from reading or rendering HTML
385#[derive(thiserror::Error, Debug)]
386#[non_exhaustive]
387pub enum Error {
388    /// The output width was too narrow to render to.
389    #[error("Output width not wide enough.")]
390    TooNarrow,
391    /// CSS parse error
392    #[error("Invalid CSS")]
393    CssParseError,
394    /// An general error was encountered.
395    #[error("Unknown failure")]
396    Fail,
397    /// An I/O error
398    #[error("I/O error")]
399    IoError(#[from] io::Error),
400}
401
402impl PartialEq for Error {
403    fn eq(&self, other: &Error) -> bool {
404        use Error::*;
405        match (self, other) {
406            (TooNarrow, TooNarrow) => true,
407            #[cfg(feature = "css")]
408            (CssParseError, CssParseError) => true,
409            (Fail, Fail) => true,
410            _ => false,
411        }
412    }
413}
414
415impl Eq for Error {}
416
417type Result<T> = std::result::Result<T, Error>;
418
419const MIN_WIDTH: usize = 3;
420
/// Size information/estimate
#[derive(Clone, Copy, Debug, Default)]
struct SizeEstimate {
    /// Rough overall size
    size: usize,
    /// The narrowest possible width
    min_width: usize,

    /// Prefix size; how it is used is specific to the node type.
    prefix_size: usize,
}

impl SizeEstimate {
    /// Combine two estimates into one: the sizes are added and the larger
    /// min width is kept.  The prefix size of the result is zero.
    fn add(self, other: SizeEstimate) -> SizeEstimate {
        Self {
            size: self.size + other.size,
            min_width: self.min_width.max(other.min_width),
            ..Self::default()
        }
    }

    /// Combine two estimates for content placed side by side: both the sizes
    /// and the min widths are added.  The prefix size of the result is zero.
    fn add_hor(self, other: SizeEstimate) -> SizeEstimate {
        Self {
            size: self.size + other.size,
            min_width: self.min_width + other.min_width,
            ..Self::default()
        }
    }

    /// Combine two estimates by taking the maximum of each of size and min
    /// width.  The prefix size of the result is zero.
    fn max(self, other: SizeEstimate) -> SizeEstimate {
        Self {
            size: self.size.max(other.size),
            min_width: self.min_width.max(other.min_width),
            ..Self::default()
        }
    }
}
461
462#[derive(Clone, Debug)]
463/// Render tree table cell
464struct RenderTableCell {
465    colspan: usize,
466    rowspan: usize,
467    content: Vec<RenderNode>,
468    size_estimate: Cell<Option<SizeEstimate>>,
469    col_width: Option<usize>, // Actual width to use
470    x_pos: Option<usize>,     // X location
471    style: ComputedStyle,
472    is_dummy: bool,
473}
474
475impl RenderTableCell {
476    /// Calculate or return the estimate size of the cell
477    fn get_size_estimate(&self) -> SizeEstimate {
478        let Some(size) = self.size_estimate.get() else {
479            let size = self
480                .content
481                .iter()
482                .map(|node| node.get_size_estimate())
483                .fold(Default::default(), SizeEstimate::add);
484            self.size_estimate.set(Some(size));
485            return size;
486        };
487        size
488    }
489
490    /// Make a placeholder cell to cover for a cell above with
491    /// larger rowspan.
492    pub fn dummy(colspan: usize) -> Self {
493        RenderTableCell {
494            colspan,
495            rowspan: 1,
496            content: Default::default(),
497            size_estimate: Cell::new(Some(SizeEstimate::default())),
498            col_width: None,
499            x_pos: None,
500            style: Default::default(),
501            is_dummy: true,
502        }
503    }
504}
505
506#[derive(Clone, Debug)]
507/// Render tree table row
508struct RenderTableRow {
509    cells: Vec<RenderTableCell>,
510    col_sizes: Option<Vec<usize>>,
511    style: ComputedStyle,
512}
513
514impl RenderTableRow {
515    /// Return a mutable iterator over the cells.
516    fn cells(&self) -> std::slice::Iter<'_, RenderTableCell> {
517        self.cells.iter()
518    }
519    /// Return a mutable iterator over the cells.
520    fn cells_mut(&mut self) -> std::slice::IterMut<'_, RenderTableCell> {
521        self.cells.iter_mut()
522    }
523    /// Return an iterator which returns cells by values (removing
524    /// them from the row).
525    fn cells_drain(&mut self) -> impl Iterator<Item = RenderTableCell> {
526        std::mem::take(&mut self.cells).into_iter()
527    }
528    /// Count the number of cells in the row.
529    /// Takes into account colspan.
530    fn num_cells(&self) -> usize {
531        self.cells.iter().map(|cell| cell.colspan.max(1)).sum()
532    }
533
534    /// Return the contained cells as RenderNodes, annotated with their
535    /// widths if available.  Skips cells with no width allocated.
536    fn into_cells(self, vertical: bool) -> Vec<RenderNode> {
537        let mut result = Vec::new();
538        let mut colno = 0;
539        let col_sizes = self.col_sizes.unwrap();
540        let mut x_pos = 0;
541        for mut cell in self.cells {
542            let colspan = cell.colspan;
543            let col_width = if vertical {
544                col_sizes[colno]
545            } else {
546                col_sizes[colno..colno + cell.colspan].iter().sum::<usize>()
547            };
548            // Skip any zero-width columns
549            if col_width > 0 {
550                let this_col_width = col_width + cell.colspan - 1;
551                cell.col_width = Some(this_col_width);
552                cell.x_pos = Some(x_pos);
553                x_pos += this_col_width + 1;
554                let style = cell.style.clone();
555                result.push(RenderNode::new_styled(
556                    RenderNodeInfo::TableCell(cell),
557                    style,
558                ));
559            }
560            colno += colspan;
561        }
562        result
563    }
564}
565
566#[derive(Clone, Debug)]
567/// A representation of a table render tree with metadata.
568struct RenderTable {
569    rows: Vec<RenderTableRow>,
570    num_columns: usize,
571    size_estimate: Cell<Option<SizeEstimate>>,
572}
573
574impl RenderTable {
575    /// Create a new RenderTable with the given rows
576    fn new(mut rows: Vec<RenderTableRow>) -> RenderTable {
577        // We later on want to allocate a vector sized by the column count,
578        // but occasionally we see something like colspan="1000000000".  We
579        // handle this by remapping the column ids to the smallest values
580        // possible.
581        //
582        // Tables with no explicit colspan will be unchanged, but if there
583        // are multiple columns each covered by a single <td> on every row,
584        // they will be collapsed into a single column.  For example:
585        //
586        //    <td><td colspan=1000><td>
587        //    <td colspan=1000><td><td>
588        //
589        //  becomes the equivalent:
590        //    <td><td colspan=2><td>
591        //    <td colspan=2><td><td>
592
593        // This will include 0 and the index after the last colspan.
594        let mut col_positions = BTreeSet::new();
595        // Cells which have a rowspan > 1 from previous rows.
596        // Each element is (rows_left, colpos, colspan)
597        // Before each row, the overhangs are in reverse order so that
598        // they can be popped off.
599        let mut overhang_cells: Vec<(usize, usize, usize)> = Vec::new();
600        let mut next_overhang_cells = Vec::new();
601        col_positions.insert(0);
602        for row in &mut rows {
603            let mut col = 0;
604            let mut new_cells = Vec::new();
605
606            for cell in row.cells_drain() {
607                while let Some(hanging) = overhang_cells.last() {
608                    if hanging.1 <= col {
609                        new_cells.push(RenderTableCell::dummy(hanging.2));
610                        col += hanging.2;
611                        col_positions.insert(col);
612                        let mut used = overhang_cells.pop().unwrap();
613                        if used.0 > 1 {
614                            used.0 -= 1;
615                            next_overhang_cells.push(used);
616                        }
617                    } else {
618                        break;
619                    }
620                }
621                if cell.rowspan > 1 {
622                    next_overhang_cells.push((cell.rowspan - 1, col, cell.colspan));
623                }
624                col += cell.colspan;
625                col_positions.insert(col);
626                new_cells.push(cell);
627            }
628            // Handle remaining overhanging cells
629            while let Some(mut hanging) = overhang_cells.pop() {
630                new_cells.push(RenderTableCell::dummy(hanging.2));
631                col += hanging.2;
632                col_positions.insert(col);
633                if hanging.0 > 1 {
634                    hanging.0 -= 1;
635                    next_overhang_cells.push(hanging);
636                }
637            }
638
639            row.cells = new_cells;
640            overhang_cells = std::mem::take(&mut next_overhang_cells);
641            overhang_cells.reverse();
642        }
643
644        let colmap: HashMap<_, _> = col_positions
645            .into_iter()
646            .enumerate()
647            .map(|(i, pos)| (pos, i))
648            .collect();
649
650        for row in &mut rows {
651            let mut pos = 0;
652            let mut mapped_pos = 0;
653            for cell in row.cells_mut() {
654                let nextpos = pos + cell.colspan.max(1);
655                let next_mapped_pos = *colmap.get(&nextpos).unwrap();
656                cell.colspan = next_mapped_pos - mapped_pos;
657                pos = nextpos;
658                mapped_pos = next_mapped_pos;
659            }
660        }
661
662        let num_columns = rows.iter().map(|r| r.num_cells()).max().unwrap_or(0);
663        RenderTable {
664            rows,
665            num_columns,
666            size_estimate: Cell::new(None),
667        }
668    }
669
670    /// Return an iterator over the rows.
671    fn rows(&self) -> std::slice::Iter<'_, RenderTableRow> {
672        self.rows.iter()
673    }
674
675    /// Consume this and return a `Vec<RenderNode>` containing the children;
676    /// the children know the column sizes required.
677    fn into_rows(self, col_sizes: Vec<usize>, vert: bool) -> Vec<RenderNode> {
678        self.rows
679            .into_iter()
680            .map(|mut tr| {
681                tr.col_sizes = Some(col_sizes.clone());
682                let style = tr.style.clone();
683                RenderNode::new_styled(RenderNodeInfo::TableRow(tr, vert), style)
684            })
685            .collect()
686    }
687
688    fn calc_size_estimate(&self, _context: &HtmlContext) -> SizeEstimate {
689        if self.num_columns == 0 {
690            let result = SizeEstimate {
691                size: 0,
692                min_width: 0,
693                prefix_size: 0,
694            };
695            self.size_estimate.set(Some(result));
696            return result;
697        }
698        let mut sizes: Vec<SizeEstimate> = vec![Default::default(); self.num_columns];
699
700        // For now, a simple estimate based on adding up sub-parts.
701        for row in self.rows() {
702            let mut colno = 0usize;
703            for cell in row.cells() {
704                let cellsize = cell.get_size_estimate();
705                for colnum in 0..cell.colspan {
706                    sizes[colno + colnum].size += cellsize.size / cell.colspan;
707                    sizes[colno + colnum].min_width = max(
708                        sizes[colno + colnum].min_width,
709                        cellsize.min_width / cell.colspan,
710                    );
711                }
712                colno += cell.colspan;
713            }
714        }
715        let size = sizes.iter().map(|s| s.size).sum::<usize>() + self.num_columns.saturating_sub(1);
716        let min_width = sizes.iter().map(|s| s.min_width).sum::<usize>() + self.num_columns - 1;
717        let result = SizeEstimate {
718            size,
719            min_width,
720            prefix_size: 0,
721        };
722        self.size_estimate.set(Some(result));
723        result
724    }
725}
726
727/// The node-specific information distilled from the DOM.
728#[derive(Clone, Debug)]
729#[non_exhaustive]
730enum RenderNodeInfo {
731    /// Some text.
732    Text(String),
733    /// A group of nodes collected together.
734    Container(Vec<RenderNode>),
735    /// A link with contained nodes
736    Link(String, Vec<RenderNode>),
737    /// An emphasised region
738    Em(Vec<RenderNode>),
739    /// A strong region
740    Strong(Vec<RenderNode>),
741    /// A struck out region
742    Strikeout(Vec<RenderNode>),
743    /// A code region
744    Code(Vec<RenderNode>),
745    /// An image (src, title)
746    Img(String, String),
747    /// An inline SVG (title)
748    Svg(String),
749    /// A block element with children
750    Block(Vec<RenderNode>),
751    /// A header (h1, h2, ...) with children
752    Header(usize, Vec<RenderNode>),
753    /// A Div element with children
754    Div(Vec<RenderNode>),
755    /// A blockquote
756    BlockQuote(Vec<RenderNode>),
757    /// An unordered list
758    Ul(Vec<RenderNode>),
759    /// An ordered list
760    Ol(i64, Vec<RenderNode>),
761    /// A description list (containing Dt or Dd)
762    Dl(Vec<RenderNode>),
763    /// A term (from a `<dt>`)
764    Dt(Vec<RenderNode>),
765    /// A definition (from a `<dl>`)
766    Dd(Vec<RenderNode>),
767    /// A line break
768    Break,
769    /// A table
770    Table(RenderTable),
771    /// A set of table rows (from either `<thead>` or `<tbody>`
772    TableBody(Vec<RenderTableRow>),
773    /// Table row (must only appear within a table body)
774    /// If the boolean is true, then the cells are drawn vertically
775    /// instead of horizontally (because of space).
776    TableRow(RenderTableRow, bool),
777    /// Table cell (must only appear within a table row)
778    TableCell(RenderTableCell),
779    /// Start of a named HTML fragment
780    FragStart(String),
781    /// A list item
782    ListItem(Vec<RenderNode>),
783    /// Superscript text
784    Sup(Vec<RenderNode>),
785}
786
787/// Common fields from a node.
788#[derive(Clone, Debug)]
789struct RenderNode {
790    size_estimate: Cell<Option<SizeEstimate>>,
791    info: RenderNodeInfo,
792    style: ComputedStyle,
793}
794
795impl RenderNode {
796    /// Create a node from the RenderNodeInfo.
797    fn new(info: RenderNodeInfo) -> RenderNode {
798        RenderNode {
799            size_estimate: Cell::new(None),
800            info,
801            style: Default::default(),
802        }
803    }
804
805    /// Create a node from the RenderNodeInfo.
806    fn new_styled(info: RenderNodeInfo, style: ComputedStyle) -> RenderNode {
807        RenderNode {
808            size_estimate: Cell::new(None),
809            info,
810            style,
811        }
812    }
813
814    /// Get a size estimate
815    fn get_size_estimate(&self) -> SizeEstimate {
816        self.size_estimate.get().unwrap()
817    }
818
819    /// Calculate the size of this node.
820    fn calc_size_estimate<D: TextDecorator>(
821        &self,
822        context: &HtmlContext,
823        decorator: &D,
824    ) -> SizeEstimate {
825        // If it's already calculated, then just return the answer.
826        if let Some(s) = self.size_estimate.get() {
827            return s;
828        };
829
830        use RenderNodeInfo::*;
831
832        let recurse = |node: &RenderNode| node.calc_size_estimate(context, decorator);
833
834        // Otherwise, make an estimate.
835        let estimate = match self.info {
836            Text(ref t) | Img(_, ref t) | Svg(ref t) => {
837                use unicode_width::UnicodeWidthChar;
838                let mut len = 0;
839                let mut in_whitespace = false;
840                for c in t.trim_collapsible_ws().chars() {
841                    let is_collapsible_ws = !c.always_takes_space();
842                    if !is_collapsible_ws {
843                        len += UnicodeWidthChar::width(c).unwrap_or(0);
844                        // Count the preceding whitespace as one.
845                        if in_whitespace {
846                            len += 1;
847                        }
848                    }
849                    in_whitespace = is_collapsible_ws;
850                }
851                // Add one for preceding whitespace, unless the node is otherwise empty.
852                if let Some(true) = t.chars().next().map(|c| !c.always_takes_space()) {
853                    if len > 0 {
854                        len += 1;
855                    }
856                }
857                if let Img(_, _) = self.info {
858                    len += 2;
859                }
860                SizeEstimate {
861                    size: len,
862                    min_width: len.min(context.min_wrap_width),
863                    prefix_size: 0,
864                }
865            }
866
867            Container(ref v) | Em(ref v) | Strong(ref v) | Strikeout(ref v) | Code(ref v)
868            | Block(ref v) | Div(ref v) | Dl(ref v) | Dt(ref v) | ListItem(ref v) | Sup(ref v) => v
869                .iter()
870                .map(recurse)
871                .fold(Default::default(), SizeEstimate::add),
872            Link(ref _target, ref v) => v
873                .iter()
874                .map(recurse)
875                .fold(Default::default(), SizeEstimate::add)
876                .add(SizeEstimate {
877                    size: 5,
878                    min_width: 5,
879                    prefix_size: 0,
880                }),
881            Dd(ref v) | BlockQuote(ref v) | Ul(ref v) => {
882                let prefix = match self.info {
883                    Dd(_) => "  ".into(),
884                    BlockQuote(_) => decorator.quote_prefix(),
885                    Ul(_) => decorator.unordered_item_prefix(),
886                    _ => unreachable!(),
887                };
888                let prefix_width = UnicodeWidthStr::width(prefix.as_str());
889                let mut size = v
890                    .iter()
891                    .map(recurse)
892                    .fold(Default::default(), SizeEstimate::add)
893                    .add_hor(SizeEstimate {
894                        size: prefix_width,
895                        min_width: prefix_width,
896                        prefix_size: 0,
897                    });
898                size.prefix_size = prefix_width;
899                size
900            }
901            Ol(i, ref v) => {
902                let prefix_size = calc_ol_prefix_size(i, v.len(), decorator);
903                let mut result = v
904                    .iter()
905                    .map(recurse)
906                    .fold(Default::default(), SizeEstimate::add)
907                    .add_hor(SizeEstimate {
908                        size: prefix_size,
909                        min_width: prefix_size,
910                        prefix_size: 0,
911                    });
912                result.prefix_size = prefix_size;
913                result
914            }
915            Header(level, ref v) => {
916                let prefix_size = decorator.header_prefix(level).len();
917                let mut size = v
918                    .iter()
919                    .map(recurse)
920                    .fold(Default::default(), SizeEstimate::add)
921                    .add_hor(SizeEstimate {
922                        size: prefix_size,
923                        min_width: prefix_size,
924                        prefix_size: 0,
925                    });
926                size.prefix_size = prefix_size;
927                size
928            }
929            Break => SizeEstimate {
930                size: 1,
931                min_width: 1,
932                prefix_size: 0,
933            },
934            Table(ref t) => t.calc_size_estimate(context),
935            TableRow(..) | TableBody(_) | TableCell(_) => unimplemented!(),
936            FragStart(_) => Default::default(),
937        };
938        self.size_estimate.set(Some(estimate));
939        estimate
940    }
941
942    /// Return true if this node is definitely empty.  This is used to quickly
943    /// remove e.g. links with no anchor text in most cases, but can't recurse
944    /// and look more deeply.
945    fn is_shallow_empty(&self) -> bool {
946        use RenderNodeInfo::*;
947
948        // Otherwise, make an estimate.
949        match self.info {
950            Text(ref t) | Img(_, ref t) | Svg(ref t) => {
951                let len = t.trim().len();
952                len == 0
953            }
954
955            Container(ref v)
956            | Link(_, ref v)
957            | Em(ref v)
958            | Strong(ref v)
959            | Strikeout(ref v)
960            | Code(ref v)
961            | Block(ref v)
962            | ListItem(ref v)
963            | Div(ref v)
964            | BlockQuote(ref v)
965            | Dl(ref v)
966            | Dt(ref v)
967            | Dd(ref v)
968            | Ul(ref v)
969            | Ol(_, ref v)
970            | Sup(ref v) => v.is_empty(),
971            Header(_level, ref v) => v.is_empty(),
972            Break => true,
973            Table(ref _t) => false,
974            TableRow(..) | TableBody(_) | TableCell(_) => false,
975            FragStart(_) => true,
976        }
977    }
978
979    fn write_container(
980        &self,
981        name: &str,
982        items: &[RenderNode],
983        f: &mut std::fmt::Formatter,
984        indent: usize,
985    ) -> std::prelude::v1::Result<(), std::fmt::Error> {
986        writeln!(f, "{:indent$}{name}:", "")?;
987        for item in items {
988            item.write_self(f, indent + 1)?;
989        }
990        Ok(())
991    }
992    fn write_style(
993        f: &mut std::fmt::Formatter,
994        indent: usize,
995        style: &ComputedStyle,
996    ) -> std::result::Result<(), std::fmt::Error> {
997        use std::fmt::Write;
998        let mut stylestr = String::new();
999
1000        #[cfg(feature = "css")]
1001        {
1002            if let Some(col) = style.colour.val() {
1003                write!(&mut stylestr, " colour={:?}", col)?;
1004            }
1005            if let Some(col) = style.bg_colour.val() {
1006                write!(&mut stylestr, " bg_colour={:?}", col)?;
1007            }
1008            if let Some(val) = style.display.val() {
1009                write!(&mut stylestr, " disp={:?}", val)?;
1010            }
1011        }
1012        if let Some(ws) = style.white_space.val() {
1013            write!(&mut stylestr, " white_space={:?}", ws)?;
1014        }
1015        if style.internal_pre {
1016            write!(&mut stylestr, " internal_pre")?;
1017        }
1018        if !stylestr.is_empty() {
1019            writeln!(f, "{:indent$}[Style:{stylestr}", "")?;
1020        }
1021        Ok(())
1022    }
1023    fn write_self(
1024        &self,
1025        f: &mut std::fmt::Formatter,
1026        indent: usize,
1027    ) -> std::prelude::v1::Result<(), std::fmt::Error> {
1028        Self::write_style(f, indent, &self.style)?;
1029
1030        match &self.info {
1031            RenderNodeInfo::Text(s) => writeln!(f, "{:indent$}{s:?}", "")?,
1032            RenderNodeInfo::Container(v) => {
1033                self.write_container("Container", v, f, indent)?;
1034            }
1035            RenderNodeInfo::Link(targ, v) => {
1036                self.write_container(&format!("Link({})", targ), v, f, indent)?;
1037            }
1038            RenderNodeInfo::Em(v) => {
1039                self.write_container("Em", v, f, indent)?;
1040            }
1041            RenderNodeInfo::Strong(v) => {
1042                self.write_container("Strong", v, f, indent)?;
1043            }
1044            RenderNodeInfo::Strikeout(v) => {
1045                self.write_container("Strikeout", v, f, indent)?;
1046            }
1047            RenderNodeInfo::Code(v) => {
1048                self.write_container("Code", v, f, indent)?;
1049            }
1050            RenderNodeInfo::Img(src, title) => {
1051                writeln!(f, "{:indent$}Img src={:?} title={:?}:", "", src, title)?;
1052            }
1053            RenderNodeInfo::Svg(title) => {
1054                writeln!(f, "{:indent$}Svg title={:?}:", "", title)?;
1055            }
1056            RenderNodeInfo::Block(v) => {
1057                self.write_container("Block", v, f, indent)?;
1058            }
1059            RenderNodeInfo::Header(depth, v) => {
1060                self.write_container(&format!("Header({})", depth), v, f, indent)?;
1061            }
1062            RenderNodeInfo::Div(v) => {
1063                self.write_container("Div", v, f, indent)?;
1064            }
1065            RenderNodeInfo::BlockQuote(v) => {
1066                self.write_container("BlockQuote", v, f, indent)?;
1067            }
1068            RenderNodeInfo::Ul(v) => {
1069                self.write_container("Ul", v, f, indent)?;
1070            }
1071            RenderNodeInfo::Ol(start, v) => {
1072                self.write_container(&format!("Ol({})", start), v, f, indent)?;
1073            }
1074            RenderNodeInfo::Dl(v) => {
1075                self.write_container("Dl", v, f, indent)?;
1076            }
1077            RenderNodeInfo::Dt(v) => {
1078                self.write_container("Dt", v, f, indent)?;
1079            }
1080            RenderNodeInfo::Dd(v) => {
1081                self.write_container("Dd", v, f, indent)?;
1082            }
1083            RenderNodeInfo::Break => {
1084                writeln!(f, "{:indent$}Break", "", indent = indent)?;
1085            }
1086            RenderNodeInfo::Table(rows) => {
1087                writeln!(f, "{:indent$}Table ({} cols):", "", rows.num_columns)?;
1088                for rtr in &rows.rows {
1089                    Self::write_style(f, indent + 1, &rtr.style)?;
1090                    writeln!(
1091                        f,
1092                        "{:width$}Row ({} cells):",
1093                        "",
1094                        rtr.cells.len(),
1095                        width = indent + 1
1096                    )?;
1097                    for cell in &rtr.cells {
1098                        Self::write_style(f, indent + 2, &cell.style)?;
1099                        writeln!(
1100                            f,
1101                            "{:width$}Cell colspan={} width={:?}:",
1102                            "",
1103                            cell.colspan,
1104                            cell.col_width,
1105                            width = indent + 2
1106                        )?;
1107                        for node in &cell.content {
1108                            node.write_self(f, indent + 3)?;
1109                        }
1110                    }
1111                }
1112            }
1113            RenderNodeInfo::TableBody(_) => todo!(),
1114            RenderNodeInfo::TableRow(_, _) => todo!(),
1115            RenderNodeInfo::TableCell(_) => todo!(),
1116            RenderNodeInfo::FragStart(frag) => {
1117                writeln!(f, "{:indent$}FragStart({}):", "", frag)?;
1118            }
1119            RenderNodeInfo::ListItem(v) => {
1120                self.write_container("ListItem", v, f, indent)?;
1121            }
1122            RenderNodeInfo::Sup(v) => {
1123                self.write_container("Sup", v, f, indent)?;
1124            }
1125        }
1126        Ok(())
1127    }
1128}
1129
1130fn precalc_size_estimate<'a, D: TextDecorator>(
1131    node: &'a RenderNode,
1132    context: &mut HtmlContext,
1133    decorator: &'a D,
1134) -> TreeMapResult<'a, HtmlContext, &'a RenderNode, ()> {
1135    use RenderNodeInfo::*;
1136    if node.size_estimate.get().is_some() {
1137        return TreeMapResult::Nothing;
1138    }
1139    match node.info {
1140        Text(_) | Img(_, _) | Svg(_) | Break | FragStart(_) => {
1141            let _ = node.calc_size_estimate(context, decorator);
1142            TreeMapResult::Nothing
1143        }
1144
1145        Container(ref v)
1146        | Link(_, ref v)
1147        | Em(ref v)
1148        | Strong(ref v)
1149        | Strikeout(ref v)
1150        | Code(ref v)
1151        | Block(ref v)
1152        | ListItem(ref v)
1153        | Div(ref v)
1154        | BlockQuote(ref v)
1155        | Ul(ref v)
1156        | Ol(_, ref v)
1157        | Dl(ref v)
1158        | Dt(ref v)
1159        | Dd(ref v)
1160        | Sup(ref v)
1161        | Header(_, ref v) => TreeMapResult::PendingChildren {
1162            children: v.iter().collect(),
1163            cons: Box::new(move |context, _cs| {
1164                node.calc_size_estimate(context, decorator);
1165                Ok(None)
1166            }),
1167            prefn: None,
1168            postfn: None,
1169        },
1170        Table(ref t) => {
1171            /* Return all the indirect children which are RenderNodes. */
1172            let mut children = Vec::new();
1173            for row in &t.rows {
1174                for cell in &row.cells {
1175                    children.extend(cell.content.iter());
1176                }
1177            }
1178            TreeMapResult::PendingChildren {
1179                children,
1180                cons: Box::new(move |context, _cs| {
1181                    node.calc_size_estimate(context, decorator);
1182                    Ok(None)
1183                }),
1184                prefn: None,
1185                postfn: None,
1186            }
1187        }
1188        TableRow(..) | TableBody(_) | TableCell(_) => unimplemented!(),
1189    }
1190}
1191
1192/// Convert a table into a RenderNode
1193fn table_to_render_tree<'a, T: Write>(
1194    input: RenderInput,
1195    computed: ComputedStyle,
1196    _err_out: &mut T,
1197) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1198    pending(input, move |_, rowset| {
1199        let mut rows = vec![];
1200        for bodynode in rowset {
1201            if let RenderNodeInfo::TableBody(body) = bodynode.info {
1202                rows.extend(body);
1203            } else {
1204                html_trace!("Found in table: {:?}", bodynode.info);
1205            }
1206        }
1207        if rows.is_empty() {
1208            None
1209        } else {
1210            Some(RenderNode::new_styled(
1211                RenderNodeInfo::Table(RenderTable::new(rows)),
1212                computed,
1213            ))
1214        }
1215    })
1216}
1217
1218/// Add rows from a thead or tbody.
1219fn tbody_to_render_tree<'a, T: Write>(
1220    input: RenderInput,
1221    computed: ComputedStyle,
1222    _err_out: &mut T,
1223) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1224    pending_noempty(input, move |_, rowchildren| {
1225        let mut rows = rowchildren
1226            .into_iter()
1227            .flat_map(|rownode| {
1228                if let RenderNodeInfo::TableRow(row, _) = rownode.info {
1229                    Some(row)
1230                } else {
1231                    html_trace!("  [[tbody child: {:?}]]", rownode);
1232                    None
1233                }
1234            })
1235            .collect::<Vec<_>>();
1236
1237        // Handle colspan=0 by replacing it.
1238        // Get a list of (has_zero_colspan, sum_colspan)
1239        let num_columns = rows
1240            .iter()
1241            .map(|row| {
1242                row.cells()
1243                    // Treat the column as having colspan 1 for initial counting.
1244                    .map(|cell| (cell.colspan == 0, cell.colspan.max(1)))
1245                    .fold((false, 0), |a, b| (a.0 || b.0, a.1 + b.1))
1246            })
1247            .collect::<Vec<_>>();
1248
1249        let max_columns = num_columns.iter().map(|(_, span)| span).max().unwrap_or(&1);
1250
1251        for (i, &(has_zero, num_cols)) in num_columns.iter().enumerate() {
1252            // Note this won't be sensible if more than one column has colspan=0,
1253            // but that's not very well defined anyway.
1254            if has_zero {
1255                for cell in rows[i].cells_mut() {
1256                    if cell.colspan == 0 {
1257                        // +1 because we said it had 1 to start with
1258                        cell.colspan = max_columns - num_cols + 1;
1259                    }
1260                }
1261            }
1262        }
1263
1264        Some(RenderNode::new_styled(
1265            RenderNodeInfo::TableBody(rows),
1266            computed,
1267        ))
1268    })
1269}
1270
1271/// Convert a table row to a RenderTableRow
1272fn tr_to_render_tree<'a, T: Write>(
1273    input: RenderInput,
1274    computed: ComputedStyle,
1275    _err_out: &mut T,
1276) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1277    pending(input, move |_, cellnodes| {
1278        let cells = cellnodes
1279            .into_iter()
1280            .flat_map(|cellnode| {
1281                if let RenderNodeInfo::TableCell(cell) = cellnode.info {
1282                    Some(cell)
1283                } else {
1284                    html_trace!("  [[tr child: {:?}]]", cellnode);
1285                    None
1286                }
1287            })
1288            .collect();
1289        let style = computed.clone();
1290        Some(RenderNode::new_styled(
1291            RenderNodeInfo::TableRow(
1292                RenderTableRow {
1293                    cells,
1294                    col_sizes: None,
1295                    style,
1296                },
1297                false,
1298            ),
1299            computed,
1300        ))
1301    })
1302}
1303
1304/// Convert a single table cell to a render node.
1305fn td_to_render_tree<'a, T: Write>(
1306    input: RenderInput,
1307    computed: ComputedStyle,
1308    _err_out: &mut T,
1309) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1310    let mut colspan = 1;
1311    let mut rowspan = 1;
1312    if let Element { ref attrs, .. } = input.handle.data {
1313        for attr in attrs.borrow().iter() {
1314            if &attr.name.local == "colspan" {
1315                let v: &str = &attr.value;
1316                colspan = v.parse().unwrap_or(1);
1317            }
1318            if &attr.name.local == "rowspan" {
1319                let v: &str = &attr.value;
1320                rowspan = v.parse().unwrap_or(1);
1321            }
1322        }
1323    }
1324    pending(input, move |_, children| {
1325        let style = computed.clone();
1326        Some(RenderNode::new_styled(
1327            RenderNodeInfo::TableCell(RenderTableCell {
1328                colspan,
1329                rowspan,
1330                content: children,
1331                size_estimate: Cell::new(None),
1332                col_width: None,
1333                x_pos: None,
1334                style,
1335                is_dummy: false,
1336            }),
1337            computed,
1338        ))
1339    })
1340}
1341
/// A reducer which combines results from mapping children into
/// the result for the current node.  Takes a context and a
/// vector of results and returns a new result (or nothing).
///
/// It is `FnOnce`: it is called a single time, once all of the node's
/// children have been processed.  Returning `Ok(None)` means the node
/// contributes no result to its parent; an `Err` aborts the traversal.
type ResultReducer<'a, C, R> = dyn FnOnce(&mut C, Vec<R>) -> Result<Option<R>> + 'a;
1346
/// A closure to call before processing a child node.  It is given the
/// context and the child about to be processed; an `Err` aborts the
/// traversal.
type ChildPreFn<C, N> = dyn Fn(&mut C, &N) -> Result<()>;
1349
/// A closure to call after processing a child node,
/// before adding the result to the processed results
/// vector.  It is not called for children which produced
/// no result; an `Err` aborts the traversal.
type ChildPostFn<C, R> = dyn Fn(&mut C, &R) -> Result<()>;
1354
/// The result of trying to render one node.
///
/// `C` is the traversal context type, `N` the input node type and `R` the
/// result type.
enum TreeMapResult<'a, C, N, R> {
    /// A completed result.
    Finished(R),
    /// Deferred completion - can be turned into a result
    /// once the vector of children are processed.
    PendingChildren {
        /// The child nodes still to be processed, in order.
        children: Vec<N>,
        /// Builds this node's result from its children's results.
        cons: Box<ResultReducer<'a, C, R>>,
        /// Optional hook called before each child is processed.
        prefn: Option<Box<ChildPreFn<C, N>>>,
        /// Optional hook called with each child's result before it is
        /// stored.
        postfn: Option<Box<ChildPostFn<C, R>>>,
    },
    /// Nothing (e.g. a comment or other ignored element).
    Nothing,
}
1370
1371fn tree_map_reduce<'a, C, N, R, M>(
1372    context: &mut C,
1373    top: N,
1374    mut process_node: M,
1375) -> Result<Option<R>>
1376where
1377    M: FnMut(&mut C, N) -> Result<TreeMapResult<'a, C, N, R>>,
1378{
1379    /// A node partially decoded, waiting for its children to
1380    /// be processed.
1381    struct PendingNode<'a, C, R, N> {
1382        /// How to make the node once finished
1383        construct: Box<ResultReducer<'a, C, R>>,
1384        /// Called before processing each child
1385        prefn: Option<Box<ChildPreFn<C, N>>>,
1386        /// Called after processing each child
1387        postfn: Option<Box<ChildPostFn<C, R>>>,
1388        /// Children already processed
1389        children: Vec<R>,
1390        /// Iterator of child nodes not yet processed
1391        to_process: std::vec::IntoIter<N>,
1392    }
1393
1394    let mut last = PendingNode {
1395        // We only expect one child, which we'll just return.
1396        construct: Box::new(|_, mut cs| Ok(cs.pop())),
1397        prefn: None,
1398        postfn: None,
1399        children: Vec::new(),
1400        to_process: vec![top].into_iter(),
1401    };
1402    let mut pending_stack = Vec::new();
1403    loop {
1404        // Get the next child node to process
1405        while let Some(h) = last.to_process.next() {
1406            if let Some(f) = &last.prefn {
1407                f(context, &h)?;
1408            }
1409            match process_node(context, h)? {
1410                TreeMapResult::Finished(result) => {
1411                    if let Some(f) = &last.postfn {
1412                        f(context, &result)?;
1413                    }
1414                    last.children.push(result);
1415                }
1416                TreeMapResult::PendingChildren {
1417                    children,
1418                    cons,
1419                    prefn,
1420                    postfn,
1421                } => {
1422                    pending_stack.push(last);
1423                    last = PendingNode {
1424                        construct: cons,
1425                        prefn,
1426                        postfn,
1427                        children: Vec::new(),
1428                        to_process: children.into_iter(),
1429                    };
1430                }
1431                TreeMapResult::Nothing => {}
1432            };
1433        }
1434        // No more children, so finally construct the parent.
1435        if let Some(mut parent) = pending_stack.pop() {
1436            if let Some(node) = (last.construct)(context, last.children)? {
1437                if let Some(f) = &parent.postfn {
1438                    f(context, &node)?;
1439                }
1440                parent.children.push(node);
1441            }
1442            last = parent;
1443            continue;
1444        }
1445        // Finished the whole stack!
1446        break Ok((last.construct)(context, last.children)?);
1447    }
1448}
1449
/// A set of syntax highlighters, looked up by name.  Highlighters are held
/// behind `Rc`, so cloning the map shares them rather than copying them.
#[cfg(feature = "css_ext")]
#[derive(Clone, Default)]
struct HighlighterMap {
    /// The highlighter registered under each name.
    map: HashMap<String, Rc<SyntaxHighlighter>>,
}
1455
#[cfg(feature = "css_ext")]
impl HighlighterMap {
    /// Look up the highlighter registered under `name`, returning another
    /// shared handle to it, or `None` if there is no such highlighter.
    pub fn get(&self, name: &str) -> Option<Rc<SyntaxHighlighter>> {
        let found = self.map.get(name)?;
        Some(Rc::clone(found))
    }

    /// Register `f` under `name`, replacing any highlighter previously
    /// registered under the same name.
    fn insert(&mut self, name: impl Into<String>, f: Rc<SyntaxHighlighter>) {
        let key: String = name.into();
        self.map.insert(key, f);
    }
}
1466
#[cfg(feature = "css_ext")]
impl std::fmt::Debug for HighlighterMap {
    /// Only the registered names are shown, not the highlighters themselves.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let names: Vec<_> = self.map.keys().collect();
        let mut out = f.debug_struct("HighlighterMap");
        out.field("map", &names);
        out.finish()
    }
}
1475
#[cfg(feature = "css_ext")]
impl PartialEq for HighlighterMap {
    /// Two maps are equal when they register exactly the same names and
    /// every name refers to the very same highlighter in both.
    ///
    /// Highlighters are compared by identity (`Rc::ptr_eq`), not by value,
    /// so a map always equals its own clones.  This replaces a `todo!()`
    /// which made any comparison of a value containing a `HighlighterMap`
    /// panic.
    fn eq(&self, other: &Self) -> bool {
        // Equal sizes plus "every entry of self is in other" implies that
        // the two key sets are identical.
        self.map.len() == other.map.len()
            && self.map.iter().all(|(name, highlighter)| {
                other
                    .map
                    .get(name)
                    .map_or(false, |theirs| Rc::ptr_eq(highlighter, theirs))
            })
    }
}

// Identity comparison is reflexive, symmetric and transitive, as `Eq`
// requires.
#[cfg(feature = "css_ext")]
impl Eq for HighlighterMap {}
1485
/// Settings and shared state which are passed (as `context`) through the
/// DOM-to-render-tree conversion and the size estimate calculations.
#[derive(Debug, PartialEq, Eq)]
struct HtmlContext {
    // The style rules in effect.  When `use_doc_css` is set, this is merged
    // into the stylesheet extracted from the document before conversion
    // starts.
    style_data: css::StyleData,
    // Whether to extract and use the stylesheet found in the document.
    #[cfg(feature = "css")]
    use_doc_css: bool,

    // NOTE(review): the options below are not read in this part of the
    // file; their meaning should be confirmed at their use sites.
    max_wrap_width: Option<usize>,
    pad_block_width: bool,
    allow_width_overflow: bool,
    min_wrap_width: usize,
    raw: bool,
    draw_borders: bool,
    wrap_links: bool,
    include_link_footnotes: bool,
    use_unicode_strikeout: bool,
    image_mode: config::ImageRenderMode,

    // The registered syntax highlighters, by name.
    #[cfg(feature = "css_ext")]
    syntax_highlighters: HighlighterMap,
}
1506
// Input to render tree conversion: one DOM node, together with the state
// needed to convert it and to build the inputs for its children.
struct RenderInput {
    // The DOM node to be converted.
    handle: Handle,
    // The style handed down to this node.  `children()` passes the same
    // value on to every child input.
    parent_style: Rc<ComputedStyle>,
    // Overlay styles from syntax highlighting.  Each entry is a byte range
    // within this node's text and the style to apply to it; the entries
    // are kept sorted by range.
    #[cfg(feature = "css_ext")]
    extra_styles: RefCell<Vec<(Range<usize>, TextStyle)>>,
    // Map from node to the length of enclosed text nodes (in bytes).  The
    // map is shared between an input and the inputs of its children.
    node_lengths: Rc<RefCell<HashMap<*const Node, usize>>>,
}
1517
1518impl RenderInput {
1519    fn new(handle: Handle, parent_style: Rc<ComputedStyle>) -> Self {
1520        RenderInput {
1521            handle,
1522            parent_style,
1523            #[cfg(feature = "css_ext")]
1524            extra_styles: Default::default(),
1525            node_lengths: Default::default(),
1526        }
1527    }
1528
1529    #[cfg(feature = "css_ext")]
1530    fn set_syntax_info(&self, full_text: &str, highlighted: Vec<(TextStyle, &str)>) {
1531        let mut node_styles = Vec::new();
1532
1533        // Turn the returned strings into offsets into full_text.  We assume
1534        // we can maintain relative offsets as we step through the tree rendering.
1535        for (style, s) in highlighted {
1536            fn get_offset(full: &str, sub: &str) -> Option<Range<usize>> {
1537                // This looks scary, but if we get this wrong the worst case is
1538                // that we end up panicking when using the offsets.
1539                let full_start = full.as_ptr() as usize;
1540                let full_end = full_start + full.len();
1541                let sub_start = sub.as_ptr() as usize;
1542                let sub_end = sub_start + sub.len();
1543
1544                if sub_start >= full_start && sub_end <= full_end {
1545                    Some((sub_start - full_start)..(sub_end - full_start))
1546                } else {
1547                    None
1548                }
1549            }
1550
1551            if let Some(offset_range) = get_offset(full_text, s) {
1552                node_styles.push((offset_range, style));
1553            } // else we ignore the highlight.
1554        }
1555        node_styles.sort_by_key(|r| (r.0.start, r.0.end));
1556        *self.extra_styles.borrow_mut() = node_styles;
1557    }
1558
1559    // Return the children in the right form
1560    #[allow(clippy::mut_range_bound)]
1561    fn children(&self) -> Vec<RenderInput> {
1562        #[cfg(feature = "css_ext")]
1563        if !self.extra_styles.borrow().is_empty() {
1564            let mut offset = 0;
1565            let mut result = Vec::new();
1566            let mut start_style_index = 0;
1567            let node_lengths = self.node_lengths.borrow();
1568            let extra_styles = self.extra_styles.borrow();
1569            for child in &*self.handle.children.borrow() {
1570                let end_offset = offset + node_lengths.get(&Rc::as_ptr(child)).unwrap();
1571                let mut child_extra_styles = Vec::new();
1572                for es_idx in start_style_index..extra_styles.len() {
1573                    let mut style_range = extra_styles[es_idx].0.clone();
1574                    if style_range.start >= end_offset {
1575                        // We've gone too far.
1576                        break;
1577                    }
1578                    if style_range.end <= offset {
1579                        // We don't need to look at this again
1580                        // Note this is here to restart this loop in a different place
1581                        // in the next run of the outer loop; hence allowing
1582                        // clippy::mut_range_bound on the function.
1583                        start_style_index = es_idx;
1584                    }
1585                    // This piece must overlap!
1586                    // Clip the range to this node.
1587                    style_range.start = style_range.start.max(offset) - offset;
1588                    style_range.end = style_range.end.min(end_offset) - offset;
1589
1590                    child_extra_styles.push((style_range, extra_styles[es_idx].1.clone()));
1591                }
1592                result.push(RenderInput {
1593                    handle: Rc::clone(child),
1594                    parent_style: Rc::clone(&self.parent_style),
1595                    extra_styles: RefCell::new(child_extra_styles),
1596                    node_lengths: self.node_lengths.clone(),
1597                });
1598                offset = end_offset;
1599            }
1600            return result;
1601        }
1602
1603        // Simple case, and we might not have the node lengths.
1604        self.handle
1605            .children
1606            .borrow()
1607            .iter()
1608            .map(|child| RenderInput {
1609                handle: child.clone(),
1610                parent_style: Rc::clone(&self.parent_style),
1611                #[cfg(feature = "css_ext")]
1612                extra_styles: Default::default(),
1613                node_lengths: self.node_lengths.clone(),
1614            })
1615            .collect()
1616    }
1617
1618    #[cfg(feature = "css_ext")]
1619    fn do_extract_text(
1620        out: &mut String,
1621        handle: &Handle,
1622        length_map: &mut HashMap<*const Node, usize>,
1623    ) {
1624        match handle.data {
1625            markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
1626                let s: &str = &tstr.borrow();
1627                out.push_str(s);
1628                length_map.entry(Rc::as_ptr(handle)).or_insert(s.len());
1629            }
1630            _ => {
1631                for child in handle.children.borrow().iter() {
1632                    let len_before = out.len();
1633                    RenderInput::do_extract_text(out, child, length_map);
1634                    let len_after = out.len();
1635                    length_map
1636                        .entry(Rc::as_ptr(child))
1637                        .or_insert(len_after - len_before);
1638                }
1639            }
1640        }
1641    }
1642
1643    #[cfg(feature = "css_ext")]
1644    /// Return a full String, and a list of where substrings came from:
1645    ///
1646    fn extract_raw_text(&self) -> String {
1647        let mut result = String::new();
1648        RenderInput::do_extract_text(
1649            &mut result,
1650            &self.handle,
1651            &mut self.node_lengths.borrow_mut(),
1652        );
1653        result
1654    }
1655}
1656
1657fn dom_to_render_tree_with_context<T: Write>(
1658    handle: Handle,
1659    err_out: &mut T,
1660    context: &mut HtmlContext,
1661) -> Result<Option<RenderNode>> {
1662    html_trace!("### dom_to_render_tree: HTML: {:?}", handle);
1663    #[cfg(feature = "css")]
1664    if context.use_doc_css {
1665        let mut doc_style_data = css::dom_extract::dom_to_stylesheet(handle.clone(), err_out)?;
1666        doc_style_data.merge(std::mem::take(&mut context.style_data));
1667        context.style_data = doc_style_data;
1668    }
1669
1670    let parent_style = Default::default();
1671    let result = tree_map_reduce(
1672        context,
1673        RenderInput::new(handle, parent_style),
1674        |context, input| process_dom_node(input, err_out, context),
1675    );
1676
1677    html_trace!("### dom_to_render_tree: out= {:#?}", result);
1678    result
1679}
1680
#[cfg(feature = "css")]
/// Return a string representation of the CSS rules parsed from
/// the DOM document.
///
/// Any diagnostics written while extracting the stylesheet are discarded.
pub fn dom_to_parsed_style(dom: &RcDom) -> Result<String> {
    let mut discard = std::io::sink();
    let stylesheet = css::dom_extract::dom_to_stylesheet(dom.document.clone(), &mut discard)?;
    Ok(stylesheet.to_string())
}
1689
1690fn pending<F>(
1691    input: RenderInput,
1692    f: F,
1693) -> TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>
1694where
1695    F: FnOnce(&mut HtmlContext, Vec<RenderNode>) -> Option<RenderNode> + 'static,
1696{
1697    TreeMapResult::PendingChildren {
1698        children: input.children(),
1699        cons: Box::new(move |ctx, children| Ok(f(ctx, children))),
1700        prefn: None,
1701        postfn: None,
1702    }
1703}
1704
1705fn pending_noempty<F>(
1706    input: RenderInput,
1707    f: F,
1708) -> TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>
1709where
1710    F: FnOnce(&mut HtmlContext, Vec<RenderNode>) -> Option<RenderNode> + 'static,
1711{
1712    let handle = &input.handle;
1713    let style = &input.parent_style;
1714    TreeMapResult::PendingChildren {
1715        children: handle
1716            .children
1717            .borrow()
1718            .iter()
1719            .map(|child| RenderInput::new(child.clone(), Rc::clone(style)))
1720            .collect(),
1721        cons: Box::new(move |ctx, children| {
1722            if children.is_empty() {
1723                Ok(None)
1724            } else {
1725                Ok(f(ctx, children))
1726            }
1727        }),
1728        prefn: None,
1729        postfn: None,
1730    }
1731}
1732
/// Where `insert_child` should put the new node relative to the existing
/// content.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum ChildPosition {
    /// Before the existing content.
    Start,
    /// After the existing content.
    End,
}
1738
1739/// Prepend or append a FragmentStart (or analogous) marker to an existing
1740/// RenderNode.
1741fn insert_child(
1742    new_child: RenderNode,
1743    mut orig: RenderNode,
1744    position: ChildPosition,
1745) -> RenderNode {
1746    use RenderNodeInfo::*;
1747    html_trace!("insert_child({:?}, {:?}, {:?})", new_child, orig, position);
1748
1749    match orig.info {
1750        // For block elements such as Block and Div, we need to insert
1751        // the node at the front of their children array, otherwise
1752        // the renderer is liable to drop the fragment start marker
1753        // _before_ the new line indicating the end of the previous
1754        // paragraph.
1755        //
1756        // For Container, we do the same thing just to make the data
1757        // less pointlessly nested.
1758        Block(ref mut children)
1759        | ListItem(ref mut children)
1760        | Dd(ref mut children)
1761        | Dt(ref mut children)
1762        | Dl(ref mut children)
1763        | Div(ref mut children)
1764        | BlockQuote(ref mut children)
1765        | Container(ref mut children)
1766        | TableCell(RenderTableCell {
1767            content: ref mut children,
1768            ..
1769        }) => {
1770            match position {
1771                ChildPosition::Start => children.insert(0, new_child),
1772                ChildPosition::End => children.push(new_child),
1773            }
1774            // Now return orig, but we do that outside the match so
1775            // that we've given back the borrowed ref 'children'.
1776        }
1777
1778        // For table rows and tables, push down if there's any content.
1779        TableRow(ref mut rrow, _) => {
1780            // If the row is empty, then there isn't really anything
1781            // to attach the fragment start to.
1782            if let Some(cell) = rrow.cells.first_mut() {
1783                match position {
1784                    ChildPosition::Start => cell.content.insert(0, new_child),
1785                    ChildPosition::End => cell.content.push(new_child),
1786                }
1787            }
1788        }
1789
1790        TableBody(ref mut rows) | Table(RenderTable { ref mut rows, .. }) => {
1791            // If the row is empty, then there isn't really anything
1792            // to attach the fragment start to.
1793            if let Some(rrow) = rows.first_mut() {
1794                if let Some(cell) = rrow.cells.first_mut() {
1795                    match position {
1796                        ChildPosition::Start => cell.content.insert(0, new_child),
1797                        ChildPosition::End => cell.content.push(new_child),
1798                    }
1799                }
1800            }
1801        }
1802
1803        // For anything else, just make a new Container with the
1804        // new_child node and the original one.
1805        _ => {
1806            let result = match position {
1807                ChildPosition::Start => RenderNode::new(Container(vec![new_child, orig])),
1808                ChildPosition::End => RenderNode::new(Container(vec![orig, new_child])),
1809            };
1810            html_trace!("insert_child() -> {:?}", result);
1811            return result;
1812        }
1813    }
1814    html_trace!("insert_child() -> {:?}", &orig);
1815    orig
1816}
1817
1818fn process_dom_node<T: Write>(
1819    input: RenderInput,
1820    err_out: &mut T,
1821    #[allow(unused)] // Used with css feature
1822    context: &mut HtmlContext,
1823) -> Result<TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>> {
1824    use RenderNodeInfo::*;
1825    use TreeMapResult::*;
1826
1827    Ok(match input.handle.clone().data {
1828        Document => pending(input, |_context, cs| Some(RenderNode::new(Container(cs)))),
1829        Comment { .. } => Nothing,
1830        Element {
1831            ref name,
1832            ref attrs,
1833            ..
1834        } => {
1835            let mut frag_from_name_attr = false;
1836
1837            let RenderInput {
1838                ref handle,
1839                ref parent_style,
1840                ..
1841            } = input;
1842
1843            #[cfg(feature = "css")]
1844            let use_doc_css = context.use_doc_css;
1845            #[cfg(not(feature = "css"))]
1846            let use_doc_css = false;
1847
1848            let computed = {
1849                let computed = context
1850                    .style_data
1851                    .computed_style(parent_style, handle, use_doc_css);
1852                #[cfg(feature = "css")]
1853                match computed.display.val() {
1854                    Some(css::Display::None) => return Ok(Nothing),
1855                    #[cfg(feature = "css_ext")]
1856                    Some(css::Display::ExtRawDom) => {
1857                        use html5ever::interface::{NodeOrText, TreeSink};
1858                        use html5ever::{LocalName, QualName};
1859                        let mut html_bytes: Vec<u8> = Default::default();
1860                        handle.serialize(&mut html_bytes)?;
1861
1862                        // Make a new DOM object so that we can easily create new
1863                        // nodes.  They will be independent.
1864                        let dom = RcDom::default();
1865
1866                        // We'll enclose it in a `<pre>`, so that we have an element in the right
1867                        // shape to process.
1868                        let html_string = String::from_utf8_lossy(&html_bytes).into_owned();
1869                        let pre_node = dom.create_element(
1870                            QualName::new(None, ns!(html), LocalName::from("pre")),
1871                            vec![],
1872                            Default::default(),
1873                        );
1874                        dom.append(&pre_node, NodeOrText::AppendText(html_string.into()));
1875
1876                        // Remove the RawDom setting; we don't want to be recursively converting to
1877                        // raw DOM.
1878                        let mut my_computed = computed;
1879                        my_computed.display = Default::default();
1880                        // Preformat it
1881                        my_computed.white_space.maybe_update(
1882                            false,
1883                            StyleOrigin::Agent,
1884                            Default::default(),
1885                            WhiteSpace::Pre,
1886                        );
1887                        my_computed.internal_pre = true;
1888
1889                        let new_input = RenderInput {
1890                            handle: pre_node,
1891                            parent_style: Rc::new(my_computed.clone()),
1892                            extra_styles: Default::default(),
1893                            node_lengths: Default::default(),
1894                        };
1895
1896                        if let Some(syntax_info) = my_computed.syntax.val() {
1897                            if let Some(highlighter) =
1898                                context.syntax_highlighters.get(&syntax_info.language)
1899                            {
1900                                // Do the highlighting here.
1901                                let text = new_input.extract_raw_text();
1902                                let highlighted = highlighter(&text);
1903                                new_input.set_syntax_info(&text, highlighted);
1904                            }
1905                        }
1906                        return Ok(pending(new_input, move |_, cs| {
1907                            Some(RenderNode::new_styled(Container(cs), my_computed))
1908                        }));
1909                    }
1910                    _ => (),
1911                }
1912                #[cfg(feature = "css_ext")]
1913                if let Some(syntax_info) = computed.syntax.val() {
1914                    if let Some(highlighter) =
1915                        context.syntax_highlighters.get(&syntax_info.language)
1916                    {
1917                        let extracted_text = input.extract_raw_text();
1918                        let highlighted = highlighter(&extracted_text);
1919                        input.set_syntax_info(&extracted_text, highlighted);
1920                    }
1921                }
1922
1923                computed
1924            };
1925
1926            let computed_before = computed.content_before.clone();
1927            let computed_after = computed.content_after.clone();
1928
1929            let result = match name.expanded() {
1930                expanded_name!(html "html") | expanded_name!(html "body") => {
1931                    /* process children, but don't add anything */
1932                    pending(input, move |_, cs| {
1933                        Some(RenderNode::new_styled(Container(cs), computed))
1934                    })
1935                }
1936                expanded_name!(html "link")
1937                | expanded_name!(html "meta")
1938                | expanded_name!(html "hr")
1939                | expanded_name!(html "script")
1940                | expanded_name!(html "style")
1941                | expanded_name!(html "head") => {
1942                    /* Ignore the head and its children */
1943                    Nothing
1944                }
1945                expanded_name!(html "span") => {
1946                    /* process children, but don't add anything */
1947                    pending_noempty(input, move |_, cs| {
1948                        Some(RenderNode::new_styled(Container(cs), computed))
1949                    })
1950                }
1951                expanded_name!(html "a") => {
1952                    let borrowed = attrs.borrow();
1953                    let mut target = None;
1954                    frag_from_name_attr = true;
1955                    for attr in borrowed.iter() {
1956                        if &attr.name.local == "href" {
1957                            target = Some(&*attr.value);
1958                            break;
1959                        }
1960                    }
1961                    PendingChildren {
1962                        children: input.children(),
1963                        cons: if let Some(href) = target {
1964                            let href: String = href.into();
1965                            Box::new(move |_, cs: Vec<RenderNode>| {
1966                                if cs.iter().any(|c| !c.is_shallow_empty()) {
1967                                    Ok(Some(RenderNode::new_styled(Link(href, cs), computed)))
1968                                } else {
1969                                    Ok(None)
1970                                }
1971                            })
1972                        } else {
1973                            Box::new(move |_, cs| {
1974                                Ok(Some(RenderNode::new_styled(Container(cs), computed)))
1975                            })
1976                        },
1977                        prefn: None,
1978                        postfn: None,
1979                    }
1980                }
1981                expanded_name!(html "em")
1982                | expanded_name!(html "i")
1983                | expanded_name!(html "ins") => pending(input, move |_, cs| {
1984                    Some(RenderNode::new_styled(Em(cs), computed))
1985                }),
1986                expanded_name!(html "strong") => pending(input, move |_, cs| {
1987                    Some(RenderNode::new_styled(Strong(cs), computed))
1988                }),
1989                expanded_name!(html "s") | expanded_name!(html "del") => {
1990                    pending(input, move |_, cs| {
1991                        Some(RenderNode::new_styled(Strikeout(cs), computed))
1992                    })
1993                }
1994                expanded_name!(html "code") => pending(input, move |_, cs| {
1995                    Some(RenderNode::new_styled(Code(cs), computed))
1996                }),
1997                expanded_name!(html "img") => {
1998                    let borrowed = attrs.borrow();
1999                    let mut title = None;
2000                    let mut src = None;
2001                    for attr in borrowed.iter() {
2002                        if &attr.name.local == "alt" && !attr.value.is_empty() {
2003                            title = Some(&*attr.value);
2004                        }
2005                        if &attr.name.local == "src" && !attr.value.is_empty() {
2006                            src = Some(&*attr.value);
2007                        }
2008                        if title.is_some() && src.is_some() {
2009                            break;
2010                        }
2011                    }
2012                    // Ignore `<img>` without src.
2013                    if let Some(src) = src {
2014                        Finished(RenderNode::new_styled(
2015                            Img(src.into(), title.unwrap_or("").into()),
2016                            computed,
2017                        ))
2018                    } else {
2019                        Nothing
2020                    }
2021                }
2022                expanded_name!(svg "svg") => {
2023                    // Inline SVG: look for a <title> child for the title.
2024                    let mut title = None;
2025
2026                    for node in input.handle.children.borrow().iter() {
2027                        if let markup5ever_rcdom::NodeData::Element { ref name, .. } = node.data {
2028                            if matches!(name.expanded(), expanded_name!(svg "title")) {
2029                                let mut title_str = String::new();
2030                                for subnode in node.children.borrow().iter() {
2031                                    if let markup5ever_rcdom::NodeData::Text { ref contents } =
2032                                        subnode.data
2033                                    {
2034                                        title_str.push_str(&contents.borrow());
2035                                    }
2036                                }
2037                                title = Some(title_str);
2038                            } else {
2039                                // The first item has to be <title>
2040                                break;
2041                            }
2042                        }
2043                    }
2044
2045                    Finished(RenderNode::new_styled(
2046                        Svg(title.unwrap_or_else(|| String::new())),
2047                        computed,
2048                    ))
2049                }
2050                expanded_name!(html "h1")
2051                | expanded_name!(html "h2")
2052                | expanded_name!(html "h3")
2053                | expanded_name!(html "h4")
2054                | expanded_name!(html "h5")
2055                | expanded_name!(html "h6") => {
2056                    let level: usize = name.local[1..].parse().unwrap();
2057                    pending(input, move |_, cs| {
2058                        Some(RenderNode::new_styled(Header(level, cs), computed))
2059                    })
2060                }
2061                expanded_name!(html "p") => pending_noempty(input, move |_, cs| {
2062                    Some(RenderNode::new_styled(Block(cs), computed))
2063                }),
2064                expanded_name!(html "li") => pending(input, move |_, cs| {
2065                    Some(RenderNode::new_styled(ListItem(cs), computed))
2066                }),
2067                expanded_name!(html "sup") => pending(input, move |_, cs| {
2068                    Some(RenderNode::new_styled(Sup(cs), computed))
2069                }),
2070                expanded_name!(html "div") => pending_noempty(input, move |_, cs| {
2071                    Some(RenderNode::new_styled(Div(cs), computed))
2072                }),
2073                expanded_name!(html "pre") => pending(input, move |_, cs| {
2074                    let mut computed = computed;
2075                    computed.white_space.maybe_update(
2076                        false,
2077                        StyleOrigin::Agent,
2078                        Default::default(),
2079                        WhiteSpace::Pre,
2080                    );
2081                    computed.internal_pre = true;
2082                    Some(RenderNode::new_styled(Block(cs), computed))
2083                }),
2084                expanded_name!(html "br") => Finished(RenderNode::new_styled(Break, computed)),
2085                expanded_name!(html "wbr") => {
2086                    Finished(RenderNode::new_styled(Text("\u{200b}".into()), computed))
2087                }
2088                expanded_name!(html "table") => table_to_render_tree(input, computed, err_out),
2089                expanded_name!(html "thead") | expanded_name!(html "tbody") => {
2090                    tbody_to_render_tree(input, computed, err_out)
2091                }
2092                expanded_name!(html "tr") => tr_to_render_tree(input, computed, err_out),
2093                expanded_name!(html "th") | expanded_name!(html "td") => {
2094                    td_to_render_tree(input, computed, err_out)
2095                }
2096                expanded_name!(html "blockquote") => pending_noempty(input, move |_, cs| {
2097                    Some(RenderNode::new_styled(BlockQuote(cs), computed))
2098                }),
2099                expanded_name!(html "ul") => pending_noempty(input, move |_, cs| {
2100                    Some(RenderNode::new_styled(Ul(cs), computed))
2101                }),
2102                expanded_name!(html "ol") => {
2103                    let borrowed = attrs.borrow();
2104                    let mut start = 1;
2105                    for attr in borrowed.iter() {
2106                        if &attr.name.local == "start" {
2107                            start = attr.value.parse().ok().unwrap_or(1);
2108                            break;
2109                        }
2110                    }
2111
2112                    pending_noempty(input, move |_, cs| {
2113                        // There can be extra nodes which aren't ListItem (like whitespace text
2114                        // nodes).  We need to filter those out to avoid messing up the rendering.
2115                        let cs = cs
2116                            .into_iter()
2117                            .filter(|n| matches!(n.info, RenderNodeInfo::ListItem(..)))
2118                            .collect();
2119                        Some(RenderNode::new_styled(Ol(start, cs), computed))
2120                    })
2121                }
2122                expanded_name!(html "dl") => {
2123                    pending_noempty(input, move |_, cs| {
2124                        // There can be extra nodes which aren't Dt or Dd (like whitespace text
2125                        // nodes).  We need to filter those out to avoid messing up the rendering.
2126                        let cs = cs
2127                            .into_iter()
2128                            .filter(|n| {
2129                                matches!(n.info, RenderNodeInfo::Dt(..) | RenderNodeInfo::Dd(..))
2130                            })
2131                            .collect();
2132                        Some(RenderNode::new_styled(Dl(cs), computed))
2133                    })
2134                }
2135                expanded_name!(html "dt") => pending(input, move |_, cs| {
2136                    Some(RenderNode::new_styled(Dt(cs), computed))
2137                }),
2138                expanded_name!(html "dd") => pending(input, move |_, cs| {
2139                    Some(RenderNode::new_styled(Dd(cs), computed))
2140                }),
2141                _ => {
2142                    html_trace!("Unhandled element: {:?}\n", name.local);
2143                    pending_noempty(input, move |_, cs| {
2144                        Some(RenderNode::new_styled(Container(cs), computed))
2145                    })
2146                }
2147            };
2148
2149            let mut fragment = None;
2150            let borrowed = attrs.borrow();
2151            for attr in borrowed.iter() {
2152                if &attr.name.local == "id" || (frag_from_name_attr && &attr.name.local == "name") {
2153                    fragment = Some(attr.value.to_string());
2154                    break;
2155                }
2156            }
2157
2158            let result = if computed_before.is_some() || computed_after.is_some() {
2159                let wrap_nodes = move |mut node: RenderNode| {
2160                    if let Some(ref content) = computed_before {
2161                        if let Some(pseudo_content) = content.content.val() {
2162                            node = insert_child(
2163                                RenderNode::new(Text(pseudo_content.text.clone())),
2164                                node,
2165                                ChildPosition::Start,
2166                            );
2167                        }
2168                    }
2169                    if let Some(ref content) = computed_after {
2170                        if let Some(pseudo_content) = content.content.val() {
2171                            node = insert_child(
2172                                RenderNode::new(Text(pseudo_content.text.clone())),
2173                                node,
2174                                ChildPosition::End,
2175                            );
2176                        }
2177                    }
2178                    node
2179                };
2180                // Insert extra content nodes
2181                match result {
2182                    Finished(node) => Finished(wrap_nodes(node)),
2183                    // Do we need to wrap a Nothing?
2184                    Nothing => Nothing,
2185                    PendingChildren {
2186                        children,
2187                        cons,
2188                        prefn,
2189                        postfn,
2190                    } => PendingChildren {
2191                        children,
2192                        prefn,
2193                        postfn,
2194                        cons: Box::new(move |ctx, ch| match cons(ctx, ch)? {
2195                            None => Ok(None),
2196                            Some(node) => Ok(Some(wrap_nodes(node))),
2197                        }),
2198                    },
2199                }
2200            } else {
2201                result
2202            };
2203
2204            let Some(fragname) = fragment else {
2205                return Ok(result);
2206            };
2207            match result {
2208                Finished(node) => Finished(insert_child(
2209                    RenderNode::new(FragStart(fragname)),
2210                    node,
2211                    ChildPosition::Start,
2212                )),
2213                Nothing => Finished(RenderNode::new(FragStart(fragname))),
2214                PendingChildren {
2215                    children,
2216                    cons,
2217                    prefn,
2218                    postfn,
2219                } => PendingChildren {
2220                    children,
2221                    prefn,
2222                    postfn,
2223                    cons: Box::new(move |ctx, ch| {
2224                        let fragnode = RenderNode::new(FragStart(fragname));
2225                        match cons(ctx, ch)? {
2226                            None => Ok(Some(fragnode)),
2227                            Some(node) => {
2228                                Ok(Some(insert_child(fragnode, node, ChildPosition::Start)))
2229                            }
2230                        }
2231                    }),
2232                },
2233            }
2234        }
2235        markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
2236            #[cfg(feature = "css_ext")]
2237            if !input.extra_styles.borrow().is_empty() {
2238                let mut nodes = Vec::new();
2239                let mut offset = 0;
2240                for part in &*input.extra_styles.borrow() {
2241                    let (start, end) = (part.0.start, part.0.end);
2242                    if start > offset {
2243                        // Handle the unstyled bit at the start
2244                        nodes.push(RenderNode::new(Text((tstr.borrow()[offset..start]).into())));
2245                    }
2246                    let mut cstyle = input.parent_style.inherit();
2247                    cstyle.colour.maybe_update(
2248                        // TODO: use the right specificity
2249                        cstyle.syntax.important,
2250                        cstyle.syntax.origin,
2251                        cstyle.syntax.specificity,
2252                        part.1.fg_colour,
2253                    );
2254                    if let Some(bgcol) = part.1.bg_colour {
2255                        cstyle.bg_colour.maybe_update(
2256                            // TODO: use the right specificity
2257                            cstyle.syntax.important,
2258                            cstyle.syntax.origin,
2259                            cstyle.syntax.specificity,
2260                            bgcol,
2261                        );
2262                    }
2263                    // Now the styled part
2264                    nodes.push(RenderNode::new_styled(
2265                        Text((tstr.borrow()[start..end]).into()),
2266                        cstyle,
2267                    ));
2268                    offset = end;
2269                }
2270                // the final bit
2271                if offset < tstr.borrow().len() {
2272                    nodes.push(RenderNode::new(Text((tstr.borrow()[offset..]).into())));
2273                }
2274                if nodes.len() == 1 {
2275                    return Ok(Finished(nodes.pop().unwrap()));
2276                } else {
2277                    return Ok(Finished(RenderNode::new(RenderNodeInfo::Container(nodes))));
2278                }
2279            }
2280
2281            Finished(RenderNode::new(Text((&*tstr.borrow()).into())))
2282        }
2283        _ => {
2284            // NodeData doesn't have a Debug impl.
2285            writeln!(err_out, "Unhandled node type.").unwrap();
2286            Nothing
2287        }
2288    })
2289}
2290
2291fn render_tree_to_string<T: Write, D: TextDecorator>(
2292    context: &mut HtmlContext,
2293    renderer: SubRenderer<D>,
2294    decorator: &D,
2295    tree: RenderNode,
2296    err_out: &mut T,
2297) -> Result<SubRenderer<D>> {
2298    /* Phase 1: get size estimates. */
2299    // can't actually error, but Ok-wrap to satisfy tree_map_reduce signature
2300    tree_map_reduce(context, &tree, |context, node| {
2301        Ok(precalc_size_estimate(node, context, decorator))
2302    })?;
2303    /* Phase 2: actually render. */
2304    let mut renderer = TextRenderer::new(renderer);
2305    tree_map_reduce(&mut renderer, tree, |renderer, node| {
2306        Ok(do_render_node(renderer, node, err_out)?)
2307    })?;
2308    let (mut renderer, links) = renderer.into_inner();
2309    let lines = renderer.finalise(links);
2310    // And add the links
2311    if !lines.is_empty() {
2312        renderer.start_block()?;
2313        renderer.fmt_links(lines);
2314    }
2315    Ok(renderer)
2316}
2317
2318fn pending2<
2319    D: TextDecorator,
2320    F: FnOnce(
2321            &mut TextRenderer<D>,
2322            Vec<Option<SubRenderer<D>>>,
2323        ) -> Result<Option<Option<SubRenderer<D>>>>
2324        + 'static,
2325>(
2326    children: Vec<RenderNode>,
2327    f: F,
2328) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2329    TreeMapResult::PendingChildren {
2330        children,
2331        cons: Box::new(f),
2332        prefn: None,
2333        postfn: None,
2334    }
2335}
2336
2337/// Keep track of what style state has been applied to a renderer so that we
2338/// can undo it.
2339#[derive(Default)]
2340struct PushedStyleInfo {
2341    colour: bool,
2342    bgcolour: bool,
2343    white_space: bool,
2344    preformat: bool,
2345}
2346
2347impl PushedStyleInfo {
2348    fn apply<D: TextDecorator>(render: &mut TextRenderer<D>, style: &ComputedStyle) -> Self {
2349        #[allow(unused_mut)]
2350        let mut result: PushedStyleInfo = Default::default();
2351        #[cfg(feature = "css")]
2352        if let Some(col) = style.colour.val() {
2353            render.push_colour(*col);
2354            result.colour = true;
2355        }
2356        #[cfg(feature = "css")]
2357        if let Some(col) = style.bg_colour.val() {
2358            render.push_bgcolour(*col);
2359            result.bgcolour = true;
2360        }
2361        if let Some(ws) = style.white_space.val() {
2362            if let WhiteSpace::Pre | WhiteSpace::PreWrap = ws {
2363                render.push_ws(*ws);
2364                result.white_space = true;
2365            }
2366        }
2367        if style.internal_pre {
2368            render.push_preformat();
2369            result.preformat = true;
2370        }
2371        result
2372    }
2373    fn unwind<D: TextDecorator>(self, renderer: &mut TextRenderer<D>) {
2374        if self.bgcolour {
2375            renderer.pop_bgcolour();
2376        }
2377        if self.colour {
2378            renderer.pop_colour();
2379        }
2380        if self.white_space {
2381            renderer.pop_ws();
2382        }
2383        if self.preformat {
2384            renderer.pop_preformat();
2385        }
2386    }
2387}
2388
2389fn do_render_node<T: Write, D: TextDecorator>(
2390    renderer: &mut TextRenderer<D>,
2391    tree: RenderNode,
2392    err_out: &mut T,
2393) -> render::Result<TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>>> {
2394    html_trace!("do_render_node({:?}", tree);
2395    use RenderNodeInfo::*;
2396    use TreeMapResult::*;
2397
2398    let size_estimate = tree.size_estimate.get().unwrap_or_default();
2399
2400    let pushed_style = PushedStyleInfo::apply(renderer, &tree.style);
2401
2402    Ok(match tree.info {
2403        Text(ref tstr) => {
2404            renderer.add_inline_text(tstr)?;
2405            pushed_style.unwind(renderer);
2406            Finished(None)
2407        }
2408        Container(children) => pending2(children, |renderer, _| {
2409            pushed_style.unwind(renderer);
2410            Ok(Some(None))
2411        }),
2412        Link(href, children) => {
2413            renderer.start_link(&href)?;
2414            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2415                renderer.end_link()?;
2416                pushed_style.unwind(renderer);
2417                Ok(Some(None))
2418            })
2419        }
2420        Em(children) => {
2421            renderer.start_emphasis()?;
2422            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2423                renderer.end_emphasis()?;
2424                pushed_style.unwind(renderer);
2425                Ok(Some(None))
2426            })
2427        }
2428        Strong(children) => {
2429            renderer.start_strong()?;
2430            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2431                renderer.end_strong()?;
2432                pushed_style.unwind(renderer);
2433                Ok(Some(None))
2434            })
2435        }
2436        Strikeout(children) => {
2437            renderer.start_strikeout()?;
2438            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2439                renderer.end_strikeout()?;
2440                pushed_style.unwind(renderer);
2441                Ok(Some(None))
2442            })
2443        }
2444        Code(children) => {
2445            renderer.start_code()?;
2446            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2447                renderer.end_code()?;
2448                pushed_style.unwind(renderer);
2449                Ok(Some(None))
2450            })
2451        }
2452        Img(src, title) => {
2453            renderer.add_image(&src, &title)?;
2454            pushed_style.unwind(renderer);
2455            Finished(None)
2456        }
2457        Svg(title) => {
2458            renderer.add_image("", &title)?;
2459            pushed_style.unwind(renderer);
2460            Finished(None)
2461        }
2462        Block(children) | ListItem(children) => {
2463            renderer.start_block()?;
2464            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2465                renderer.end_block();
2466                pushed_style.unwind(renderer);
2467                Ok(Some(None))
2468            })
2469        }
2470        Header(level, children) => {
2471            let prefix = renderer.header_prefix(level);
2472            let prefix_size = size_estimate.prefix_size;
2473            debug_assert!(prefix.len() == prefix_size);
2474            let min_width = size_estimate.min_width;
2475            let inner_width = min_width.saturating_sub(prefix_size);
2476            let sub_builder =
2477                renderer.new_sub_renderer(renderer.width_minus(prefix_size, inner_width)?)?;
2478            renderer.push(sub_builder);
2479            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2480                let sub_builder = renderer.pop();
2481
2482                renderer.start_block()?;
2483                renderer.append_subrender(sub_builder, repeat(&prefix[..]))?;
2484                renderer.end_block();
2485                pushed_style.unwind(renderer);
2486                Ok(Some(None))
2487            })
2488        }
2489        Div(children) => {
2490            renderer.new_line()?;
2491            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2492                renderer.new_line()?;
2493                pushed_style.unwind(renderer);
2494                Ok(Some(None))
2495            })
2496        }
2497        BlockQuote(children) => {
2498            let prefix = renderer.quote_prefix();
2499            debug_assert!(size_estimate.prefix_size == prefix.len());
2500            let inner_width = size_estimate.min_width - prefix.len();
2501            let sub_builder =
2502                renderer.new_sub_renderer(renderer.width_minus(prefix.len(), inner_width)?)?;
2503            renderer.push(sub_builder);
2504            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2505                let sub_builder = renderer.pop();
2506
2507                renderer.start_block()?;
2508                renderer.append_subrender(sub_builder, repeat(&prefix[..]))?;
2509                renderer.end_block();
2510                pushed_style.unwind(renderer);
2511                Ok(Some(None))
2512            })
2513        }
2514        Ul(items) => {
2515            let prefix = renderer.unordered_item_prefix();
2516            let prefix_len = prefix.len();
2517
2518            TreeMapResult::PendingChildren {
2519                children: items,
2520                cons: Box::new(|renderer, _| {
2521                    pushed_style.unwind(renderer);
2522                    Ok(Some(None))
2523                }),
2524                prefn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2525                    let inner_width = size_estimate.min_width - prefix_len;
2526                    let sub_builder = renderer
2527                        .new_sub_renderer(renderer.width_minus(prefix_len, inner_width)?)?;
2528                    renderer.push(sub_builder);
2529                    Ok(())
2530                })),
2531                postfn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2532                    let sub_builder = renderer.pop();
2533
2534                    let indent = " ".repeat(prefix.len());
2535
2536                    renderer.append_subrender(
2537                        sub_builder,
2538                        once(&prefix[..]).chain(repeat(&indent[..])),
2539                    )?;
2540                    Ok(())
2541                })),
2542            }
2543        }
2544        Ol(start, items) => {
2545            let num_items = items.len();
2546
2547            // The prefix width could be at either end if the start is negative.
2548            let min_number = start;
2549            // Assumption: num_items can't overflow isize.
2550            let max_number = start + (num_items as i64) - 1;
2551            let prefix_width_min = renderer.ordered_item_prefix(min_number).len();
2552            let prefix_width_max = renderer.ordered_item_prefix(max_number).len();
2553            let prefix_width = max(prefix_width_min, prefix_width_max);
2554            let prefixn = format!("{: <width$}", "", width = prefix_width);
2555            let i: Cell<_> = Cell::new(start);
2556
2557            TreeMapResult::PendingChildren {
2558                children: items,
2559                cons: Box::new(|renderer, _| {
2560                    pushed_style.unwind(renderer);
2561                    Ok(Some(None))
2562                }),
2563                prefn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2564                    let inner_min = size_estimate.min_width - size_estimate.prefix_size;
2565                    let sub_builder = renderer
2566                        .new_sub_renderer(renderer.width_minus(prefix_width, inner_min)?)?;
2567                    renderer.push(sub_builder);
2568                    Ok(())
2569                })),
2570                postfn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2571                    let sub_builder = renderer.pop();
2572                    let prefix1 = renderer.ordered_item_prefix(i.get());
2573                    let prefix1 = format!("{: <width$}", prefix1, width = prefix_width);
2574
2575                    renderer.append_subrender(
2576                        sub_builder,
2577                        once(prefix1.as_str()).chain(repeat(prefixn.as_str())),
2578                    )?;
2579                    i.set(i.get() + 1);
2580                    Ok(())
2581                })),
2582            }
2583        }
2584        Dl(items) => {
2585            renderer.start_block()?;
2586
2587            TreeMapResult::PendingChildren {
2588                children: items,
2589                cons: Box::new(|renderer, _| {
2590                    pushed_style.unwind(renderer);
2591                    Ok(Some(None))
2592                }),
2593                prefn: None,
2594                postfn: None,
2595            }
2596        }
2597        Dt(children) => {
2598            renderer.new_line()?;
2599            renderer.start_emphasis()?;
2600            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2601                renderer.end_emphasis()?;
2602                pushed_style.unwind(renderer);
2603                Ok(Some(None))
2604            })
2605        }
2606        Dd(children) => {
2607            let inner_min = size_estimate.min_width - 2;
2608            let sub_builder = renderer.new_sub_renderer(renderer.width_minus(2, inner_min)?)?;
2609            renderer.push(sub_builder);
2610            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2611                let sub_builder = renderer.pop();
2612                renderer.append_subrender(sub_builder, repeat("  "))?;
2613                pushed_style.unwind(renderer);
2614                Ok(Some(None))
2615            })
2616        }
2617        Break => {
2618            renderer.new_line_hard()?;
2619            pushed_style.unwind(renderer);
2620            Finished(None)
2621        }
2622        Table(tab) => render_table_tree(renderer, tab, err_out)?,
2623        TableRow(row, false) => render_table_row(renderer, row, pushed_style, err_out),
2624        TableRow(row, true) => render_table_row_vert(renderer, row, pushed_style, err_out),
2625        TableBody(_) => unimplemented!("Unexpected TableBody while rendering"),
2626        TableCell(cell) => render_table_cell(renderer, cell, pushed_style, err_out),
2627        FragStart(fragname) => {
2628            renderer.record_frag_start(&fragname);
2629            pushed_style.unwind(renderer);
2630            Finished(None)
2631        }
2632        Sup(children) => {
2633            // Special case for digit-only superscripts - use superscript
2634            // characters.
2635            fn sup_digits(children: &[RenderNode]) -> Option<String> {
2636                let [node] = children else {
2637                    return None;
2638                };
2639                if let Text(s) = &node.info {
2640                    if s.chars().all(|d| d.is_ascii_digit()) {
2641                        // It's just a string of digits - replace by superscript characters.
2642                        const SUPERSCRIPTS: [char; 10] =
2643                            ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];
2644                        return Some(
2645                            s.bytes()
2646                                .map(|b| SUPERSCRIPTS[(b - b'0') as usize])
2647                                .collect(),
2648                        );
2649                    }
2650                }
2651                None
2652            }
2653            if let Some(digitstr) = sup_digits(&children) {
2654                renderer.add_inline_text(&digitstr)?;
2655                pushed_style.unwind(renderer);
2656                Finished(None)
2657            } else {
2658                renderer.start_superscript()?;
2659                pending2(children, |renderer: &mut TextRenderer<D>, _| {
2660                    renderer.end_superscript()?;
2661                    pushed_style.unwind(renderer);
2662                    Ok(Some(None))
2663                })
2664            }
2665        }
2666    })
2667}
2668
2669fn render_table_tree<T: Write, D: TextDecorator>(
2670    renderer: &mut TextRenderer<D>,
2671    table: RenderTable,
2672    _err_out: &mut T,
2673) -> render::Result<TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>>> {
2674    /* Now lay out the table. */
2675    let num_columns = table.num_columns;
2676
2677    /* Heuristic: scale the column widths according to how much content there is. */
2678    let mut col_sizes: Vec<SizeEstimate> = vec![Default::default(); num_columns];
2679
2680    for row in table.rows() {
2681        let mut colno = 0;
2682        for cell in row.cells() {
2683            // FIXME: get_size_estimate is still recursive.
2684            let mut estimate = cell.get_size_estimate();
2685
2686            // If the cell has a colspan>1, then spread its size between the
2687            // columns.
2688            estimate.size /= cell.colspan;
2689            estimate.min_width /= cell.colspan;
2690            for i in 0..cell.colspan {
2691                col_sizes[colno + i] = (col_sizes[colno + i]).max(estimate);
2692            }
2693            colno += cell.colspan;
2694        }
2695    }
2696    let tot_size: usize = col_sizes.iter().map(|est| est.size).sum();
2697    let min_size: usize = col_sizes.iter().map(|est| est.min_width).sum::<usize>()
2698        + col_sizes.len().saturating_sub(1);
2699    let width = renderer.width();
2700
2701    let vert_row = renderer.options.raw || (min_size > width || width == 0);
2702
2703    let mut col_widths: Vec<usize> = if !vert_row {
2704        col_sizes
2705            .iter()
2706            .map(|sz| {
2707                if sz.size == 0 {
2708                    0
2709                } else {
2710                    min(
2711                        sz.size,
2712                        if usize::MAX / width <= sz.size {
2713                            // The provided width is too large to multiply by width,
2714                            // so do it the other way around.
2715                            max((width / tot_size) * sz.size, sz.min_width)
2716                        } else {
2717                            max(sz.size * width / tot_size, sz.min_width)
2718                        },
2719                    )
2720                }
2721            })
2722            .collect()
2723    } else {
2724        col_sizes.iter().map(|_| width).collect()
2725    };
2726
2727    if !vert_row {
2728        let num_cols = col_widths.len();
2729        if num_cols > 0 {
2730            loop {
2731                let cur_width = col_widths.iter().sum::<usize>() + num_cols - 1;
2732                if cur_width <= width {
2733                    break;
2734                }
2735                let (i, _) = col_widths
2736                    .iter()
2737                    .enumerate()
2738                    .max_by_key(|&(colno, width)| {
2739                        (
2740                            width.saturating_sub(col_sizes[colno].min_width),
2741                            width,
2742                            usize::MAX - colno,
2743                        )
2744                    })
2745                    .unwrap();
2746                col_widths[i] -= 1;
2747            }
2748        }
2749    }
2750
2751    let table_width = if vert_row {
2752        width
2753    } else {
2754        col_widths.iter().cloned().sum::<usize>()
2755            + col_widths
2756                .iter()
2757                .filter(|&w| w > &0)
2758                .count()
2759                .saturating_sub(1)
2760    };
2761
2762    renderer.start_table()?;
2763
2764    if table_width != 0 && renderer.options.draw_borders {
2765        renderer.add_horizontal_border_width(table_width)?;
2766    }
2767
2768    Ok(TreeMapResult::PendingChildren {
2769        children: table.into_rows(col_widths, vert_row),
2770        cons: Box::new(|_, _| Ok(Some(None))),
2771        prefn: None,
2772        postfn: None,
2773    })
2774}
2775
2776fn render_table_row<T: Write, D: TextDecorator>(
2777    _renderer: &mut TextRenderer<D>,
2778    row: RenderTableRow,
2779    pushed_style: PushedStyleInfo,
2780    _err_out: &mut T,
2781) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2782    let rowspans: Vec<usize> = row.cells().map(|cell| cell.rowspan).collect();
2783    let have_overhang = row.cells().any(|cell| cell.is_dummy);
2784    TreeMapResult::PendingChildren {
2785        children: row.into_cells(false),
2786        cons: Box::new(move |builders, children| {
2787            let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
2788            if have_overhang || children.iter().any(|c| !c.empty()) {
2789                builders.append_columns_with_borders(
2790                    children.into_iter().zip(rowspans.into_iter()),
2791                    true,
2792                )?;
2793            }
2794            pushed_style.unwind(builders);
2795            Ok(Some(None))
2796        }),
2797        prefn: Some(Box::new(|renderer: &mut TextRenderer<D>, node| {
2798            if let RenderNodeInfo::TableCell(ref cell) = node.info {
2799                let sub_builder = renderer.new_sub_renderer(cell.col_width.unwrap())?;
2800                renderer.push(sub_builder);
2801                Ok(())
2802            } else {
2803                panic!()
2804            }
2805        })),
2806        postfn: Some(Box::new(|_renderer: &mut TextRenderer<D>, _| Ok(()))),
2807    }
2808}
2809
2810fn render_table_row_vert<T: Write, D: TextDecorator>(
2811    _renderer: &mut TextRenderer<D>,
2812    row: RenderTableRow,
2813    pushed_style: PushedStyleInfo,
2814    _err_out: &mut T,
2815) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2816    TreeMapResult::PendingChildren {
2817        children: row.into_cells(true),
2818        cons: Box::new(|builders, children| {
2819            let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
2820            builders.append_vert_row(children)?;
2821            pushed_style.unwind(builders);
2822            Ok(Some(None))
2823        }),
2824        prefn: Some(Box::new(|renderer: &mut TextRenderer<D>, node| {
2825            if let RenderNodeInfo::TableCell(ref cell) = node.info {
2826                let sub_builder = renderer.new_sub_renderer(cell.col_width.unwrap())?;
2827                renderer.push(sub_builder);
2828                Ok(())
2829            } else {
2830                Err(Error::Fail)
2831            }
2832        })),
2833        postfn: Some(Box::new(|_renderer: &mut TextRenderer<D>, _| Ok(()))),
2834    }
2835}
2836
2837fn render_table_cell<T: Write, D: TextDecorator>(
2838    _renderer: &mut TextRenderer<D>,
2839    cell: RenderTableCell,
2840    pushed_style: PushedStyleInfo,
2841    _err_out: &mut T,
2842) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2843    pending2(cell.content, |renderer: &mut TextRenderer<D>, _| {
2844        pushed_style.unwind(renderer);
2845        let sub_builder = renderer.pop();
2846
2847        Ok(Some(Some(sub_builder)))
2848    })
2849}
2850
2851pub mod config {
2852    //! Configure the HTML to text translation using the `Config` type, which can be
2853    //! constructed using one of the functions in this module.
2854    use std::io;
2855
2856    use super::Error;
2857    use crate::css::types::Importance;
2858    use crate::css::{Ruleset, Selector, SelectorComponent, Style, StyleData};
2859    use crate::{
2860        css::{PseudoContent, PseudoElement, StyleDecl},
2861        render::text_renderer::{
2862            PlainDecorator, RichAnnotation, RichDecorator, TaggedLine, TextDecorator,
2863        },
2864        HtmlContext, RenderTree, Result, MIN_WIDTH,
2865    };
2866    #[cfg(feature = "css_ext")]
2867    use crate::{HighlighterMap, SyntaxHighlighter};
2868
    /// Specify how images with missing or empty alt text are handled.
    #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum ImageRenderMode {
        /// Ignore `<img>` without alt, or `<svg>` without `<title>`.
        /// This is the default mode.
        #[default]
        IgnoreEmpty,
        /// Always process images (will be handled by the decorator).
        ShowAlways,
        /// Use a fixed replacement text (e.g. emoji).
        Replace(&'static str),
        /// Replace with the last component of the link filename, if any.
        Filename,
    }
2883
    /// Configure the HTML processing.
    ///
    /// A `Config` is created by one of the functions in this module and
    /// adjusted with its builder-style methods.
    pub struct Config<D: TextDecorator> {
        /// The decorator used when rendering.
        decorator: D,

        /// Maximum text wrap width, if one has been set.
        max_wrap_width: Option<usize>,

        /// CSS style data (user and agent rules).
        style: StyleData,
        /// Whether to use CSS from `<style>` elements in the document.
        #[cfg(feature = "css")]
        use_doc_css: bool,

        /// Whether lines are padded out to the full render width.
        pad_block_width: bool,

        /// Whether output wider than the requested width is allowed.
        allow_width_overflow: bool,
        /// Minimum width for text wrapping.
        min_wrap_width: usize,
        /// Whether raw extraction mode is enabled.
        raw: bool,
        /// Whether table borders are drawn.
        draw_borders: bool,
        /// Whether links may be wrapped.
        wrap_links: bool,
        /// Whether footnotes are added for hyperlinks.
        include_link_footnotes: bool,
        /// Whether Unicode combining characters are used to strike out text.
        use_unicode_strikeout: bool,
        /// How images with no alt text are handled.
        image_mode: ImageRenderMode,

        /// Syntax highlighters registered by name.
        #[cfg(feature = "css_ext")]
        syntax_highlighters: HighlighterMap,
    }
2908
2909    impl<D: TextDecorator> Config<D> {
2910        /// Make the HtmlContext from self.
2911        pub(crate) fn make_context(&self) -> HtmlContext {
2912            HtmlContext {
2913                style_data: self.style.clone(),
2914                #[cfg(feature = "css")]
2915                use_doc_css: self.use_doc_css,
2916
2917                max_wrap_width: self.max_wrap_width,
2918                pad_block_width: self.pad_block_width,
2919                allow_width_overflow: self.allow_width_overflow,
2920                min_wrap_width: self.min_wrap_width,
2921                raw: self.raw,
2922                draw_borders: self.draw_borders,
2923                wrap_links: self.wrap_links,
2924                include_link_footnotes: self.include_link_footnotes,
2925                use_unicode_strikeout: self.use_unicode_strikeout,
2926                image_mode: self.image_mode,
2927
2928                #[cfg(feature = "css_ext")]
2929                syntax_highlighters: self.syntax_highlighters.clone(),
2930            }
2931        }
2932        /// Parse with context.
2933        pub(crate) fn do_parse<R>(&self, context: &mut HtmlContext, input: R) -> Result<RenderTree>
2934        where
2935            R: io::Read,
2936        {
2937            let dom = self.parse_html(input)?;
2938            let render_tree = super::dom_to_render_tree_with_context(
2939                dom.document.clone(),
2940                &mut io::sink(),
2941                context,
2942            )?
2943            .ok_or(Error::Fail)?;
2944            Ok(RenderTree(render_tree))
2945        }
2946
2947        /// Parse the HTML into a DOM structure.
2948        pub fn parse_html<R: io::Read>(&self, mut input: R) -> Result<super::RcDom> {
2949            use html5ever::tendril::TendrilSink;
2950            let opts = super::ParseOpts {
2951                tree_builder: super::TreeBuilderOpts {
2952                    scripting_enabled: false,
2953                    ..Default::default()
2954                },
2955                ..Default::default()
2956            };
2957            Ok(super::parse_document(super::RcDom::default(), opts)
2958                .from_utf8()
2959                .read_from(&mut input)?)
2960        }
2961
2962        /// Convert an HTML DOM into a RenderTree.
2963        pub fn dom_to_render_tree(&self, dom: &super::RcDom) -> Result<RenderTree> {
2964            Ok(RenderTree(
2965                super::dom_to_render_tree_with_context(
2966                    dom.document.clone(),
2967                    &mut io::sink(),
2968                    &mut self.make_context(),
2969                )?
2970                .ok_or(Error::Fail)?,
2971            ))
2972        }
2973
2974        /// Render an existing RenderTree into a string.
2975        pub fn render_to_string(&self, render_tree: RenderTree, width: usize) -> Result<String> {
2976            let s = render_tree
2977                .render_with_context(
2978                    &mut self.make_context(),
2979                    width,
2980                    self.decorator.make_subblock_decorator(),
2981                )?
2982                .into_string()?;
2983            Ok(s)
2984        }
2985
2986        /// Take an existing RenderTree, and returns text wrapped to `width` columns.
2987        /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
2988        /// of the provided text decorator's `Annotation`.  The "outer" annotation comes first in
2989        /// the `Vec`.
2990        pub fn render_to_lines(
2991            &self,
2992            render_tree: RenderTree,
2993            width: usize,
2994        ) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
2995            render_tree
2996                .render_with_context(
2997                    &mut self.make_context(),
2998                    width,
2999                    self.decorator.make_subblock_decorator(),
3000                )?
3001                .into_lines()
3002        }
3003
3004        /// Reads HTML from `input`, and returns a `String` with text wrapped to
3005        /// `width` columns.
3006        pub fn string_from_read<R: std::io::Read>(self, input: R, width: usize) -> Result<String> {
3007            let mut context = self.make_context();
3008            let s = self
3009                .do_parse(&mut context, input)?
3010                .render_with_context(&mut context, width, self.decorator)?
3011                .into_string()?;
3012            Ok(s)
3013        }
3014
3015        /// Reads HTML from `input`, and returns text wrapped to `width` columns.
3016        /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
3017        /// of the provided text decorator's `Annotation`.  The "outer" annotation comes first in
3018        /// the `Vec`.
3019        pub fn lines_from_read<R: std::io::Read>(
3020            self,
3021            input: R,
3022            width: usize,
3023        ) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
3024            let mut context = self.make_context();
3025            self.do_parse(&mut context, input)?
3026                .render_with_context(&mut context, width, self.decorator)?
3027                .into_lines()
3028        }
3029
3030        #[cfg(feature = "css")]
3031        /// Add some CSS rules which will be used (if supported) with any
3032        /// HTML processed.
3033        pub fn add_css(mut self, css: &str) -> Result<Self> {
3034            self.style.add_user_css(css)?;
3035            Ok(self)
3036        }
3037
3038        #[cfg(feature = "css")]
3039        /// Add some agent CSS rules which will be used (if supported) with any
3040        /// HTML processed.
3041        pub fn add_agent_css(mut self, css: &str) -> Result<Self> {
3042            self.style.add_agent_css(css)?;
3043            Ok(self)
3044        }
3045
3046        #[cfg(feature = "css")]
3047        /// Parse CSS from any \<style\> elements and use supported rules.
3048        pub fn use_doc_css(mut self) -> Self {
3049            self.use_doc_css = true;
3050            self
3051        }
3052
3053        /// Pad lines out to the full render width.
3054        pub fn pad_block_width(mut self) -> Self {
3055            self.pad_block_width = true;
3056            self
3057        }
3058
3059        /// Set the maximum text wrap width.
3060        /// When set, paragraphs will be wrapped to that width even if there
3061        /// is more total width available for rendering.
3062        pub fn max_wrap_width(mut self, wrap_width: usize) -> Self {
3063            self.max_wrap_width = Some(wrap_width);
3064            self
3065        }
3066
3067        /// Allow the output to be wider than the max width.  When enabled,
3068        /// then output wider than the specified width will be returned
3069        /// instead of returning `Err(TooNarrow)` if the output wouldn't
3070        /// otherwise fit.
3071        pub fn allow_width_overflow(mut self) -> Self {
3072            self.allow_width_overflow = true;
3073            self
3074        }
3075
3076        /// Set the minimum width for text wrapping.  The default is 3.
3077        /// Blocks of text will be forced to have at least this width
3078        /// (unless the text inside is less than that).  Increasing this
3079        /// can increase the chance that the width will overflow, leading
3080        /// to a TooNarrow error unless `allow_width_overflow()` is set.
3081        pub fn min_wrap_width(mut self, min_wrap_width: usize) -> Self {
3082            self.min_wrap_width = min_wrap_width;
3083            self
3084        }
3085
3086        /// Raw extraction, ensures text in table cells ends up rendered together
3087        /// This traverses tables as if they had a single column and every cell is its own row.
3088        /// Implies `no_table_borders()`
3089        pub fn raw_mode(mut self, raw: bool) -> Self {
3090            self.raw = raw;
3091            self.draw_borders = false;
3092            self
3093        }
3094
3095        /// Do not render table borders
3096        pub fn no_table_borders(mut self) -> Self {
3097            self.draw_borders = false;
3098            self
3099        }
3100        /// Do not wrap links
3101        pub fn no_link_wrapping(mut self) -> Self {
3102            self.wrap_links = false;
3103            self
3104        }
3105
3106        /// Select whether to use Unicode combining characters to strike out text.
3107        pub fn unicode_strikeout(mut self, use_unicode: bool) -> Self {
3108            self.use_unicode_strikeout = use_unicode;
3109            self
3110        }
3111
3112        /// Make a simple "contains" type rule for an element.
3113        fn make_surround_rule(element: &str, after: bool, content: &str) -> Ruleset {
3114            Ruleset {
3115                selector: Selector {
3116                    components: vec![SelectorComponent::Element(element.into())],
3117                    pseudo_element: Some(if after {
3118                        PseudoElement::After
3119                    } else {
3120                        PseudoElement::Before
3121                    }),
3122                },
3123                styles: vec![StyleDecl {
3124                    style: Style::Content(PseudoContent {
3125                        text: content.into(),
3126                    }),
3127                    importance: Importance::Default,
3128                }],
3129            }
3130        }
3131
3132        /// Decorate <em> etc. similarly to markdown
3133        pub fn do_decorate(mut self) -> Self {
3134            self.style.add_agent_rules(&[
3135                Self::make_surround_rule("em", false, "*"),
3136                Self::make_surround_rule("em", true, "*"),
3137                Self::make_surround_rule("dt", false, "*"),
3138                Self::make_surround_rule("dt", true, "*"),
3139                Self::make_surround_rule("strong", false, "**"),
3140                Self::make_surround_rule("strong", true, "**"),
3141                Self::make_surround_rule("code", false, "`"),
3142                Self::make_surround_rule("code", true, "`"),
3143            ]);
3144            self
3145        }
3146
3147        /// Add footnotes for hyperlinks
3148        pub fn link_footnotes(mut self, include_footnotes: bool) -> Self {
3149            self.include_link_footnotes = include_footnotes;
3150            self
3151        }
3152
3153        /// Configure how images with no alt text are handled.
3154        pub fn empty_img_mode(mut self, img_mode: ImageRenderMode) -> Self {
3155            self.image_mode = img_mode;
3156            self
3157        }
3158
3159        #[cfg(feature = "css_ext")]
3160        /// Register a named syntax highlighter.
3161        ///
3162        /// The highlighter will be used when a `<pre>` element
3163        /// is styled with `x-syntax: name`
3164        pub fn register_highlighter(
3165            mut self,
3166            name: impl Into<String>,
3167            f: SyntaxHighlighter,
3168        ) -> Self {
3169            use std::rc::Rc;
3170
3171            self.syntax_highlighters.insert(name.into(), Rc::new(f));
3172            self
3173        }
3174    }
3175
3176    impl Config<RichDecorator> {
3177        /// Return coloured text.  `colour_map` is a function which takes
3178        /// a list of `RichAnnotation` and some text, and returns the text
3179        /// with any terminal escapes desired to indicate those annotations
3180        /// (such as colour).
3181        pub fn coloured<R, FMap>(self, input: R, width: usize, colour_map: FMap) -> Result<String>
3182        where
3183            R: std::io::Read,
3184            FMap: Fn(&[RichAnnotation], &str) -> String,
3185        {
3186            let mut context = self.make_context();
3187            let render_tree = self.do_parse(&mut context, input)?;
3188            self.render_coloured(render_tree, width, colour_map)
3189        }
3190
3191        /// Return coloured text from a RenderTree.  `colour_map` is a function which takes a list
3192        /// of `RichAnnotation` and some text, and returns the text with any terminal escapes
3193        /// desired to indicate those annotations (such as colour).
3194        pub fn render_coloured<FMap>(
3195            &self,
3196            render_tree: RenderTree,
3197            width: usize,
3198            colour_map: FMap,
3199        ) -> Result<String>
3200        where
3201            FMap: Fn(&[RichAnnotation], &str) -> String,
3202        {
3203            let lines = self.render_to_lines(render_tree, width)?;
3204
3205            let mut result = String::new();
3206            for line in lines {
3207                for ts in line.tagged_strings() {
3208                    result.push_str(&colour_map(&ts.tag, &ts.s));
3209                }
3210                result.push('\n');
3211            }
3212            Ok(result)
3213        }
3214    }
3215
3216    /// Return a Config initialized with a `RichDecorator`.
3217    pub fn rich() -> Config<RichDecorator> {
3218        with_decorator(RichDecorator::new())
3219    }
3220
3221    /// Return a Config initialized with a `PlainDecorator`.
3222    pub fn plain() -> Config<PlainDecorator> {
3223        with_decorator(PlainDecorator::new())
3224            .do_decorate()
3225            .link_footnotes(true)
3226    }
3227
3228    /// Return a Config initialized with a `PlainDecorator`.
3229    pub fn plain_no_decorate() -> Config<PlainDecorator> {
3230        with_decorator(PlainDecorator::new())
3231    }
3232
3233    /// Return a Config initialized with a custom decorator.
3234    pub fn with_decorator<D: TextDecorator>(decorator: D) -> Config<D> {
3235        Config {
3236            decorator,
3237            style: Default::default(),
3238            #[cfg(feature = "css")]
3239            use_doc_css: false,
3240            max_wrap_width: None,
3241            pad_block_width: false,
3242            allow_width_overflow: false,
3243            min_wrap_width: MIN_WIDTH,
3244            raw: false,
3245            draw_borders: true,
3246            wrap_links: true,
3247            include_link_footnotes: false,
3248            use_unicode_strikeout: true,
3249            image_mode: ImageRenderMode::IgnoreEmpty,
3250            #[cfg(feature = "css_ext")]
3251            syntax_highlighters: Default::default(),
3252        }
3253    }
3254}
3255
3256/// The structure of an HTML document that can be rendered using a [`TextDecorator`][].
3257///
3258/// [`TextDecorator`]: render/text_renderer/trait.TextDecorator.html
3259
3260#[derive(Clone, Debug)]
3261pub struct RenderTree(RenderNode);
3262
3263impl std::fmt::Display for RenderTree {
3264    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3265        writeln!(f, "Render tree:")?;
3266        self.0.write_self(f, 1)
3267    }
3268}
3269
3270impl RenderTree {
3271    /// Render this document using the given `decorator` and wrap it to `width` columns.
3272    fn render_with_context<D: TextDecorator>(
3273        self,
3274        context: &mut HtmlContext,
3275        width: usize,
3276        decorator: D,
3277    ) -> Result<RenderedText<D>> {
3278        if width == 0 {
3279            return Err(Error::TooNarrow);
3280        }
3281        let render_options = RenderOptions {
3282            wrap_width: context.max_wrap_width,
3283            pad_block_width: context.pad_block_width,
3284            allow_width_overflow: context.allow_width_overflow,
3285            raw: context.raw,
3286            draw_borders: context.draw_borders,
3287            wrap_links: context.wrap_links,
3288            include_link_footnotes: context.include_link_footnotes,
3289            use_unicode_strikeout: context.use_unicode_strikeout,
3290            img_mode: context.image_mode,
3291        };
3292        let test_decorator = decorator.make_subblock_decorator();
3293        let builder = SubRenderer::new(width, render_options, decorator);
3294        let builder =
3295            render_tree_to_string(context, builder, &test_decorator, self.0, &mut io::sink())?;
3296        Ok(RenderedText(builder))
3297    }
3298}
3299
/// A rendered HTML document.
///
/// Newtype around the `SubRenderer<D>` produced by rendering; the output is extracted with
/// `into_string` or `into_lines`, both of which consume the value.
struct RenderedText<D: TextDecorator>(SubRenderer<D>);
3302
3303impl<D: TextDecorator> RenderedText<D> {
3304    /// Convert the rendered HTML document to a string.
3305    fn into_string(self) -> render::Result<String> {
3306        self.0.into_string()
3307    }
3308
3309    /// Convert the rendered HTML document to a vector of lines with the annotations created by the
3310    /// decorator.
3311    fn into_lines(self) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
3312        Ok(self
3313            .0
3314            .into_lines()?
3315            .into_iter()
3316            .map(RenderLine::into_tagged_line)
3317            .collect())
3318    }
3319}
3320
3321/// Reads and parses HTML from `input` and prepares a render tree.
3322pub fn parse(input: impl io::Read) -> Result<RenderTree> {
3323    let cfg = config::with_decorator(TrivialDecorator::new());
3324    cfg.do_parse(&mut cfg.make_context(), input)
3325}
3326
3327/// Reads HTML from `input`, decorates it using `decorator`, and
3328/// returns a `String` with text wrapped to `width` columns.
3329pub fn from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> Result<String>
3330where
3331    R: io::Read,
3332    D: TextDecorator,
3333{
3334    config::with_decorator(decorator).string_from_read(input, width)
3335}
3336
3337/// Reads HTML from `input`, and returns a `String` with text wrapped to
3338/// `width` columns.
3339pub fn from_read<R>(input: R, width: usize) -> Result<String>
3340where
3341    R: io::Read,
3342{
3343    config::plain().string_from_read(input, width)
3344}
3345
3346/// Reads HTML from `input`, and returns text wrapped to `width` columns.
3347///
3348/// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
3349/// of `RichAnnotation`.  The "outer" annotation comes first in the `Vec`.
3350pub fn from_read_rich<R>(input: R, width: usize) -> Result<Vec<TaggedLine<Vec<RichAnnotation>>>>
3351where
3352    R: io::Read,
3353{
3354    config::rich().lines_from_read(input, width)
3355}
3356
3357mod ansi_colours;
3358
3359pub use ansi_colours::from_read_coloured;
3360
3361#[cfg(test)]
3362mod tests;
3363
3364fn calc_ol_prefix_size<D: TextDecorator>(start: i64, num_items: usize, decorator: &D) -> usize {
3365    // The prefix width could be at either end if the start is negative.
3366    let min_number = start;
3367    // Assumption: num_items can't overflow isize.
3368    let max_number = start + (num_items as i64) - 1;
3369
3370    // This assumes that the decorator gives the same width as default.
3371    let prefix_width_min = decorator.ordered_item_prefix(min_number).len();
3372    let prefix_width_max = decorator.ordered_item_prefix(max_number).len();
3373    max(prefix_width_min, prefix_width_max)
3374}