Skip to main content

html2text/
lib.rs

1//! Convert HTML to text formats.
2//!
3//! This crate renders HTML into a text format, wrapped to a specified width.
4//! This can either be plain text or with extra annotations to (for example)
5//! show in a terminal which supports colours.
6//!
7//! # Examples
8//!
9//! ```rust
10//! # use html2text::from_read;
11//! let html = b"
12//!        <ul>
13//!          <li>Item one</li>
14//!          <li>Item two</li>
15//!          <li>Item three</li>
16//!        </ul>";
17//! assert_eq!(from_read(&html[..], 20).unwrap(),
18//!            "\
19//! * Item one
20//! * Item two
21//! * Item three
22//! ");
23//! ```
24//! A couple of simple demonstration programs are included as examples:
25//!
26//! ### html2text
27//!
28//! The simplest example uses `from_read` to convert HTML on stdin into plain
29//! text:
30//!
31//! ```sh
32//! $ cargo run --example html2text < foo.html
33//! [...]
34//! ```
35//!
36//! ### html2term
37//!
38//! A very simple example of using the rich interface (`from_read_rich`) for a
39//! slightly interactive console HTML viewer is provided as `html2term`.
40//!
41//! ```sh
42//! $ cargo run --example html2term foo.html
43//! [...]
44//! ```
45//!
46//! Note that this example takes the HTML file as a parameter so that it can
47//! read keys from stdin.
48//!
49
50#![deny(missing_docs)]
51
52// Check code in README.md
53#[cfg(doctest)]
54#[doc = include_str!("../README.md")]
55struct ReadMe;
56
57#[macro_use]
58mod macros;
59
60pub mod css;
61pub mod render;
62
/// Character-level helpers for the special cases of wrapping and whitespace
/// collapsing.
trait WhitespaceExt {
    /// Returns whether this character always occupies space in the output.
    /// That is the case for every non-whitespace character and for the
    /// non-breaking space (U+00A0).
    fn always_takes_space(&self) -> bool;

    /// Returns true if a word break is allowed at this character.  This
    /// covers whitespace other than the non-breaking space (U+00A0), plus
    /// the zero-width space (U+200B).
    fn is_wordbreak_point(&self) -> bool;
}

impl WhitespaceExt for char {
    fn always_takes_space(&self) -> bool {
        // The hard space is whitespace according to `is_whitespace`, so it
        // has to be accepted explicitly.
        *self == '\u{A0}' || !self.is_whitespace()
    }

    fn is_wordbreak_point(&self) -> bool {
        let c = *self;
        // U+200B is accepted explicitly, ahead of the `is_whitespace` test.
        c != '\u{00A0}' && (c == '\u{200b}' || c.is_whitespace())
    }
}
92
93/// Extra methods for strings
94trait StrExt {
95    /// Trims leading/trailing whitespace expect for hard spaces.
96    fn trim_collapsible_ws(&self) -> &str;
97}
98
99impl StrExt for str {
100    fn trim_collapsible_ws(&self) -> &str {
101        self.trim_matches(|c: char| !c.always_takes_space())
102    }
103}
104
/// Text style information: a foreground colour and an optional background
/// colour.
#[cfg(feature = "css_ext")]
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct TextStyle {
    /// The foreground colour
    pub fg_colour: Colour,
    /// The background colour, or None.
    pub bg_colour: Option<Colour>,
}

#[cfg(feature = "css_ext")]
impl TextStyle {
    /// Build a `TextStyle` with both a foreground and a background colour.
    pub fn colours(fg_colour: Colour, bg_colour: Colour) -> Self {
        Self {
            fg_colour,
            bg_colour: Some(bg_colour),
        }
    }

    /// Build a `TextStyle` with a foreground colour and no background colour.
    pub fn foreground(fg_colour: Colour) -> Self {
        Self {
            fg_colour,
            bg_colour: None,
        }
    }
}
134
#[cfg(feature = "css_ext")]
/// Syntax highlighter function.
///
/// Takes a string corresponding to some text to be highlighted, and returns
/// spans with sub-strs of that text with associated colours.
///
/// Each returned span pairs a [`TextStyle`] with a `&str` whose lifetime is
/// tied to the input text (`'a`), so no owned copies of the text are
/// required.
pub type SyntaxHighlighter = Box<dyn for<'a> Fn(&'a str) -> Vec<(TextStyle, &'a str)>>;
141
142use markup5ever_rcdom::Node;
143use render::text_renderer::{
144    RenderLine, RenderOptions, RichAnnotation, SubRenderer, TaggedLine, TextRenderer,
145};
146use render::{Renderer, TextDecorator, TrivialDecorator};
147
148use html5ever::driver::ParseOpts;
149use html5ever::parse_document;
150use html5ever::tree_builder::TreeBuilderOpts;
151mod markup5ever_rcdom;
152pub use html5ever::{expanded_name, local_name, namespace_url, ns};
153pub use markup5ever_rcdom::{
154    Handle,
155    NodeData::{Comment, Document, Element},
156    RcDom,
157};
158
159use std::cell::{Cell, RefCell};
160use std::cmp::{max, min};
161use std::collections::{BTreeSet, HashMap};
162#[cfg(feature = "css_ext")]
163use std::ops::Range;
164use std::rc::Rc;
165use unicode_width::UnicodeWidthStr;
166
167use std::io;
168use std::io::Write;
169use std::iter::{once, repeat};
170
/// The white-space handling modes currently represented.
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub(crate) enum WhiteSpace {
    /// Whitespace is not preserved; wrapping is allowed.  The default.
    #[default]
    Normal,
    /// Whitespace is preserved; no wrapping.
    Pre,
    /// Whitespace is preserved; wrapping is allowed.
    #[allow(unused)]
    PreWrap,
    // `NoWrap`, `PreLine` and `BreakSpaces` are not represented yet.
}

impl WhiteSpace {
    /// Returns true for the modes that keep whitespace as written
    /// (`Pre` and `PreWrap`).
    pub fn preserve_whitespace(&self) -> bool {
        matches!(self, WhiteSpace::Pre | WhiteSpace::PreWrap)
    }

    /// Returns true for the modes that allow wrapping
    /// (`Normal` and `PreWrap`).
    #[allow(unused)]
    pub fn do_wrap(&self) -> bool {
        matches!(self, WhiteSpace::Normal | WhiteSpace::PreWrap)
    }
}
198
/// An RGB colour value
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Colour {
    /// Red value
    pub r: u8,
    /// Green value
    pub g: u8,
    /// Blue value
    pub b: u8,
}

/// Formats the colour as `#rrggbb` using two lowercase hex digits per channel.
impl std::fmt::Display for Colour {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Colour { r, g, b } = *self;
        write!(f, "#{r:02x}{g:02x}{b:02x}")
    }
}
215
/// The origin recorded alongside a style value.
///
/// The derived `PartialOrd` orders the variants by declaration order
/// (`None < Agent < User < Author`); `WithSpec::maybe_update` compares
/// origins with `>` when deciding which of two values takes priority.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default, PartialOrd)]
pub(crate) enum StyleOrigin {
    /// No origin recorded; this is the default.
    #[default]
    None,
    /// NOTE(review): presumably the user-agent (built-in) stylesheet — confirm.
    Agent,
    /// NOTE(review): presumably a user stylesheet — confirm.
    #[allow(unused)]
    User,
    /// NOTE(review): presumably the document author's stylesheet — confirm.
    #[allow(unused)]
    Author,
}
226
/// A specificity value used to rank competing style declarations.
///
/// Values are ordered lexicographically by `inline`, then `id`, then
/// `class`, then `typ` (see the `PartialOrd` impl).  Adding two values sums
/// the counters and ORs the `inline` flag.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub(crate) struct Specificity {
    /// Most significant component of the ordering.
    inline: bool,
    /// Second component of the ordering.
    id: u16,
    /// Third component of the ordering.
    class: u16,
    /// Least significant component of the ordering.
    typ: u16,
}

impl Specificity {
    /// A specificity with only the `inline` flag set.
    #[cfg(feature = "css")]
    fn inline() -> Self {
        Specificity {
            inline: true,
            id: 0,
            class: 0,
            typ: 0,
        }
    }
}

impl std::ops::Add<&Specificity> for &Specificity {
    type Output = Specificity;

    /// Component-wise sum; shares its logic with `+=` so the two can never
    /// disagree.
    fn add(self, rhs: &Specificity) -> Self::Output {
        let mut sum = *self;
        sum += rhs;
        sum
    }
}

impl std::ops::AddAssign<&Specificity> for Specificity {
    fn add_assign(&mut self, rhs: &Specificity) {
        self.inline |= rhs.inline;
        // The counters come from parsed (untrusted) input.  Saturate rather
        // than panic in debug builds or wrap around to a *lower* specificity
        // in release builds.
        self.id = self.id.saturating_add(rhs.id);
        self.class = self.class.saturating_add(rhs.class);
        self.typ = self.typ.saturating_add(rhs.typ);
    }
}

impl PartialOrd for Specificity {
    /// Lexicographic comparison on `(inline, id, class, typ)`.
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let key = |s: &Specificity| (s.inline, s.id, s.class, s.typ);
        key(self).partial_cmp(&key(other))
    }
}
286
287#[derive(Clone, Copy, Debug)]
288pub(crate) struct WithSpec<T> {
289    val: Option<T>,
290    origin: StyleOrigin,
291    specificity: Specificity,
292    important: bool,
293}
294impl<T: Clone> WithSpec<T> {
295    pub(crate) fn maybe_update(
296        &mut self,
297        important: bool,
298        origin: StyleOrigin,
299        specificity: Specificity,
300        val: T,
301    ) {
302        if self.val.is_some() {
303            // We already have a value, so need to check.
304            if self.important && !important {
305                // important takes priority over not important.
306                return;
307            }
308            // importance is the same.  Next is checking the origin.
309            {
310                use StyleOrigin::*;
311                match (self.origin, origin) {
312                    (Agent, Agent) | (User, User) | (Author, Author) => {
313                        // They're the same so continue the comparison
314                    }
315                    (mine, theirs) => {
316                        if (important && theirs > mine) || (!important && mine > theirs) {
317                            return;
318                        }
319                    }
320                }
321            }
322            // We're now from the same origin an importance
323            if specificity < self.specificity {
324                return;
325            }
326        }
327        self.val = Some(val);
328        self.origin = origin;
329        self.specificity = specificity;
330        self.important = important;
331    }
332
333    pub fn val(&self) -> Option<&T> {
334        self.val.as_ref()
335    }
336}
337
338impl<T> Default for WithSpec<T> {
339    fn default() -> Self {
340        WithSpec {
341            val: None,
342            origin: StyleOrigin::None,
343            specificity: Default::default(),
344            important: false,
345        }
346    }
347}
348
349#[derive(Debug, Clone, Default)]
350pub(crate) struct ComputedStyle {
351    #[cfg(feature = "css")]
352    /// The computed foreground colour, if any
353    pub(crate) colour: WithSpec<Colour>,
354    #[cfg(feature = "css")]
355    /// The computed background colour, if any
356    pub(crate) bg_colour: WithSpec<Colour>,
357    #[cfg(feature = "css")]
358    /// If set, indicates whether `display: none` or something equivalent applies
359    pub(crate) display: WithSpec<css::Display>,
360    /// The CSS white-space property
361    pub(crate) white_space: WithSpec<WhiteSpace>,
362    /// The CSS content property
363    pub(crate) content: WithSpec<css::PseudoContent>,
364    #[cfg(feature = "css_ext")]
365    pub(crate) syntax: WithSpec<css::SyntaxInfo>,
366
367    /// The CSS content property for ::before
368    pub(crate) content_before: Option<Box<ComputedStyle>>,
369    /// The CSS content property for ::after
370    pub(crate) content_after: Option<Box<ComputedStyle>>,
371
372    /// A non-CSS flag indicating we're inside a <pre>.
373    pub(crate) internal_pre: bool,
374}
375
376impl ComputedStyle {
377    /// Return the style data inherited by children.
378    pub(crate) fn inherit(&self) -> Self {
379        // TODO: clear fields that shouldn't be inherited
380        self.clone()
381    }
382}
383
/// Errors from reading or rendering HTML
#[derive(thiserror::Error, Debug)]
#[non_exhaustive]
pub enum Error {
    /// The output width was too narrow to render to.
    #[error("Output width not wide enough.")]
    TooNarrow,
    /// CSS parse error
    #[error("Invalid CSS")]
    CssParseError,
    /// A general error was encountered.
    #[error("Unknown failure")]
    Fail,
    /// An I/O error
    #[error("I/O error")]
    IoError(#[from] io::Error),
}
401
402impl PartialEq for Error {
403    fn eq(&self, other: &Error) -> bool {
404        use Error::*;
405        match (self, other) {
406            (TooNarrow, TooNarrow) => true,
407            #[cfg(feature = "css")]
408            (CssParseError, CssParseError) => true,
409            (Fail, Fail) => true,
410            _ => false,
411        }
412    }
413}
414
415impl Eq for Error {}
416
417type Result<T> = std::result::Result<T, Error>;
418
419const MIN_WIDTH: usize = 3;
420
/// Size information/estimate
#[derive(Debug, Copy, Clone, Default)]
struct SizeEstimate {
    /// Rough overall size
    size: usize,
    /// The narrowest possible
    min_width: usize,

    /// The use is specific to the node type.
    prefix_size: usize,
}

impl SizeEstimate {
    /// Stack two estimates: sizes are summed and the larger minimum width is
    /// kept.  The result's `prefix_size` is reset to 0.
    fn add(self, other: SizeEstimate) -> SizeEstimate {
        SizeEstimate {
            size: self.size + other.size,
            min_width: self.min_width.max(other.min_width),
            ..Default::default()
        }
    }

    /// Place two estimates side by side: both sizes and minimum widths are
    /// summed.  The result's `prefix_size` is reset to 0.
    fn add_hor(self, other: SizeEstimate) -> SizeEstimate {
        SizeEstimate {
            size: self.size + other.size,
            min_width: self.min_width + other.min_width,
            ..Default::default()
        }
    }

    /// Take the larger size and the larger minimum width of two estimates.
    /// The result's `prefix_size` is reset to 0.
    fn max(self, other: SizeEstimate) -> SizeEstimate {
        SizeEstimate {
            size: self.size.max(other.size),
            min_width: self.min_width.max(other.min_width),
            ..Default::default()
        }
    }
}
461
462#[derive(Clone, Debug)]
463/// Render tree table cell
464struct RenderTableCell {
465    colspan: usize,
466    rowspan: usize,
467    content: Vec<RenderNode>,
468    size_estimate: Cell<Option<SizeEstimate>>,
469    col_width: Option<usize>, // Actual width to use
470    x_pos: Option<usize>,     // X location
471    style: ComputedStyle,
472    is_dummy: bool,
473}
474
475impl RenderTableCell {
476    /// Calculate or return the estimate size of the cell
477    fn get_size_estimate(&self) -> SizeEstimate {
478        let Some(size) = self.size_estimate.get() else {
479            let size = self
480                .content
481                .iter()
482                .map(|node| node.get_size_estimate())
483                .fold(Default::default(), SizeEstimate::add);
484            self.size_estimate.set(Some(size));
485            return size;
486        };
487        size
488    }
489
490    /// Make a placeholder cell to cover for a cell above with
491    /// larger rowspan.
492    pub fn dummy(colspan: usize) -> Self {
493        RenderTableCell {
494            colspan,
495            rowspan: 1,
496            content: Default::default(),
497            size_estimate: Cell::new(Some(SizeEstimate::default())),
498            col_width: None,
499            x_pos: None,
500            style: Default::default(),
501            is_dummy: true,
502        }
503    }
504}
505
506#[derive(Clone, Debug)]
507/// Render tree table row
508struct RenderTableRow {
509    cells: Vec<RenderTableCell>,
510    col_sizes: Option<Vec<usize>>,
511    style: ComputedStyle,
512}
513
514impl RenderTableRow {
515    /// Return a mutable iterator over the cells.
516    fn cells(&self) -> std::slice::Iter<'_, RenderTableCell> {
517        self.cells.iter()
518    }
519    /// Return a mutable iterator over the cells.
520    fn cells_mut(&mut self) -> std::slice::IterMut<'_, RenderTableCell> {
521        self.cells.iter_mut()
522    }
523    /// Return an iterator which returns cells by values (removing
524    /// them from the row).
525    fn cells_drain(&mut self) -> impl Iterator<Item = RenderTableCell> + use<> {
526        std::mem::take(&mut self.cells).into_iter()
527    }
528    /// Count the number of cells in the row.
529    /// Takes into account colspan.
530    fn num_cells(&self) -> usize {
531        self.cells.iter().map(|cell| cell.colspan.max(1)).sum()
532    }
533
534    /// Return the contained cells as RenderNodes, annotated with their
535    /// widths if available.  Skips cells with no width allocated.
536    fn into_cells(self, vertical: bool) -> Vec<RenderNode> {
537        let mut result = Vec::new();
538        let mut colno = 0;
539        let col_sizes = self.col_sizes.unwrap();
540        let mut x_pos = 0;
541        for mut cell in self.cells {
542            let colspan = cell.colspan;
543            let col_width = if vertical {
544                col_sizes[colno]
545            } else {
546                col_sizes[colno..colno + cell.colspan].iter().sum::<usize>()
547            };
548            // Skip any zero-width columns
549            if col_width > 0 {
550                let this_col_width = col_width + cell.colspan - 1;
551                cell.col_width = Some(this_col_width);
552                cell.x_pos = Some(x_pos);
553                x_pos += this_col_width + 1;
554                let style = cell.style.clone();
555                result.push(RenderNode::new_styled(
556                    RenderNodeInfo::TableCell(cell),
557                    style,
558                ));
559            }
560            colno += colspan;
561        }
562        result
563    }
564}
565
#[derive(Clone, Debug)]
/// A representation of a table render tree with metadata.
struct RenderTable {
    /// The rows of the table, in order.
    rows: Vec<RenderTableRow>,
    /// The largest `num_cells()` of any row (0 for a table with no rows).
    num_columns: usize,
    /// Cached size estimate; stored by `calc_size_estimate`.
    size_estimate: Cell<Option<SizeEstimate>>,
}
573
574impl RenderTable {
575    /// Create a new RenderTable with the given rows
576    fn new(mut rows: Vec<RenderTableRow>) -> RenderTable {
577        // We later on want to allocate a vector sized by the column count,
578        // but occasionally we see something like colspan="1000000000".  We
579        // handle this by remapping the column ids to the smallest values
580        // possible.
581        //
582        // Tables with no explicit colspan will be unchanged, but if there
583        // are multiple columns each covered by a single <td> on every row,
584        // they will be collapsed into a single column.  For example:
585        //
586        //    <td><td colspan=1000><td>
587        //    <td colspan=1000><td><td>
588        //
589        //  becomes the equivalent:
590        //    <td><td colspan=2><td>
591        //    <td colspan=2><td><td>
592
593        // This will include 0 and the index after the last colspan.
594        let mut col_positions = BTreeSet::new();
595        // Cells which have a rowspan > 1 from previous rows.
596        // Each element is (rows_left, colpos, colspan)
597        // Before each row, the overhangs are in reverse order so that
598        // they can be popped off.
599        let mut overhang_cells: Vec<(usize, usize, usize)> = Vec::new();
600        let mut next_overhang_cells = Vec::new();
601        col_positions.insert(0);
602        for row in &mut rows {
603            let mut col = 0;
604            let mut new_cells = Vec::new();
605
606            for cell in row.cells_drain() {
607                while let Some(hanging) = overhang_cells.last() {
608                    if hanging.1 <= col {
609                        new_cells.push(RenderTableCell::dummy(hanging.2));
610                        col += hanging.2;
611                        col_positions.insert(col);
612                        let mut used = overhang_cells.pop().unwrap();
613                        if used.0 > 1 {
614                            used.0 -= 1;
615                            next_overhang_cells.push(used);
616                        }
617                    } else {
618                        break;
619                    }
620                }
621                if cell.rowspan > 1 {
622                    next_overhang_cells.push((cell.rowspan - 1, col, cell.colspan));
623                }
624                col += cell.colspan;
625                col_positions.insert(col);
626                new_cells.push(cell);
627            }
628            // Handle remaining overhanging cells
629            while let Some(mut hanging) = overhang_cells.pop() {
630                new_cells.push(RenderTableCell::dummy(hanging.2));
631                col += hanging.2;
632                col_positions.insert(col);
633                if hanging.0 > 1 {
634                    hanging.0 -= 1;
635                    next_overhang_cells.push(hanging);
636                }
637            }
638
639            row.cells = new_cells;
640            overhang_cells = std::mem::take(&mut next_overhang_cells);
641            overhang_cells.reverse();
642        }
643
644        let colmap: HashMap<_, _> = col_positions
645            .into_iter()
646            .enumerate()
647            .map(|(i, pos)| (pos, i))
648            .collect();
649
650        for row in &mut rows {
651            let mut pos = 0;
652            let mut mapped_pos = 0;
653            for cell in row.cells_mut() {
654                let nextpos = pos + cell.colspan.max(1);
655                let next_mapped_pos = *colmap.get(&nextpos).unwrap();
656                cell.colspan = next_mapped_pos - mapped_pos;
657                pos = nextpos;
658                mapped_pos = next_mapped_pos;
659            }
660        }
661
662        let num_columns = rows.iter().map(|r| r.num_cells()).max().unwrap_or(0);
663        RenderTable {
664            rows,
665            num_columns,
666            size_estimate: Cell::new(None),
667        }
668    }
669
670    /// Return an iterator over the rows.
671    fn rows(&self) -> std::slice::Iter<'_, RenderTableRow> {
672        self.rows.iter()
673    }
674
675    /// Consume this and return a `Vec<RenderNode>` containing the children;
676    /// the children know the column sizes required.
677    fn into_rows(self, col_sizes: Vec<usize>, vert: bool) -> Vec<RenderNode> {
678        self.rows
679            .into_iter()
680            .map(|mut tr| {
681                tr.col_sizes = Some(col_sizes.clone());
682                let style = tr.style.clone();
683                RenderNode::new_styled(RenderNodeInfo::TableRow(tr, vert), style)
684            })
685            .collect()
686    }
687
688    fn calc_size_estimate(&self, _context: &HtmlContext) -> SizeEstimate {
689        if self.num_columns == 0 {
690            let result = SizeEstimate {
691                size: 0,
692                min_width: 0,
693                prefix_size: 0,
694            };
695            self.size_estimate.set(Some(result));
696            return result;
697        }
698        let mut sizes: Vec<SizeEstimate> = vec![Default::default(); self.num_columns];
699
700        // For now, a simple estimate based on adding up sub-parts.
701        for row in self.rows() {
702            let mut colno = 0usize;
703            for cell in row.cells() {
704                let cellsize = cell.get_size_estimate();
705                for colnum in 0..cell.colspan {
706                    sizes[colno + colnum].size += cellsize.size / cell.colspan;
707                    sizes[colno + colnum].min_width = max(
708                        sizes[colno + colnum].min_width,
709                        cellsize.min_width / cell.colspan,
710                    );
711                }
712                colno += cell.colspan;
713            }
714        }
715        let size = sizes.iter().map(|s| s.size).sum::<usize>() + self.num_columns.saturating_sub(1);
716        let min_width = sizes.iter().map(|s| s.min_width).sum::<usize>() + self.num_columns - 1;
717        let result = SizeEstimate {
718            size,
719            min_width,
720            prefix_size: 0,
721        };
722        self.size_estimate.set(Some(result));
723        result
724    }
725}
726
/// The node-specific information distilled from the DOM.
#[derive(Clone, Debug)]
#[non_exhaustive]
enum RenderNodeInfo {
    /// Some text.
    Text(String),
    /// A group of nodes collected together.
    Container(Vec<RenderNode>),
    /// A link with contained nodes
    Link(String, Vec<RenderNode>),
    /// An emphasised region
    Em(Vec<RenderNode>),
    /// A strong region
    Strong(Vec<RenderNode>),
    /// A struck out region
    Strikeout(Vec<RenderNode>),
    /// A code region
    Code(Vec<RenderNode>),
    /// An image (src, title)
    Img(String, String),
    /// An inline SVG (title)
    Svg(String),
    /// A block element with children
    Block(Vec<RenderNode>),
    /// A header (h1, h2, ...) with children
    Header(usize, Vec<RenderNode>),
    /// A Div element with children
    Div(Vec<RenderNode>),
    /// A blockquote
    BlockQuote(Vec<RenderNode>),
    /// An unordered list
    Ul(Vec<RenderNode>),
    /// An ordered list
    Ol(i64, Vec<RenderNode>),
    /// A description list (containing Dt or Dd)
    Dl(Vec<RenderNode>),
    /// A term (from a `<dt>`)
    Dt(Vec<RenderNode>),
    /// A definition (from a `<dd>`)
    Dd(Vec<RenderNode>),
    /// A line break
    Break,
    /// A table
    Table(RenderTable),
    /// A set of table rows (from either `<thead>` or `<tbody>`)
    TableBody(Vec<RenderTableRow>),
    /// Table row (must only appear within a table body)
    /// If the boolean is true, then the cells are drawn vertically
    /// instead of horizontally (because of space).
    TableRow(RenderTableRow, bool),
    /// Table cell (must only appear within a table row)
    TableCell(RenderTableCell),
    /// Start of a named HTML fragment
    FragStart(String),
    /// A list item
    ListItem(Vec<RenderNode>),
    /// Superscript text
    Sup(Vec<RenderNode>),
}
786
/// Common fields from a node.
#[derive(Clone, Debug)]
struct RenderNode {
    /// Cached size estimate; `None` until `calc_size_estimate` stores one.
    size_estimate: Cell<Option<SizeEstimate>>,
    /// The node-specific data, including any children.
    info: RenderNodeInfo,
    /// The style attached to this node.
    style: ComputedStyle,
}
794
795impl RenderNode {
796    /// Create a node from the RenderNodeInfo.
797    fn new(info: RenderNodeInfo) -> RenderNode {
798        RenderNode {
799            size_estimate: Cell::new(None),
800            info,
801            style: Default::default(),
802        }
803    }
804
805    /// Create a node from the RenderNodeInfo.
806    fn new_styled(info: RenderNodeInfo, style: ComputedStyle) -> RenderNode {
807        RenderNode {
808            size_estimate: Cell::new(None),
809            info,
810            style,
811        }
812    }
813
814    /// Get a size estimate
815    fn get_size_estimate(&self) -> SizeEstimate {
816        self.size_estimate.get().unwrap()
817    }
818
819    /// Calculate the size of this node.
820    fn calc_size_estimate<D: TextDecorator>(
821        &self,
822        context: &HtmlContext,
823        decorator: &D,
824    ) -> SizeEstimate {
825        // If it's already calculated, then just return the answer.
826        if let Some(s) = self.size_estimate.get() {
827            return s;
828        };
829
830        use RenderNodeInfo::*;
831
832        let recurse = |node: &RenderNode| node.calc_size_estimate(context, decorator);
833
834        // Otherwise, make an estimate.
835        let estimate = match self.info {
836            Text(ref t) | Img(_, ref t) | Svg(ref t) => {
837                use unicode_width::UnicodeWidthChar;
838                let mut len = 0;
839                let mut in_whitespace = false;
840                for c in t.trim_collapsible_ws().chars() {
841                    let is_collapsible_ws = !c.always_takes_space();
842                    if !is_collapsible_ws {
843                        len += UnicodeWidthChar::width(c).unwrap_or(0);
844                        // Count the preceding whitespace as one.
845                        if in_whitespace {
846                            len += 1;
847                        }
848                    }
849                    in_whitespace = is_collapsible_ws;
850                }
851                // Add one for preceding whitespace, unless the node is otherwise empty.
852                if let Some(true) = t.chars().next().map(|c| !c.always_takes_space()) {
853                    if len > 0 {
854                        len += 1;
855                    }
856                }
857                if let Img(_, _) = self.info {
858                    len += 2;
859                }
860                SizeEstimate {
861                    size: len,
862                    min_width: len.min(context.min_wrap_width),
863                    prefix_size: 0,
864                }
865            }
866
867            Container(ref v) | Em(ref v) | Strong(ref v) | Strikeout(ref v) | Code(ref v)
868            | Block(ref v) | Div(ref v) | Dl(ref v) | Dt(ref v) | ListItem(ref v) | Sup(ref v) => v
869                .iter()
870                .map(recurse)
871                .fold(Default::default(), SizeEstimate::add),
872            Link(ref _target, ref v) => v
873                .iter()
874                .map(recurse)
875                .fold(Default::default(), SizeEstimate::add)
876                .add(SizeEstimate {
877                    size: 5,
878                    min_width: 5,
879                    prefix_size: 0,
880                }),
881            Dd(ref v) | BlockQuote(ref v) | Ul(ref v) => {
882                let prefix = match self.info {
883                    Dd(_) => "  ".into(),
884                    BlockQuote(_) => decorator.quote_prefix(),
885                    Ul(_) => decorator.unordered_item_prefix(),
886                    _ => unreachable!(),
887                };
888                let prefix_width = UnicodeWidthStr::width(prefix.as_str());
889                let mut size = v
890                    .iter()
891                    .map(recurse)
892                    .fold(Default::default(), SizeEstimate::add)
893                    .add_hor(SizeEstimate {
894                        size: prefix_width,
895                        min_width: prefix_width,
896                        prefix_size: 0,
897                    });
898                size.prefix_size = prefix_width;
899                size
900            }
901            Ol(i, ref v) => {
902                let prefix_size = calc_ol_prefix_size(i, v.len(), decorator);
903                let mut result = v
904                    .iter()
905                    .map(recurse)
906                    .fold(Default::default(), SizeEstimate::add)
907                    .add_hor(SizeEstimate {
908                        size: prefix_size,
909                        min_width: prefix_size,
910                        prefix_size: 0,
911                    });
912                result.prefix_size = prefix_size;
913                result
914            }
915            Header(level, ref v) => {
916                let prefix_size = decorator.header_prefix(level).len();
917                let mut size = v
918                    .iter()
919                    .map(recurse)
920                    .fold(Default::default(), SizeEstimate::add)
921                    .add_hor(SizeEstimate {
922                        size: prefix_size,
923                        min_width: prefix_size,
924                        prefix_size: 0,
925                    });
926                size.prefix_size = prefix_size;
927                size
928            }
929            Break => SizeEstimate {
930                size: 1,
931                min_width: 1,
932                prefix_size: 0,
933            },
934            Table(ref t) => t.calc_size_estimate(context),
935            TableRow(..) | TableBody(_) | TableCell(_) => unimplemented!(),
936            FragStart(_) => Default::default(),
937        };
938        self.size_estimate.set(Some(estimate));
939        estimate
940    }
941
942    /// Return true if this node is definitely empty.  This is used to quickly
943    /// remove e.g. links with no anchor text in most cases, but can't recurse
944    /// and look more deeply.
945    fn is_shallow_empty(&self) -> bool {
946        use RenderNodeInfo::*;
947
948        // Otherwise, make an estimate.
949        match self.info {
950            Text(ref t) | Img(_, ref t) | Svg(ref t) => {
951                let len = t.trim().len();
952                len == 0
953            }
954
955            Container(ref v)
956            | Link(_, ref v)
957            | Em(ref v)
958            | Strong(ref v)
959            | Strikeout(ref v)
960            | Code(ref v)
961            | Block(ref v)
962            | ListItem(ref v)
963            | Div(ref v)
964            | BlockQuote(ref v)
965            | Dl(ref v)
966            | Dt(ref v)
967            | Dd(ref v)
968            | Ul(ref v)
969            | Ol(_, ref v)
970            | Sup(ref v) => v.is_empty(),
971            Header(_level, ref v) => v.is_empty(),
972            Break => true,
973            Table(ref _t) => false,
974            TableRow(..) | TableBody(_) | TableCell(_) => false,
975            FragStart(_) => true,
976        }
977    }
978
979    fn write_container(
980        &self,
981        name: &str,
982        items: &[RenderNode],
983        f: &mut std::fmt::Formatter,
984        indent: usize,
985    ) -> std::prelude::v1::Result<(), std::fmt::Error> {
986        writeln!(f, "{:indent$}{name}:", "")?;
987        for item in items {
988            item.write_self(f, indent + 1)?;
989        }
990        Ok(())
991    }
992    fn write_style(
993        f: &mut std::fmt::Formatter,
994        indent: usize,
995        style: &ComputedStyle,
996    ) -> std::result::Result<(), std::fmt::Error> {
997        use std::fmt::Write;
998        let mut stylestr = String::new();
999
1000        #[cfg(feature = "css")]
1001        {
1002            if let Some(col) = style.colour.val() {
1003                write!(&mut stylestr, " colour={:?}", col)?;
1004            }
1005            if let Some(col) = style.bg_colour.val() {
1006                write!(&mut stylestr, " bg_colour={:?}", col)?;
1007            }
1008            if let Some(val) = style.display.val() {
1009                write!(&mut stylestr, " disp={:?}", val)?;
1010            }
1011        }
1012        if let Some(ws) = style.white_space.val() {
1013            write!(&mut stylestr, " white_space={:?}", ws)?;
1014        }
1015        if style.internal_pre {
1016            write!(&mut stylestr, " internal_pre")?;
1017        }
1018        if !stylestr.is_empty() {
1019            writeln!(f, "{:indent$}[Style:{stylestr}", "")?;
1020        }
1021        Ok(())
1022    }
1023    fn write_self(
1024        &self,
1025        f: &mut std::fmt::Formatter,
1026        indent: usize,
1027    ) -> std::prelude::v1::Result<(), std::fmt::Error> {
1028        Self::write_style(f, indent, &self.style)?;
1029
1030        match &self.info {
1031            RenderNodeInfo::Text(s) => writeln!(f, "{:indent$}{s:?}", "")?,
1032            RenderNodeInfo::Container(v) => {
1033                self.write_container("Container", v, f, indent)?;
1034            }
1035            RenderNodeInfo::Link(targ, v) => {
1036                self.write_container(&format!("Link({})", targ), v, f, indent)?;
1037            }
1038            RenderNodeInfo::Em(v) => {
1039                self.write_container("Em", v, f, indent)?;
1040            }
1041            RenderNodeInfo::Strong(v) => {
1042                self.write_container("Strong", v, f, indent)?;
1043            }
1044            RenderNodeInfo::Strikeout(v) => {
1045                self.write_container("Strikeout", v, f, indent)?;
1046            }
1047            RenderNodeInfo::Code(v) => {
1048                self.write_container("Code", v, f, indent)?;
1049            }
1050            RenderNodeInfo::Img(src, title) => {
1051                writeln!(f, "{:indent$}Img src={:?} title={:?}:", "", src, title)?;
1052            }
1053            RenderNodeInfo::Svg(title) => {
1054                writeln!(f, "{:indent$}Svg title={:?}:", "", title)?;
1055            }
1056            RenderNodeInfo::Block(v) => {
1057                self.write_container("Block", v, f, indent)?;
1058            }
1059            RenderNodeInfo::Header(depth, v) => {
1060                self.write_container(&format!("Header({})", depth), v, f, indent)?;
1061            }
1062            RenderNodeInfo::Div(v) => {
1063                self.write_container("Div", v, f, indent)?;
1064            }
1065            RenderNodeInfo::BlockQuote(v) => {
1066                self.write_container("BlockQuote", v, f, indent)?;
1067            }
1068            RenderNodeInfo::Ul(v) => {
1069                self.write_container("Ul", v, f, indent)?;
1070            }
1071            RenderNodeInfo::Ol(start, v) => {
1072                self.write_container(&format!("Ol({})", start), v, f, indent)?;
1073            }
1074            RenderNodeInfo::Dl(v) => {
1075                self.write_container("Dl", v, f, indent)?;
1076            }
1077            RenderNodeInfo::Dt(v) => {
1078                self.write_container("Dt", v, f, indent)?;
1079            }
1080            RenderNodeInfo::Dd(v) => {
1081                self.write_container("Dd", v, f, indent)?;
1082            }
1083            RenderNodeInfo::Break => {
1084                writeln!(f, "{:indent$}Break", "", indent = indent)?;
1085            }
1086            RenderNodeInfo::Table(rows) => {
1087                writeln!(f, "{:indent$}Table ({} cols):", "", rows.num_columns)?;
1088                for rtr in &rows.rows {
1089                    Self::write_style(f, indent + 1, &rtr.style)?;
1090                    writeln!(
1091                        f,
1092                        "{:width$}Row ({} cells):",
1093                        "",
1094                        rtr.cells.len(),
1095                        width = indent + 1
1096                    )?;
1097                    for cell in &rtr.cells {
1098                        Self::write_style(f, indent + 2, &cell.style)?;
1099                        writeln!(
1100                            f,
1101                            "{:width$}Cell colspan={} width={:?}:",
1102                            "",
1103                            cell.colspan,
1104                            cell.col_width,
1105                            width = indent + 2
1106                        )?;
1107                        for node in &cell.content {
1108                            node.write_self(f, indent + 3)?;
1109                        }
1110                    }
1111                }
1112            }
1113            RenderNodeInfo::TableBody(_) => todo!(),
1114            RenderNodeInfo::TableRow(_, _) => todo!(),
1115            RenderNodeInfo::TableCell(_) => todo!(),
1116            RenderNodeInfo::FragStart(frag) => {
1117                writeln!(f, "{:indent$}FragStart({}):", "", frag)?;
1118            }
1119            RenderNodeInfo::ListItem(v) => {
1120                self.write_container("ListItem", v, f, indent)?;
1121            }
1122            RenderNodeInfo::Sup(v) => {
1123                self.write_container("Sup", v, f, indent)?;
1124            }
1125        }
1126        Ok(())
1127    }
1128}
1129
1130fn precalc_size_estimate<'a, D: TextDecorator>(
1131    node: &'a RenderNode,
1132    context: &mut HtmlContext,
1133    decorator: &'a D,
1134) -> TreeMapResult<'a, HtmlContext, &'a RenderNode, ()> {
1135    use RenderNodeInfo::*;
1136    if node.size_estimate.get().is_some() {
1137        return TreeMapResult::Nothing;
1138    }
1139    match node.info {
1140        Text(_) | Img(_, _) | Svg(_) | Break | FragStart(_) => {
1141            let _ = node.calc_size_estimate(context, decorator);
1142            TreeMapResult::Nothing
1143        }
1144
1145        Container(ref v)
1146        | Link(_, ref v)
1147        | Em(ref v)
1148        | Strong(ref v)
1149        | Strikeout(ref v)
1150        | Code(ref v)
1151        | Block(ref v)
1152        | ListItem(ref v)
1153        | Div(ref v)
1154        | BlockQuote(ref v)
1155        | Ul(ref v)
1156        | Ol(_, ref v)
1157        | Dl(ref v)
1158        | Dt(ref v)
1159        | Dd(ref v)
1160        | Sup(ref v)
1161        | Header(_, ref v) => TreeMapResult::PendingChildren {
1162            children: v.iter().collect(),
1163            cons: Box::new(move |context, _cs| {
1164                node.calc_size_estimate(context, decorator);
1165                Ok(None)
1166            }),
1167            prefn: None,
1168            postfn: None,
1169        },
1170        Table(ref t) => {
1171            /* Return all the indirect children which are RenderNodes. */
1172            let mut children = Vec::new();
1173            for row in &t.rows {
1174                for cell in &row.cells {
1175                    children.extend(cell.content.iter());
1176                }
1177            }
1178            TreeMapResult::PendingChildren {
1179                children,
1180                cons: Box::new(move |context, _cs| {
1181                    node.calc_size_estimate(context, decorator);
1182                    Ok(None)
1183                }),
1184                prefn: None,
1185                postfn: None,
1186            }
1187        }
1188        TableRow(..) | TableBody(_) | TableCell(_) => unimplemented!(),
1189    }
1190}
1191
1192/// Convert a table into a RenderNode
1193fn table_to_render_tree<'a, T: Write>(
1194    input: RenderInput,
1195    computed: ComputedStyle,
1196    _err_out: &mut T,
1197) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1198    pending(input, move |_, rowset| {
1199        let mut rows = vec![];
1200        for bodynode in rowset {
1201            if let RenderNodeInfo::TableBody(body) = bodynode.info {
1202                rows.extend(body);
1203            } else {
1204                html_trace!("Found in table: {:?}", bodynode.info);
1205            }
1206        }
1207        if rows.is_empty() {
1208            None
1209        } else {
1210            Some(RenderNode::new_styled(
1211                RenderNodeInfo::Table(RenderTable::new(rows)),
1212                computed,
1213            ))
1214        }
1215    })
1216}
1217
1218/// Add rows from a thead or tbody.
1219fn tbody_to_render_tree<'a, T: Write>(
1220    input: RenderInput,
1221    computed: ComputedStyle,
1222    _err_out: &mut T,
1223) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1224    pending_noempty(input, move |_, rowchildren| {
1225        let mut rows = rowchildren
1226            .into_iter()
1227            .flat_map(|rownode| {
1228                if let RenderNodeInfo::TableRow(row, _) = rownode.info {
1229                    Some(row)
1230                } else {
1231                    html_trace!("  [[tbody child: {:?}]]", rownode);
1232                    None
1233                }
1234            })
1235            .collect::<Vec<_>>();
1236
1237        // Handle colspan=0 by replacing it.
1238        // Get a list of (has_zero_colspan, sum_colspan)
1239        let num_columns = rows
1240            .iter()
1241            .map(|row| {
1242                row.cells()
1243                    // Treat the column as having colspan 1 for initial counting.
1244                    .map(|cell| (cell.colspan == 0, cell.colspan.max(1)))
1245                    .fold((false, 0), |a, b| (a.0 || b.0, a.1 + b.1))
1246            })
1247            .collect::<Vec<_>>();
1248
1249        let max_columns = num_columns.iter().map(|(_, span)| span).max().unwrap_or(&1);
1250
1251        for (i, &(has_zero, num_cols)) in num_columns.iter().enumerate() {
1252            // Note this won't be sensible if more than one column has colspan=0,
1253            // but that's not very well defined anyway.
1254            if has_zero {
1255                for cell in rows[i].cells_mut() {
1256                    if cell.colspan == 0 {
1257                        // +1 because we said it had 1 to start with
1258                        cell.colspan = max_columns - num_cols + 1;
1259                    }
1260                }
1261            }
1262        }
1263
1264        Some(RenderNode::new_styled(
1265            RenderNodeInfo::TableBody(rows),
1266            computed,
1267        ))
1268    })
1269}
1270
1271/// Convert a table row to a RenderTableRow
1272fn tr_to_render_tree<'a, T: Write>(
1273    input: RenderInput,
1274    computed: ComputedStyle,
1275    _err_out: &mut T,
1276) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1277    pending(input, move |_, cellnodes| {
1278        let cells = cellnodes
1279            .into_iter()
1280            .flat_map(|cellnode| {
1281                if let RenderNodeInfo::TableCell(cell) = cellnode.info {
1282                    Some(cell)
1283                } else {
1284                    html_trace!("  [[tr child: {:?}]]", cellnode);
1285                    None
1286                }
1287            })
1288            .collect();
1289        let style = computed.clone();
1290        Some(RenderNode::new_styled(
1291            RenderNodeInfo::TableRow(
1292                RenderTableRow {
1293                    cells,
1294                    col_sizes: None,
1295                    style,
1296                },
1297                false,
1298            ),
1299            computed,
1300        ))
1301    })
1302}
1303
1304/// Convert a single table cell to a render node.
1305fn td_to_render_tree<'a, T: Write>(
1306    input: RenderInput,
1307    computed: ComputedStyle,
1308    _err_out: &mut T,
1309) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1310    let mut colspan = 1;
1311    let mut rowspan = 1;
1312    if let Element { ref attrs, .. } = input.handle.data {
1313        for attr in attrs.borrow().iter() {
1314            if &attr.name.local == "colspan" {
1315                let v: &str = &attr.value;
1316                colspan = v.parse().unwrap_or(1);
1317            }
1318            if &attr.name.local == "rowspan" {
1319                let v: &str = &attr.value;
1320                rowspan = v.parse().unwrap_or(1);
1321            }
1322        }
1323    }
1324    pending(input, move |_, children| {
1325        let style = computed.clone();
1326        Some(RenderNode::new_styled(
1327            RenderNodeInfo::TableCell(RenderTableCell {
1328                colspan,
1329                rowspan,
1330                content: children,
1331                size_estimate: Cell::new(None),
1332                col_width: None,
1333                x_pos: None,
1334                style,
1335                is_dummy: false,
1336            }),
1337            computed,
1338        ))
1339    })
1340}
1341
1342/// A reducer which combines results from mapping children into
1343/// the result for the current node.  Takes a context and a
1344/// vector of results and returns a new result (or nothing).
1345type ResultReducer<'a, C, R> = dyn FnOnce(&mut C, Vec<R>) -> Result<Option<R>> + 'a;
1346
1347/// A closure to call before processing a child node.
1348type ChildPreFn<C, N> = dyn Fn(&mut C, &N) -> Result<()>;
1349
1350/// A closure to call after processing a child node,
1351/// before adding the result to the processed results
1352/// vector.
1353type ChildPostFn<C, R> = dyn Fn(&mut C, &R) -> Result<()>;
1354
1355/// The result of trying to render one node.
1356enum TreeMapResult<'a, C, N, R> {
1357    /// A completed result.
1358    Finished(R),
1359    /// Deferred completion - can be turned into a result
1360    /// once the vector of children are processed.
1361    PendingChildren {
1362        children: Vec<N>,
1363        cons: Box<ResultReducer<'a, C, R>>,
1364        prefn: Option<Box<ChildPreFn<C, N>>>,
1365        postfn: Option<Box<ChildPostFn<C, R>>>,
1366    },
1367    /// Nothing (e.g. a comment or other ignored element).
1368    Nothing,
1369}
1370
1371fn tree_map_reduce<'a, C, N, R, M>(
1372    context: &mut C,
1373    top: N,
1374    mut process_node: M,
1375) -> Result<Option<R>>
1376where
1377    M: FnMut(&mut C, N) -> Result<TreeMapResult<'a, C, N, R>>,
1378{
1379    /// A node partially decoded, waiting for its children to
1380    /// be processed.
1381    struct PendingNode<'a, C, R, N> {
1382        /// How to make the node once finished
1383        construct: Box<ResultReducer<'a, C, R>>,
1384        /// Called before processing each child
1385        prefn: Option<Box<ChildPreFn<C, N>>>,
1386        /// Called after processing each child
1387        postfn: Option<Box<ChildPostFn<C, R>>>,
1388        /// Children already processed
1389        children: Vec<R>,
1390        /// Iterator of child nodes not yet processed
1391        to_process: std::vec::IntoIter<N>,
1392    }
1393
1394    let mut last = PendingNode {
1395        // We only expect one child, which we'll just return.
1396        construct: Box::new(|_, mut cs| Ok(cs.pop())),
1397        prefn: None,
1398        postfn: None,
1399        children: Vec::new(),
1400        to_process: vec![top].into_iter(),
1401    };
1402    let mut pending_stack = Vec::new();
1403    loop {
1404        // Get the next child node to process
1405        while let Some(h) = last.to_process.next() {
1406            if let Some(f) = &last.prefn {
1407                f(context, &h)?;
1408            }
1409            match process_node(context, h)? {
1410                TreeMapResult::Finished(result) => {
1411                    if let Some(f) = &last.postfn {
1412                        f(context, &result)?;
1413                    }
1414                    last.children.push(result);
1415                }
1416                TreeMapResult::PendingChildren {
1417                    children,
1418                    cons,
1419                    prefn,
1420                    postfn,
1421                } => {
1422                    pending_stack.push(last);
1423                    last = PendingNode {
1424                        construct: cons,
1425                        prefn,
1426                        postfn,
1427                        children: Vec::new(),
1428                        to_process: children.into_iter(),
1429                    };
1430                }
1431                TreeMapResult::Nothing => {}
1432            };
1433        }
1434        // No more children, so finally construct the parent.
1435        if let Some(mut parent) = pending_stack.pop() {
1436            if let Some(node) = (last.construct)(context, last.children)? {
1437                if let Some(f) = &parent.postfn {
1438                    f(context, &node)?;
1439                }
1440                parent.children.push(node);
1441            }
1442            last = parent;
1443            continue;
1444        }
1445        // Finished the whole stack!
1446        break Ok((last.construct)(context, last.children)?);
1447    }
1448}
1449
/// The syntax highlighters which have been registered, looked up by name.
#[cfg(feature = "css_ext")]
#[derive(Clone, Default)]
struct HighlighterMap {
    /// Highlighters keyed by the name they were registered under.  They
    /// are reference counted, so cloning the map shares the highlighters
    /// rather than copying them.
    map: HashMap<String, Rc<SyntaxHighlighter>>,
}
1455
#[cfg(feature = "css_ext")]
impl HighlighterMap {
    /// Look up the highlighter registered under `name`.
    ///
    /// Returns a new shared handle to it, or `None` if nothing is
    /// registered under that name.
    pub fn get(&self, name: &str) -> Option<Rc<SyntaxHighlighter>> {
        let found = self.map.get(name)?;
        Some(Rc::clone(found))
    }

    /// Register `f` under `name`, replacing any highlighter previously
    /// registered under the same name.
    fn insert(&mut self, name: impl Into<String>, f: Rc<SyntaxHighlighter>) {
        let key: String = name.into();
        self.map.insert(key, f);
    }
}
1466
#[cfg(feature = "css_ext")]
impl std::fmt::Debug for HighlighterMap {
    /// Format the map showing only the names of the registered
    /// highlighters; the highlighters themselves are not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let names: Vec<&String> = self.map.keys().collect();
        let mut out = f.debug_struct("HighlighterMap");
        out.field("map", &names);
        out.finish()
    }
}
1475
#[cfg(feature = "css_ext")]
impl PartialEq for HighlighterMap {
    /// Two maps are equal when they register exactly the same names and
    /// every name refers to the very same highlighter instance in both.
    ///
    /// Highlighters are compared by identity (`Rc::ptr_eq`), not by what
    /// they do, so two separately created but otherwise identical
    /// highlighters compare as different.  Identity comparison is
    /// reflexive, symmetric and transitive, which is what the `Eq`
    /// implementation below relies on.
    fn eq(&self, other: &Self) -> bool {
        self.map.len() == other.map.len()
            && self.map.iter().all(|(name, highlighter)| {
                other
                    .map
                    .get(name)
                    .map_or(false, |theirs| Rc::ptr_eq(highlighter, theirs))
            })
    }
}

#[cfg(feature = "css_ext")]
impl Eq for HighlighterMap {}
1485
1486#[derive(Debug, PartialEq, Eq)]
1487struct HtmlContext {
1488    style_data: css::StyleData,
1489    #[cfg(feature = "css")]
1490    use_doc_css: bool,
1491
1492    max_wrap_width: Option<usize>,
1493    pad_block_width: bool,
1494    allow_width_overflow: bool,
1495    min_wrap_width: usize,
1496    raw: bool,
1497    draw_borders: bool,
1498    wrap_links: bool,
1499    include_link_footnotes: bool,
1500    use_unicode_strikeout: bool,
1501    image_mode: config::ImageRenderMode,
1502
1503    #[cfg(feature = "css_ext")]
1504    syntax_highlighters: HighlighterMap,
1505}
1506
1507// Input to render tree conversion.
1508struct RenderInput {
1509    handle: Handle,
1510    parent_style: Rc<ComputedStyle>,
1511    // Overlay styles from syntax highlighting.
1512    #[cfg(feature = "css_ext")]
1513    extra_styles: RefCell<Vec<(Range<usize>, TextStyle)>>,
1514    // Map from node to the length of enclosed text nodes.
1515    node_lengths: Rc<RefCell<HashMap<*const Node, usize>>>,
1516}
1517
1518impl RenderInput {
1519    fn new(handle: Handle, parent_style: Rc<ComputedStyle>) -> Self {
1520        RenderInput {
1521            handle,
1522            parent_style,
1523            #[cfg(feature = "css_ext")]
1524            extra_styles: Default::default(),
1525            node_lengths: Default::default(),
1526        }
1527    }
1528
1529    #[cfg(feature = "css_ext")]
1530    fn set_syntax_info(&self, full_text: &str, highlighted: Vec<(TextStyle, &str)>) {
1531        let mut node_styles = Vec::new();
1532
1533        // Turn the returned strings into offsets into full_text.  We assume
1534        // we can maintain relative offsets as we step through the tree rendering.
1535        for (style, s) in highlighted {
1536            fn get_offset(full: &str, sub: &str) -> Option<Range<usize>> {
1537                // This looks scary, but if we get this wrong the worst case is
1538                // that we end up panicking when using the offsets.
1539                let full_start = full.as_ptr() as usize;
1540                let full_end = full_start + full.len();
1541                let sub_start = sub.as_ptr() as usize;
1542                let sub_end = sub_start + sub.len();
1543
1544                if sub_start >= full_start && sub_end <= full_end {
1545                    Some((sub_start - full_start)..(sub_end - full_start))
1546                } else {
1547                    None
1548                }
1549            }
1550
1551            if let Some(offset_range) = get_offset(full_text, s) {
1552                node_styles.push((offset_range, style));
1553            } // else we ignore the highlight.
1554        }
1555        node_styles.sort_by_key(|r| (r.0.start, r.0.end));
1556        *self.extra_styles.borrow_mut() = node_styles;
1557    }
1558
1559    // Return the children in the right form
1560    #[allow(clippy::mut_range_bound)]
1561    fn children(&self) -> Vec<RenderInput> {
1562        #[cfg(feature = "css_ext")]
1563        if !self.extra_styles.borrow().is_empty() {
1564            let mut offset = 0;
1565            let mut result = Vec::new();
1566            let mut start_style_index = 0;
1567            let node_lengths = self.node_lengths.borrow();
1568            let extra_styles = self.extra_styles.borrow();
1569            for child in &*self.handle.children.borrow() {
1570                let end_offset = offset + node_lengths.get(&Rc::as_ptr(child)).unwrap();
1571                let mut child_extra_styles = Vec::new();
1572                for es_idx in start_style_index..extra_styles.len() {
1573                    let mut style_range = extra_styles[es_idx].0.clone();
1574                    if style_range.start >= end_offset {
1575                        // We've gone too far.
1576                        break;
1577                    }
1578                    if style_range.end <= offset {
1579                        // We don't need to look at this again
1580                        // Note this is here to restart this loop in a different place
1581                        // in the next run of the outer loop; hence allowing
1582                        // clippy::mut_range_bound on the function.
1583                        start_style_index = es_idx;
1584                    }
1585                    // This piece must overlap!
1586                    // Clip the range to this node.
1587                    style_range.start = style_range.start.max(offset) - offset;
1588                    style_range.end = style_range.end.min(end_offset) - offset;
1589
1590                    child_extra_styles.push((style_range, extra_styles[es_idx].1.clone()));
1591                }
1592                result.push(RenderInput {
1593                    handle: Rc::clone(child),
1594                    parent_style: Rc::clone(&self.parent_style),
1595                    extra_styles: RefCell::new(child_extra_styles),
1596                    node_lengths: self.node_lengths.clone(),
1597                });
1598                offset = end_offset;
1599            }
1600            return result;
1601        }
1602
1603        // Simple case, and we might not have the node lengths.
1604        self.handle
1605            .children
1606            .borrow()
1607            .iter()
1608            .map(|child| RenderInput {
1609                handle: child.clone(),
1610                parent_style: Rc::clone(&self.parent_style),
1611                #[cfg(feature = "css_ext")]
1612                extra_styles: Default::default(),
1613                node_lengths: self.node_lengths.clone(),
1614            })
1615            .collect()
1616    }
1617
1618    #[cfg(feature = "css_ext")]
1619    fn do_extract_text(
1620        out: &mut String,
1621        handle: &Handle,
1622        length_map: &mut HashMap<*const Node, usize>,
1623    ) {
1624        match handle.data {
1625            markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
1626                let s: &str = &tstr.borrow();
1627                out.push_str(s);
1628                length_map.entry(Rc::as_ptr(handle)).or_insert(s.len());
1629            }
1630            _ => {
1631                for child in handle.children.borrow().iter() {
1632                    let len_before = out.len();
1633                    RenderInput::do_extract_text(out, child, length_map);
1634                    let len_after = out.len();
1635                    length_map
1636                        .entry(Rc::as_ptr(child))
1637                        .or_insert(len_after - len_before);
1638                }
1639            }
1640        }
1641    }
1642
1643    #[cfg(feature = "css_ext")]
1644    /// Return a full String, and a list of where substrings came from:
1645    ///
1646    fn extract_raw_text(&self) -> String {
1647        let mut result = String::new();
1648        RenderInput::do_extract_text(
1649            &mut result,
1650            &self.handle,
1651            &mut self.node_lengths.borrow_mut(),
1652        );
1653        result
1654    }
1655}
1656
1657fn dom_to_render_tree_with_context<T: Write>(
1658    handle: Handle,
1659    err_out: &mut T,
1660    context: &mut HtmlContext,
1661) -> Result<Option<RenderNode>> {
1662    html_trace!("### dom_to_render_tree: HTML: {:?}", handle);
1663    #[cfg(feature = "css")]
1664    if context.use_doc_css {
1665        let mut doc_style_data = css::dom_extract::dom_to_stylesheet(handle.clone(), err_out)?;
1666        doc_style_data.merge(std::mem::take(&mut context.style_data));
1667        context.style_data = doc_style_data;
1668    }
1669
1670    let parent_style = Default::default();
1671    let result = tree_map_reduce(
1672        context,
1673        RenderInput::new(handle, parent_style),
1674        |context, input| process_dom_node(input, err_out, context),
1675    );
1676
1677    html_trace!("### dom_to_render_tree: out= {:#?}", result);
1678    result
1679}
1680
/// Return a string representation of the CSS rules parsed from
/// the DOM document.
///
/// Anything the stylesheet extraction writes to its error output is
/// discarded.
///
/// # Errors
///
/// Returns an error if extracting the stylesheet from the document fails.
#[cfg(feature = "css")]
pub fn dom_to_parsed_style(dom: &RcDom) -> Result<String> {
    let mut discard = std::io::sink();
    let stylesheet = css::dom_extract::dom_to_stylesheet(dom.document.clone(), &mut discard)?;
    Ok(stylesheet.to_string())
}
1689
1690fn pending<F>(
1691    input: RenderInput,
1692    f: F,
1693) -> TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>
1694where
1695    F: FnOnce(&mut HtmlContext, Vec<RenderNode>) -> Option<RenderNode> + 'static,
1696{
1697    TreeMapResult::PendingChildren {
1698        children: input.children(),
1699        cons: Box::new(move |ctx, children| Ok(f(ctx, children))),
1700        prefn: None,
1701        postfn: None,
1702    }
1703}
1704
1705fn pending_noempty<F>(
1706    input: RenderInput,
1707    f: F,
1708) -> TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>
1709where
1710    F: FnOnce(&mut HtmlContext, Vec<RenderNode>) -> Option<RenderNode> + 'static,
1711{
1712    let handle = &input.handle;
1713    let style = &input.parent_style;
1714    TreeMapResult::PendingChildren {
1715        children: handle
1716            .children
1717            .borrow()
1718            .iter()
1719            .map(|child| RenderInput::new(child.clone(), Rc::clone(style)))
1720            .collect(),
1721        cons: Box::new(move |ctx, children| {
1722            if children.is_empty() {
1723                Ok(None)
1724            } else {
1725                Ok(f(ctx, children))
1726            }
1727        }),
1728        prefn: None,
1729        postfn: None,
1730    }
1731}
1732
/// Where a new child is placed relative to a node's existing children.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum ChildPosition {
    /// Before all of the existing children.
    Start,
    /// After all of the existing children.
    End,
}
1738
1739/// Prepend or append a FragmentStart (or analogous) marker to an existing
1740/// RenderNode.
1741fn insert_child(
1742    new_child: RenderNode,
1743    mut orig: RenderNode,
1744    position: ChildPosition,
1745) -> RenderNode {
1746    use RenderNodeInfo::*;
1747    html_trace!("insert_child({:?}, {:?}, {:?})", new_child, orig, position);
1748
1749    match orig.info {
1750        // For block elements such as Block and Div, we need to insert
1751        // the node at the front of their children array, otherwise
1752        // the renderer is liable to drop the fragment start marker
1753        // _before_ the new line indicating the end of the previous
1754        // paragraph.
1755        //
1756        // For Container, we do the same thing just to make the data
1757        // less pointlessly nested.
1758        Block(ref mut children)
1759        | ListItem(ref mut children)
1760        | Dd(ref mut children)
1761        | Dt(ref mut children)
1762        | Dl(ref mut children)
1763        | Div(ref mut children)
1764        | BlockQuote(ref mut children)
1765        | Container(ref mut children)
1766        | TableCell(RenderTableCell {
1767            content: ref mut children,
1768            ..
1769        }) => {
1770            match position {
1771                ChildPosition::Start => children.insert(0, new_child),
1772                ChildPosition::End => children.push(new_child),
1773            }
1774            // Now return orig, but we do that outside the match so
1775            // that we've given back the borrowed ref 'children'.
1776        }
1777
1778        // For table rows and tables, push down if there's any content.
1779        TableRow(ref mut rrow, _) => {
1780            // If the row is empty, then there isn't really anything
1781            // to attach the fragment start to.
1782            if let Some(cell) = rrow.cells.first_mut() {
1783                match position {
1784                    ChildPosition::Start => cell.content.insert(0, new_child),
1785                    ChildPosition::End => cell.content.push(new_child),
1786                }
1787            }
1788        }
1789
1790        TableBody(ref mut rows) | Table(RenderTable { ref mut rows, .. }) => {
1791            // If the row is empty, then there isn't really anything
1792            // to attach the fragment start to.
1793            if let Some(rrow) = rows.first_mut() {
1794                if let Some(cell) = rrow.cells.first_mut() {
1795                    match position {
1796                        ChildPosition::Start => cell.content.insert(0, new_child),
1797                        ChildPosition::End => cell.content.push(new_child),
1798                    }
1799                }
1800            }
1801        }
1802
1803        // For anything else, just make a new Container with the
1804        // new_child node and the original one.
1805        _ => {
1806            let result = match position {
1807                ChildPosition::Start => RenderNode::new(Container(vec![new_child, orig])),
1808                ChildPosition::End => RenderNode::new(Container(vec![orig, new_child])),
1809            };
1810            html_trace!("insert_child() -> {:?}", result);
1811            return result;
1812        }
1813    }
1814    html_trace!("insert_child() -> {:?}", &orig);
1815    orig
1816}
1817
1818fn process_dom_node<T: Write>(
1819    input: RenderInput,
1820    err_out: &mut T,
1821    #[allow(unused)] // Used with css feature
1822    context: &mut HtmlContext,
1823) -> Result<TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>> {
1824    use RenderNodeInfo::*;
1825    use TreeMapResult::*;
1826
1827    Ok(match input.handle.clone().data {
1828        Document => pending(input, |_context, cs| Some(RenderNode::new(Container(cs)))),
1829        Comment { .. } => Nothing,
1830        Element {
1831            ref name,
1832            ref attrs,
1833            ..
1834        } => {
1835            let mut frag_from_name_attr = false;
1836
1837            let RenderInput {
1838                ref handle,
1839                ref parent_style,
1840                ..
1841            } = input;
1842
1843            #[cfg(feature = "css")]
1844            let use_doc_css = context.use_doc_css;
1845            #[cfg(not(feature = "css"))]
1846            let use_doc_css = false;
1847
1848            let computed = {
1849                let computed = context
1850                    .style_data
1851                    .computed_style(parent_style, handle, use_doc_css);
1852                #[cfg(feature = "css")]
1853                match computed.display.val() {
1854                    Some(css::Display::None) => return Ok(Nothing),
1855                    #[cfg(feature = "css_ext")]
1856                    Some(css::Display::ExtRawDom) => {
1857                        use html5ever::interface::{NodeOrText, TreeSink};
1858                        use html5ever::{LocalName, QualName};
1859                        let mut html_bytes: Vec<u8> = Default::default();
1860                        handle.serialize(&mut html_bytes)?;
1861
1862                        // Make a new DOM object so that we can easily create new
1863                        // nodes.  They will be independent.
1864                        let dom = RcDom::default();
1865
1866                        // We'll enclose it in a `<pre>`, so that we have an element in the right
1867                        // shape to process.
1868                        let html_string = String::from_utf8_lossy(&html_bytes).into_owned();
1869                        let pre_node = dom.create_element(
1870                            QualName::new(None, ns!(html), LocalName::from("pre")),
1871                            vec![],
1872                            Default::default(),
1873                        );
1874                        dom.append(&pre_node, NodeOrText::AppendText(html_string.into()));
1875
1876                        // Remove the RawDom setting; we don't want to be recursively converting to
1877                        // raw DOM.
1878                        let mut my_computed = computed;
1879                        my_computed.display = Default::default();
1880                        // Preformat it
1881                        my_computed.white_space.maybe_update(
1882                            false,
1883                            StyleOrigin::Agent,
1884                            Default::default(),
1885                            WhiteSpace::Pre,
1886                        );
1887                        my_computed.internal_pre = true;
1888
1889                        let new_input = RenderInput {
1890                            handle: pre_node,
1891                            parent_style: Rc::new(my_computed.clone()),
1892                            extra_styles: Default::default(),
1893                            node_lengths: Default::default(),
1894                        };
1895
1896                        if let Some(syntax_info) = my_computed.syntax.val() {
1897                            if let Some(highlighter) =
1898                                context.syntax_highlighters.get(&syntax_info.language)
1899                            {
1900                                // Do the highlighting here.
1901                                let text = new_input.extract_raw_text();
1902                                let highlighted = highlighter(&text);
1903                                new_input.set_syntax_info(&text, highlighted);
1904                            }
1905                        }
1906                        return Ok(pending(new_input, move |_, cs| {
1907                            Some(RenderNode::new_styled(Container(cs), my_computed))
1908                        }));
1909                    }
1910                    _ => (),
1911                }
1912                #[cfg(feature = "css_ext")]
1913                if let Some(syntax_info) = computed.syntax.val() {
1914                    if let Some(highlighter) =
1915                        context.syntax_highlighters.get(&syntax_info.language)
1916                    {
1917                        let extracted_text = input.extract_raw_text();
1918                        let highlighted = highlighter(&extracted_text);
1919                        input.set_syntax_info(&extracted_text, highlighted);
1920                    }
1921                }
1922
1923                computed
1924            };
1925
1926            let computed_before = computed.content_before.clone();
1927            let computed_after = computed.content_after.clone();
1928
1929            let result = match name.expanded() {
1930                expanded_name!(html "html") | expanded_name!(html "body") => {
1931                    /* process children, but don't add anything */
1932                    pending(input, move |_, cs| {
1933                        Some(RenderNode::new_styled(Container(cs), computed))
1934                    })
1935                }
1936                expanded_name!(html "link")
1937                | expanded_name!(html "meta")
1938                | expanded_name!(html "hr")
1939                | expanded_name!(html "script")
1940                | expanded_name!(html "style")
1941                | expanded_name!(html "head") => {
1942                    /* Ignore the head and its children */
1943                    Nothing
1944                }
1945                expanded_name!(html "span") => {
1946                    /* process children, but don't add anything */
1947                    pending_noempty(input, move |_, cs| {
1948                        Some(RenderNode::new_styled(Container(cs), computed))
1949                    })
1950                }
1951                expanded_name!(html "a") => {
1952                    let borrowed = attrs.borrow();
1953                    let mut target = None;
1954                    frag_from_name_attr = true;
1955                    for attr in borrowed.iter() {
1956                        if &attr.name.local == "href" {
1957                            target = Some(&*attr.value);
1958                            break;
1959                        }
1960                    }
1961                    PendingChildren {
1962                        children: input.children(),
1963                        cons: if let Some(href) = target {
1964                            let href: String = href.into();
1965                            Box::new(move |_, cs: Vec<RenderNode>| {
1966                                if cs.iter().any(|c| !c.is_shallow_empty()) {
1967                                    Ok(Some(RenderNode::new_styled(Link(href, cs), computed)))
1968                                } else {
1969                                    Ok(None)
1970                                }
1971                            })
1972                        } else {
1973                            Box::new(move |_, cs| {
1974                                Ok(Some(RenderNode::new_styled(Container(cs), computed)))
1975                            })
1976                        },
1977                        prefn: None,
1978                        postfn: None,
1979                    }
1980                }
1981                expanded_name!(html "em")
1982                | expanded_name!(html "i")
1983                | expanded_name!(html "ins") => pending(input, move |_, cs| {
1984                    Some(RenderNode::new_styled(Em(cs), computed))
1985                }),
1986                expanded_name!(html "strong") | expanded_name!(html "b") => {
1987                    pending(input, move |_, cs| {
1988                        Some(RenderNode::new_styled(Strong(cs), computed))
1989                    })
1990                }
1991                expanded_name!(html "s") | expanded_name!(html "del") => {
1992                    pending(input, move |_, cs| {
1993                        Some(RenderNode::new_styled(Strikeout(cs), computed))
1994                    })
1995                }
1996                expanded_name!(html "code") => pending(input, move |_, cs| {
1997                    Some(RenderNode::new_styled(Code(cs), computed))
1998                }),
1999                expanded_name!(html "img") => {
2000                    let borrowed = attrs.borrow();
2001                    let mut title = None;
2002                    let mut src = None;
2003                    for attr in borrowed.iter() {
2004                        if &attr.name.local == "alt" && !attr.value.is_empty() {
2005                            title = Some(&*attr.value);
2006                        }
2007                        if &attr.name.local == "src" && !attr.value.is_empty() {
2008                            src = Some(&*attr.value);
2009                        }
2010                        if title.is_some() && src.is_some() {
2011                            break;
2012                        }
2013                    }
2014                    // Ignore `<img>` without src.
2015                    if let Some(src) = src {
2016                        Finished(RenderNode::new_styled(
2017                            Img(src.into(), title.unwrap_or("").into()),
2018                            computed,
2019                        ))
2020                    } else {
2021                        Nothing
2022                    }
2023                }
2024                expanded_name!(svg "svg") => {
2025                    // Inline SVG: look for a <title> child for the title.
2026                    let mut title = None;
2027
2028                    for node in input.handle.children.borrow().iter() {
2029                        if let markup5ever_rcdom::NodeData::Element { ref name, .. } = node.data {
2030                            if matches!(name.expanded(), expanded_name!(svg "title")) {
2031                                let mut title_str = String::new();
2032                                for subnode in node.children.borrow().iter() {
2033                                    if let markup5ever_rcdom::NodeData::Text { ref contents } =
2034                                        subnode.data
2035                                    {
2036                                        title_str.push_str(&contents.borrow());
2037                                    }
2038                                }
2039                                title = Some(title_str);
2040                            } else {
2041                                // The first item has to be <title>
2042                                break;
2043                            }
2044                        }
2045                    }
2046
2047                    Finished(RenderNode::new_styled(
2048                        Svg(title.unwrap_or_else(|| String::new())),
2049                        computed,
2050                    ))
2051                }
2052                expanded_name!(html "h1")
2053                | expanded_name!(html "h2")
2054                | expanded_name!(html "h3")
2055                | expanded_name!(html "h4")
2056                | expanded_name!(html "h5")
2057                | expanded_name!(html "h6") => {
2058                    let level: usize = name.local[1..].parse().unwrap();
2059                    pending(input, move |_, cs| {
2060                        Some(RenderNode::new_styled(Header(level, cs), computed))
2061                    })
2062                }
2063                expanded_name!(html "p") => pending_noempty(input, move |_, cs| {
2064                    Some(RenderNode::new_styled(Block(cs), computed))
2065                }),
2066                expanded_name!(html "li") => pending(input, move |_, cs| {
2067                    Some(RenderNode::new_styled(ListItem(cs), computed))
2068                }),
2069                expanded_name!(html "sup") => pending(input, move |_, cs| {
2070                    Some(RenderNode::new_styled(Sup(cs), computed))
2071                }),
2072                expanded_name!(html "div") => pending_noempty(input, move |_, cs| {
2073                    Some(RenderNode::new_styled(Div(cs), computed))
2074                }),
2075                expanded_name!(html "pre") => pending(input, move |_, cs| {
2076                    let mut computed = computed;
2077                    computed.white_space.maybe_update(
2078                        false,
2079                        StyleOrigin::Agent,
2080                        Default::default(),
2081                        WhiteSpace::Pre,
2082                    );
2083                    computed.internal_pre = true;
2084                    Some(RenderNode::new_styled(Block(cs), computed))
2085                }),
2086                expanded_name!(html "br") => Finished(RenderNode::new_styled(Break, computed)),
2087                expanded_name!(html "wbr") => {
2088                    Finished(RenderNode::new_styled(Text("\u{200b}".into()), computed))
2089                }
2090                expanded_name!(html "table") => table_to_render_tree(input, computed, err_out),
2091                expanded_name!(html "thead") | expanded_name!(html "tbody") => {
2092                    tbody_to_render_tree(input, computed, err_out)
2093                }
2094                expanded_name!(html "tr") => tr_to_render_tree(input, computed, err_out),
2095                expanded_name!(html "th") | expanded_name!(html "td") => {
2096                    td_to_render_tree(input, computed, err_out)
2097                }
2098                expanded_name!(html "blockquote") => pending_noempty(input, move |_, cs| {
2099                    Some(RenderNode::new_styled(BlockQuote(cs), computed))
2100                }),
2101                expanded_name!(html "ul") => pending_noempty(input, move |_, cs| {
2102                    Some(RenderNode::new_styled(Ul(cs), computed))
2103                }),
2104                expanded_name!(html "ol") => {
2105                    let borrowed = attrs.borrow();
2106                    let mut start = 1;
2107                    for attr in borrowed.iter() {
2108                        if &attr.name.local == "start" {
2109                            start = attr.value.parse().ok().unwrap_or(1);
2110                            break;
2111                        }
2112                    }
2113
2114                    pending_noempty(input, move |_, cs| {
2115                        // There can be extra nodes which aren't ListItem (like whitespace text
2116                        // nodes).  We need to filter those out to avoid messing up the rendering.
2117                        let cs = cs
2118                            .into_iter()
2119                            .filter(|n| matches!(n.info, RenderNodeInfo::ListItem(..)))
2120                            .collect();
2121                        Some(RenderNode::new_styled(Ol(start, cs), computed))
2122                    })
2123                }
2124                expanded_name!(html "dl") => {
2125                    pending_noempty(input, move |_, cs| {
2126                        // There can be extra nodes which aren't Dt or Dd (like whitespace text
2127                        // nodes).  We need to filter those out to avoid messing up the rendering.
2128                        let cs = cs
2129                            .into_iter()
2130                            .filter(|n| {
2131                                matches!(n.info, RenderNodeInfo::Dt(..) | RenderNodeInfo::Dd(..))
2132                            })
2133                            .collect();
2134                        Some(RenderNode::new_styled(Dl(cs), computed))
2135                    })
2136                }
2137                expanded_name!(html "dt") => pending(input, move |_, cs| {
2138                    Some(RenderNode::new_styled(Dt(cs), computed))
2139                }),
2140                expanded_name!(html "dd") => pending(input, move |_, cs| {
2141                    Some(RenderNode::new_styled(Dd(cs), computed))
2142                }),
2143                _ => {
2144                    html_trace!("Unhandled element: {:?}\n", name.local);
2145                    pending_noempty(input, move |_, cs| {
2146                        Some(RenderNode::new_styled(Container(cs), computed))
2147                    })
2148                }
2149            };
2150
2151            let mut fragment = None;
2152            let borrowed = attrs.borrow();
2153            for attr in borrowed.iter() {
2154                if &attr.name.local == "id" || (frag_from_name_attr && &attr.name.local == "name") {
2155                    fragment = Some(attr.value.to_string());
2156                    break;
2157                }
2158            }
2159
2160            let result = if computed_before.is_some() || computed_after.is_some() {
2161                let wrap_nodes = move |mut node: RenderNode| {
2162                    if let Some(ref content) = computed_before {
2163                        if let Some(pseudo_content) = content.content.val() {
2164                            node = insert_child(
2165                                RenderNode::new(Text(pseudo_content.text.clone())),
2166                                node,
2167                                ChildPosition::Start,
2168                            );
2169                        }
2170                    }
2171                    if let Some(ref content) = computed_after {
2172                        if let Some(pseudo_content) = content.content.val() {
2173                            node = insert_child(
2174                                RenderNode::new(Text(pseudo_content.text.clone())),
2175                                node,
2176                                ChildPosition::End,
2177                            );
2178                        }
2179                    }
2180                    node
2181                };
2182                // Insert extra content nodes
2183                match result {
2184                    Finished(node) => Finished(wrap_nodes(node)),
2185                    // Do we need to wrap a Nothing?
2186                    Nothing => Nothing,
2187                    PendingChildren {
2188                        children,
2189                        cons,
2190                        prefn,
2191                        postfn,
2192                    } => PendingChildren {
2193                        children,
2194                        prefn,
2195                        postfn,
2196                        cons: Box::new(move |ctx, ch| match cons(ctx, ch)? {
2197                            None => Ok(None),
2198                            Some(node) => Ok(Some(wrap_nodes(node))),
2199                        }),
2200                    },
2201                }
2202            } else {
2203                result
2204            };
2205
2206            let Some(fragname) = fragment else {
2207                return Ok(result);
2208            };
2209            match result {
2210                Finished(node) => Finished(insert_child(
2211                    RenderNode::new(FragStart(fragname)),
2212                    node,
2213                    ChildPosition::Start,
2214                )),
2215                Nothing => Finished(RenderNode::new(FragStart(fragname))),
2216                PendingChildren {
2217                    children,
2218                    cons,
2219                    prefn,
2220                    postfn,
2221                } => PendingChildren {
2222                    children,
2223                    prefn,
2224                    postfn,
2225                    cons: Box::new(move |ctx, ch| {
2226                        let fragnode = RenderNode::new(FragStart(fragname));
2227                        match cons(ctx, ch)? {
2228                            None => Ok(Some(fragnode)),
2229                            Some(node) => {
2230                                Ok(Some(insert_child(fragnode, node, ChildPosition::Start)))
2231                            }
2232                        }
2233                    }),
2234                },
2235            }
2236        }
2237        markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
2238            #[cfg(feature = "css_ext")]
2239            if !input.extra_styles.borrow().is_empty() {
2240                let mut nodes = Vec::new();
2241                let mut offset = 0;
2242                for part in &*input.extra_styles.borrow() {
2243                    let (start, end) = (part.0.start, part.0.end);
2244                    if start > offset {
2245                        // Handle the unstyled bit at the start
2246                        nodes.push(RenderNode::new(Text((tstr.borrow()[offset..start]).into())));
2247                    }
2248                    let mut cstyle = input.parent_style.inherit();
2249                    cstyle.colour.maybe_update(
2250                        // TODO: use the right specificity
2251                        cstyle.syntax.important,
2252                        cstyle.syntax.origin,
2253                        cstyle.syntax.specificity,
2254                        part.1.fg_colour,
2255                    );
2256                    if let Some(bgcol) = part.1.bg_colour {
2257                        cstyle.bg_colour.maybe_update(
2258                            // TODO: use the right specificity
2259                            cstyle.syntax.important,
2260                            cstyle.syntax.origin,
2261                            cstyle.syntax.specificity,
2262                            bgcol,
2263                        );
2264                    }
2265                    // Now the styled part
2266                    nodes.push(RenderNode::new_styled(
2267                        Text((tstr.borrow()[start..end]).into()),
2268                        cstyle,
2269                    ));
2270                    offset = end;
2271                }
2272                // the final bit
2273                if offset < tstr.borrow().len() {
2274                    nodes.push(RenderNode::new(Text((tstr.borrow()[offset..]).into())));
2275                }
2276                if nodes.len() == 1 {
2277                    return Ok(Finished(nodes.pop().unwrap()));
2278                } else {
2279                    return Ok(Finished(RenderNode::new(RenderNodeInfo::Container(nodes))));
2280                }
2281            }
2282
2283            Finished(RenderNode::new(Text((&*tstr.borrow()).into())))
2284        }
2285        _ => {
2286            // NodeData doesn't have a Debug impl.
2287            writeln!(err_out, "Unhandled node type.").unwrap();
2288            Nothing
2289        }
2290    })
2291}
2292
2293fn render_tree_to_string<T: Write, D: TextDecorator>(
2294    context: &mut HtmlContext,
2295    renderer: SubRenderer<D>,
2296    decorator: &D,
2297    tree: RenderNode,
2298    err_out: &mut T,
2299) -> Result<SubRenderer<D>> {
2300    /* Phase 1: get size estimates. */
2301    // can't actually error, but Ok-wrap to satisfy tree_map_reduce signature
2302    tree_map_reduce(context, &tree, |context, node| {
2303        Ok(precalc_size_estimate(node, context, decorator))
2304    })?;
2305    /* Phase 2: actually render. */
2306    let mut renderer = TextRenderer::new(renderer);
2307    tree_map_reduce(&mut renderer, tree, |renderer, node| {
2308        Ok(do_render_node(renderer, node, err_out)?)
2309    })?;
2310    let (mut renderer, links) = renderer.into_inner();
2311    let lines = renderer.finalise(links);
2312    // And add the links
2313    if !lines.is_empty() {
2314        renderer.start_block()?;
2315        renderer.fmt_links(lines);
2316    }
2317    Ok(renderer)
2318}
2319
2320fn pending2<
2321    D: TextDecorator,
2322    F: FnOnce(
2323            &mut TextRenderer<D>,
2324            Vec<Option<SubRenderer<D>>>,
2325        ) -> Result<Option<Option<SubRenderer<D>>>>
2326        + 'static,
2327>(
2328    children: Vec<RenderNode>,
2329    f: F,
2330) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2331    TreeMapResult::PendingChildren {
2332        children,
2333        cons: Box::new(f),
2334        prefn: None,
2335        postfn: None,
2336    }
2337}
2338
/// Records which pieces of style state have been pushed onto a renderer, so
/// that exactly those pieces can be popped again afterwards.
///
/// The `Default` value has every flag cleared, i.e. nothing was pushed.
#[derive(Default)]
struct PushedStyleInfo {
    /// A foreground colour was pushed.
    colour: bool,
    /// A background colour was pushed.
    bgcolour: bool,
    /// A white-space mode was pushed.
    white_space: bool,
    /// Preformatted mode was pushed.
    preformat: bool,
}
2348
2349impl PushedStyleInfo {
2350    fn apply<D: TextDecorator>(render: &mut TextRenderer<D>, style: &ComputedStyle) -> Self {
2351        #[allow(unused_mut)]
2352        let mut result: PushedStyleInfo = Default::default();
2353        #[cfg(feature = "css")]
2354        if let Some(col) = style.colour.val() {
2355            render.push_colour(*col);
2356            result.colour = true;
2357        }
2358        #[cfg(feature = "css")]
2359        if let Some(col) = style.bg_colour.val() {
2360            render.push_bgcolour(*col);
2361            result.bgcolour = true;
2362        }
2363        if let Some(ws) = style.white_space.val() {
2364            if let WhiteSpace::Pre | WhiteSpace::PreWrap = ws {
2365                render.push_ws(*ws);
2366                result.white_space = true;
2367            }
2368        }
2369        if style.internal_pre {
2370            render.push_preformat();
2371            result.preformat = true;
2372        }
2373        result
2374    }
2375    fn unwind<D: TextDecorator>(self, renderer: &mut TextRenderer<D>) {
2376        if self.bgcolour {
2377            renderer.pop_bgcolour();
2378        }
2379        if self.colour {
2380            renderer.pop_colour();
2381        }
2382        if self.white_space {
2383            renderer.pop_ws();
2384        }
2385        if self.preformat {
2386            renderer.pop_preformat();
2387        }
2388    }
2389}
2390
2391fn do_render_node<T: Write, D: TextDecorator>(
2392    renderer: &mut TextRenderer<D>,
2393    tree: RenderNode,
2394    err_out: &mut T,
2395) -> render::Result<TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>>> {
2396    html_trace!("do_render_node({:?}", tree);
2397    use RenderNodeInfo::*;
2398    use TreeMapResult::*;
2399
2400    let size_estimate = tree.size_estimate.get().unwrap_or_default();
2401
2402    let pushed_style = PushedStyleInfo::apply(renderer, &tree.style);
2403
2404    Ok(match tree.info {
2405        Text(ref tstr) => {
2406            renderer.add_inline_text(tstr)?;
2407            pushed_style.unwind(renderer);
2408            Finished(None)
2409        }
2410        Container(children) => pending2(children, |renderer, _| {
2411            pushed_style.unwind(renderer);
2412            Ok(Some(None))
2413        }),
2414        Link(href, children) => {
2415            renderer.start_link(&href)?;
2416            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2417                renderer.end_link()?;
2418                pushed_style.unwind(renderer);
2419                Ok(Some(None))
2420            })
2421        }
2422        Em(children) => {
2423            renderer.start_emphasis()?;
2424            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2425                renderer.end_emphasis()?;
2426                pushed_style.unwind(renderer);
2427                Ok(Some(None))
2428            })
2429        }
2430        Strong(children) => {
2431            renderer.start_strong()?;
2432            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2433                renderer.end_strong()?;
2434                pushed_style.unwind(renderer);
2435                Ok(Some(None))
2436            })
2437        }
2438        Strikeout(children) => {
2439            renderer.start_strikeout()?;
2440            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2441                renderer.end_strikeout()?;
2442                pushed_style.unwind(renderer);
2443                Ok(Some(None))
2444            })
2445        }
2446        Code(children) => {
2447            renderer.start_code()?;
2448            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2449                renderer.end_code()?;
2450                pushed_style.unwind(renderer);
2451                Ok(Some(None))
2452            })
2453        }
2454        Img(src, title) => {
2455            renderer.add_image(&src, &title)?;
2456            pushed_style.unwind(renderer);
2457            Finished(None)
2458        }
2459        Svg(title) => {
2460            renderer.add_image("", &title)?;
2461            pushed_style.unwind(renderer);
2462            Finished(None)
2463        }
2464        Block(children) | ListItem(children) => {
2465            renderer.start_block()?;
2466            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2467                renderer.end_block();
2468                pushed_style.unwind(renderer);
2469                Ok(Some(None))
2470            })
2471        }
2472        Header(level, children) => {
2473            let prefix = renderer.header_prefix(level);
2474            let prefix_size = size_estimate.prefix_size;
2475            debug_assert!(prefix.len() == prefix_size);
2476            let min_width = size_estimate.min_width;
2477            let inner_width = min_width.saturating_sub(prefix_size);
2478            let sub_builder =
2479                renderer.new_sub_renderer(renderer.width_minus(prefix_size, inner_width)?)?;
2480            renderer.push(sub_builder);
2481            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2482                let sub_builder = renderer.pop();
2483
2484                renderer.start_block()?;
2485                renderer.append_subrender(sub_builder, repeat(&prefix[..]))?;
2486                renderer.end_block();
2487                pushed_style.unwind(renderer);
2488                Ok(Some(None))
2489            })
2490        }
2491        Div(children) => {
2492            renderer.new_line()?;
2493            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2494                renderer.new_line()?;
2495                pushed_style.unwind(renderer);
2496                Ok(Some(None))
2497            })
2498        }
2499        BlockQuote(children) => {
2500            let prefix = renderer.quote_prefix();
2501            debug_assert!(size_estimate.prefix_size == prefix.len());
2502            let inner_width = size_estimate.min_width - prefix.len();
2503            let sub_builder =
2504                renderer.new_sub_renderer(renderer.width_minus(prefix.len(), inner_width)?)?;
2505            renderer.push(sub_builder);
2506            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2507                let sub_builder = renderer.pop();
2508
2509                renderer.start_block()?;
2510                renderer.append_subrender(sub_builder, repeat(&prefix[..]))?;
2511                renderer.end_block();
2512                pushed_style.unwind(renderer);
2513                Ok(Some(None))
2514            })
2515        }
2516        Ul(items) => {
2517            let prefix = renderer.unordered_item_prefix();
2518            let prefix_len = prefix.len();
2519
2520            TreeMapResult::PendingChildren {
2521                children: items,
2522                cons: Box::new(|renderer, _| {
2523                    pushed_style.unwind(renderer);
2524                    Ok(Some(None))
2525                }),
2526                prefn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2527                    let inner_width = size_estimate.min_width - prefix_len;
2528                    let sub_builder = renderer
2529                        .new_sub_renderer(renderer.width_minus(prefix_len, inner_width)?)?;
2530                    renderer.push(sub_builder);
2531                    Ok(())
2532                })),
2533                postfn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2534                    let sub_builder = renderer.pop();
2535
2536                    let indent = " ".repeat(prefix.len());
2537
2538                    renderer.append_subrender(
2539                        sub_builder,
2540                        once(&prefix[..]).chain(repeat(&indent[..])),
2541                    )?;
2542                    Ok(())
2543                })),
2544            }
2545        }
2546        Ol(start, items) => {
2547            let num_items = items.len();
2548
2549            // The prefix width could be at either end if the start is negative.
2550            let min_number = start;
2551            // Assumption: num_items can't overflow isize.
2552            let max_number = start + (num_items as i64) - 1;
2553            let prefix_width_min = renderer.ordered_item_prefix(min_number).len();
2554            let prefix_width_max = renderer.ordered_item_prefix(max_number).len();
2555            let prefix_width = max(prefix_width_min, prefix_width_max);
2556            let prefixn = format!("{: <width$}", "", width = prefix_width);
2557            let i: Cell<_> = Cell::new(start);
2558
2559            TreeMapResult::PendingChildren {
2560                children: items,
2561                cons: Box::new(|renderer, _| {
2562                    pushed_style.unwind(renderer);
2563                    Ok(Some(None))
2564                }),
2565                prefn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2566                    let inner_min = size_estimate.min_width - size_estimate.prefix_size;
2567                    let sub_builder = renderer
2568                        .new_sub_renderer(renderer.width_minus(prefix_width, inner_min)?)?;
2569                    renderer.push(sub_builder);
2570                    Ok(())
2571                })),
2572                postfn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2573                    let sub_builder = renderer.pop();
2574                    let prefix1 = renderer.ordered_item_prefix(i.get());
2575                    let prefix1 = format!("{: <width$}", prefix1, width = prefix_width);
2576
2577                    renderer.append_subrender(
2578                        sub_builder,
2579                        once(prefix1.as_str()).chain(repeat(prefixn.as_str())),
2580                    )?;
2581                    i.set(i.get() + 1);
2582                    Ok(())
2583                })),
2584            }
2585        }
2586        Dl(items) => {
2587            renderer.start_block()?;
2588
2589            TreeMapResult::PendingChildren {
2590                children: items,
2591                cons: Box::new(|renderer, _| {
2592                    pushed_style.unwind(renderer);
2593                    Ok(Some(None))
2594                }),
2595                prefn: None,
2596                postfn: None,
2597            }
2598        }
2599        Dt(children) => {
2600            renderer.new_line()?;
2601            renderer.start_emphasis()?;
2602            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2603                renderer.end_emphasis()?;
2604                pushed_style.unwind(renderer);
2605                Ok(Some(None))
2606            })
2607        }
2608        Dd(children) => {
2609            let inner_min = size_estimate.min_width - 2;
2610            let sub_builder = renderer.new_sub_renderer(renderer.width_minus(2, inner_min)?)?;
2611            renderer.push(sub_builder);
2612            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2613                let sub_builder = renderer.pop();
2614                renderer.append_subrender(sub_builder, repeat("  "))?;
2615                pushed_style.unwind(renderer);
2616                Ok(Some(None))
2617            })
2618        }
2619        Break => {
2620            renderer.new_line_hard()?;
2621            pushed_style.unwind(renderer);
2622            Finished(None)
2623        }
2624        Table(tab) => render_table_tree(renderer, tab, err_out)?,
2625        TableRow(row, false) => render_table_row(renderer, row, pushed_style, err_out),
2626        TableRow(row, true) => render_table_row_vert(renderer, row, pushed_style, err_out),
2627        TableBody(_) => unimplemented!("Unexpected TableBody while rendering"),
2628        TableCell(cell) => render_table_cell(renderer, cell, pushed_style, err_out),
2629        FragStart(fragname) => {
2630            renderer.record_frag_start(&fragname);
2631            pushed_style.unwind(renderer);
2632            Finished(None)
2633        }
2634        Sup(children) => {
2635            // Special case for digit-only superscripts - use superscript
2636            // characters.
2637            fn sup_digits(children: &[RenderNode]) -> Option<String> {
2638                let [node] = children else {
2639                    return None;
2640                };
2641                if let Text(s) = &node.info {
2642                    if s.chars().all(|d| d.is_ascii_digit()) {
2643                        // It's just a string of digits - replace by superscript characters.
2644                        const SUPERSCRIPTS: [char; 10] =
2645                            ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];
2646                        return Some(
2647                            s.bytes()
2648                                .map(|b| SUPERSCRIPTS[(b - b'0') as usize])
2649                                .collect(),
2650                        );
2651                    }
2652                }
2653                None
2654            }
2655            if let Some(digitstr) = sup_digits(&children) {
2656                renderer.add_inline_text(&digitstr)?;
2657                pushed_style.unwind(renderer);
2658                Finished(None)
2659            } else {
2660                renderer.start_superscript()?;
2661                pending2(children, |renderer: &mut TextRenderer<D>, _| {
2662                    renderer.end_superscript()?;
2663                    pushed_style.unwind(renderer);
2664                    Ok(Some(None))
2665                })
2666            }
2667        }
2668    })
2669}
2670
2671fn render_table_tree<T: Write, D: TextDecorator>(
2672    renderer: &mut TextRenderer<D>,
2673    table: RenderTable,
2674    _err_out: &mut T,
2675) -> render::Result<TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>>> {
2676    /* Now lay out the table. */
2677    let num_columns = table.num_columns;
2678
2679    /* Heuristic: scale the column widths according to how much content there is. */
2680    let mut col_sizes: Vec<SizeEstimate> = vec![Default::default(); num_columns];
2681
2682    for row in table.rows() {
2683        let mut colno = 0;
2684        for cell in row.cells() {
2685            // FIXME: get_size_estimate is still recursive.
2686            let mut estimate = cell.get_size_estimate();
2687
2688            // If the cell has a colspan>1, then spread its size between the
2689            // columns.
2690            estimate.size /= cell.colspan;
2691            estimate.min_width /= cell.colspan;
2692            for i in 0..cell.colspan {
2693                col_sizes[colno + i] = (col_sizes[colno + i]).max(estimate);
2694            }
2695            colno += cell.colspan;
2696        }
2697    }
2698    let tot_size: usize = col_sizes.iter().map(|est| est.size).sum();
2699    let min_size: usize = col_sizes.iter().map(|est| est.min_width).sum::<usize>()
2700        + col_sizes.len().saturating_sub(1);
2701    let width = renderer.width();
2702
2703    let vert_row = renderer.options.raw || (min_size > width || width == 0);
2704
2705    let mut col_widths: Vec<usize> = if !vert_row {
2706        col_sizes
2707            .iter()
2708            .map(|sz| {
2709                if sz.size == 0 {
2710                    0
2711                } else {
2712                    min(
2713                        sz.size,
2714                        if usize::MAX / width <= sz.size {
2715                            // The provided width is too large to multiply by width,
2716                            // so do it the other way around.
2717                            max((width / tot_size) * sz.size, sz.min_width)
2718                        } else {
2719                            max(sz.size * width / tot_size, sz.min_width)
2720                        },
2721                    )
2722                }
2723            })
2724            .collect()
2725    } else {
2726        col_sizes.iter().map(|_| width).collect()
2727    };
2728
2729    if !vert_row {
2730        let num_cols = col_widths.len();
2731        if num_cols > 0 {
2732            loop {
2733                let cur_width = col_widths.iter().sum::<usize>() + num_cols - 1;
2734                if cur_width <= width {
2735                    break;
2736                }
2737                let (i, _) = col_widths
2738                    .iter()
2739                    .enumerate()
2740                    .max_by_key(|&(colno, width)| {
2741                        (
2742                            width.saturating_sub(col_sizes[colno].min_width),
2743                            width,
2744                            usize::MAX - colno,
2745                        )
2746                    })
2747                    .unwrap();
2748                col_widths[i] -= 1;
2749            }
2750        }
2751    }
2752
2753    let table_width = if vert_row {
2754        width
2755    } else {
2756        col_widths.iter().cloned().sum::<usize>()
2757            + col_widths
2758                .iter()
2759                .filter(|&w| w > &0)
2760                .count()
2761                .saturating_sub(1)
2762    };
2763
2764    renderer.start_table()?;
2765
2766    if table_width != 0 && renderer.options.draw_borders {
2767        renderer.add_horizontal_border_width(table_width)?;
2768    }
2769
2770    Ok(TreeMapResult::PendingChildren {
2771        children: table.into_rows(col_widths, vert_row),
2772        cons: Box::new(|_, _| Ok(Some(None))),
2773        prefn: None,
2774        postfn: None,
2775    })
2776}
2777
2778fn render_table_row<T: Write, D: TextDecorator>(
2779    _renderer: &mut TextRenderer<D>,
2780    row: RenderTableRow,
2781    pushed_style: PushedStyleInfo,
2782    _err_out: &mut T,
2783) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2784    let rowspans: Vec<usize> = row.cells().map(|cell| cell.rowspan).collect();
2785    let have_overhang = row.cells().any(|cell| cell.is_dummy);
2786    TreeMapResult::PendingChildren {
2787        children: row.into_cells(false),
2788        cons: Box::new(move |builders, children| {
2789            let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
2790            if have_overhang || children.iter().any(|c| !c.empty()) {
2791                builders.append_columns_with_borders(
2792                    children.into_iter().zip(rowspans.into_iter()),
2793                    true,
2794                )?;
2795            }
2796            pushed_style.unwind(builders);
2797            Ok(Some(None))
2798        }),
2799        prefn: Some(Box::new(|renderer: &mut TextRenderer<D>, node| {
2800            if let RenderNodeInfo::TableCell(ref cell) = node.info {
2801                let sub_builder = renderer.new_sub_renderer(cell.col_width.unwrap())?;
2802                renderer.push(sub_builder);
2803                Ok(())
2804            } else {
2805                panic!()
2806            }
2807        })),
2808        postfn: Some(Box::new(|_renderer: &mut TextRenderer<D>, _| Ok(()))),
2809    }
2810}
2811
2812fn render_table_row_vert<T: Write, D: TextDecorator>(
2813    _renderer: &mut TextRenderer<D>,
2814    row: RenderTableRow,
2815    pushed_style: PushedStyleInfo,
2816    _err_out: &mut T,
2817) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2818    TreeMapResult::PendingChildren {
2819        children: row.into_cells(true),
2820        cons: Box::new(|builders, children| {
2821            let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
2822            builders.append_vert_row(children)?;
2823            pushed_style.unwind(builders);
2824            Ok(Some(None))
2825        }),
2826        prefn: Some(Box::new(|renderer: &mut TextRenderer<D>, node| {
2827            if let RenderNodeInfo::TableCell(ref cell) = node.info {
2828                let sub_builder = renderer.new_sub_renderer(cell.col_width.unwrap())?;
2829                renderer.push(sub_builder);
2830                Ok(())
2831            } else {
2832                Err(Error::Fail)
2833            }
2834        })),
2835        postfn: Some(Box::new(|_renderer: &mut TextRenderer<D>, _| Ok(()))),
2836    }
2837}
2838
2839fn render_table_cell<T: Write, D: TextDecorator>(
2840    _renderer: &mut TextRenderer<D>,
2841    cell: RenderTableCell,
2842    pushed_style: PushedStyleInfo,
2843    _err_out: &mut T,
2844) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2845    pending2(cell.content, |renderer: &mut TextRenderer<D>, _| {
2846        pushed_style.unwind(renderer);
2847        let sub_builder = renderer.pop();
2848
2849        Ok(Some(Some(sub_builder)))
2850    })
2851}
2852
2853pub mod config {
2854    //! Configure the HTML to text translation using the `Config` type, which can be
2855    //! constructed using one of the functions in this module.
2856    use std::io;
2857
2858    use super::Error;
2859    use crate::css::types::Importance;
2860    use crate::css::{Ruleset, Selector, SelectorComponent, Style, StyleData};
2861    #[cfg(feature = "css_ext")]
2862    use crate::{HighlighterMap, SyntaxHighlighter};
2863    use crate::{
2864        HtmlContext, MIN_WIDTH, RenderTree, Result,
2865        css::{PseudoContent, PseudoElement, StyleDecl},
2866        render::text_renderer::{
2867            PlainDecorator, RichAnnotation, RichDecorator, TaggedLine, TextDecorator,
2868        },
2869    };
2870
    /// Specify how images with missing or empty alt text are handled.
    ///
    /// Selected with `Config::empty_img_mode`; the default is `IgnoreEmpty`.
    #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum ImageRenderMode {
        /// Ignore `<img>` without alt, or `<svg>` without `<title>`.
        #[default]
        IgnoreEmpty,
        /// Always process images (will be handled by the decorator).
        ShowAlways,
        /// Use a fixed replacement text (e.g. emoji).
        Replace(&'static str),
        /// Replace with the last component of the link filename, if any.
        Filename,
    }
2885
    /// Configure the HTML processing.
    ///
    /// Values are created by the constructor functions in this module and
    /// adjusted with the builder-style methods, each of which consumes and
    /// returns the `Config`.
    pub struct Config<D: TextDecorator> {
        /// The decorator used when rendering.
        decorator: D,

        /// Optional maximum width to wrap text to, even if more width is available.
        max_wrap_width: Option<usize>,

        /// CSS style rules applied to processed HTML.
        style: StyleData,
        /// Whether CSS from `<style>` elements in the document is used.
        #[cfg(feature = "css")]
        use_doc_css: bool,

        /// Whether lines are padded out to the full render width.
        pad_block_width: bool,

        /// Whether output wider than the requested width is allowed instead of
        /// failing with `TooNarrow`.
        allow_width_overflow: bool,
        /// Minimum width for text wrapping.
        min_wrap_width: usize,
        /// Raw extraction mode: tables are traversed as a single column.
        raw: bool,
        /// Whether table borders are rendered.
        draw_borders: bool,
        /// Whether links may be wrapped.
        wrap_links: bool,
        /// Whether footnotes are added for hyperlinks.
        include_link_footnotes: bool,
        /// Whether Unicode combining characters are used to strike out text.
        use_unicode_strikeout: bool,
        /// How images with no alt text are handled.
        image_mode: ImageRenderMode,

        /// Named syntax highlighters registered with `register_highlighter`.
        #[cfg(feature = "css_ext")]
        syntax_highlighters: HighlighterMap,
    }
2910
2911    impl<D: TextDecorator> Config<D> {
2912        /// Make the HtmlContext from self.
2913        pub(crate) fn make_context(&self) -> HtmlContext {
2914            HtmlContext {
2915                style_data: self.style.clone(),
2916                #[cfg(feature = "css")]
2917                use_doc_css: self.use_doc_css,
2918
2919                max_wrap_width: self.max_wrap_width,
2920                pad_block_width: self.pad_block_width,
2921                allow_width_overflow: self.allow_width_overflow,
2922                min_wrap_width: self.min_wrap_width,
2923                raw: self.raw,
2924                draw_borders: self.draw_borders,
2925                wrap_links: self.wrap_links,
2926                include_link_footnotes: self.include_link_footnotes,
2927                use_unicode_strikeout: self.use_unicode_strikeout,
2928                image_mode: self.image_mode,
2929
2930                #[cfg(feature = "css_ext")]
2931                syntax_highlighters: self.syntax_highlighters.clone(),
2932            }
2933        }
2934        /// Parse with context.
2935        pub(crate) fn do_parse<R>(&self, context: &mut HtmlContext, input: R) -> Result<RenderTree>
2936        where
2937            R: io::Read,
2938        {
2939            let dom = self.parse_html(input)?;
2940            let render_tree = super::dom_to_render_tree_with_context(
2941                dom.document.clone(),
2942                &mut io::sink(),
2943                context,
2944            )?
2945            .ok_or(Error::Fail)?;
2946            Ok(RenderTree(render_tree))
2947        }
2948
2949        /// Parse the HTML into a DOM structure.
2950        pub fn parse_html<R: io::Read>(&self, mut input: R) -> Result<super::RcDom> {
2951            use html5ever::tendril::TendrilSink;
2952            let opts = super::ParseOpts {
2953                tree_builder: super::TreeBuilderOpts {
2954                    scripting_enabled: false,
2955                    ..Default::default()
2956                },
2957                ..Default::default()
2958            };
2959            Ok(super::parse_document(super::RcDom::default(), opts)
2960                .from_utf8()
2961                .read_from(&mut input)?)
2962        }
2963
2964        /// Convert an HTML DOM into a RenderTree.
2965        pub fn dom_to_render_tree(&self, dom: &super::RcDom) -> Result<RenderTree> {
2966            Ok(RenderTree(
2967                super::dom_to_render_tree_with_context(
2968                    dom.document.clone(),
2969                    &mut io::sink(),
2970                    &mut self.make_context(),
2971                )?
2972                .ok_or(Error::Fail)?,
2973            ))
2974        }
2975
2976        /// Render an existing RenderTree into a string.
2977        pub fn render_to_string(&self, render_tree: RenderTree, width: usize) -> Result<String> {
2978            let s = render_tree
2979                .render_with_context(
2980                    &mut self.make_context(),
2981                    width,
2982                    self.decorator.make_subblock_decorator(),
2983                )?
2984                .into_string()?;
2985            Ok(s)
2986        }
2987
2988        /// Take an existing RenderTree, and returns text wrapped to `width` columns.
2989        /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
2990        /// of the provided text decorator's `Annotation`.  The "outer" annotation comes first in
2991        /// the `Vec`.
2992        pub fn render_to_lines(
2993            &self,
2994            render_tree: RenderTree,
2995            width: usize,
2996        ) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
2997            render_tree
2998                .render_with_context(
2999                    &mut self.make_context(),
3000                    width,
3001                    self.decorator.make_subblock_decorator(),
3002                )?
3003                .into_lines()
3004        }
3005
3006        /// Reads HTML from `input`, and returns a `String` with text wrapped to
3007        /// `width` columns.
3008        pub fn string_from_read<R: std::io::Read>(self, input: R, width: usize) -> Result<String> {
3009            let mut context = self.make_context();
3010            let s = self
3011                .do_parse(&mut context, input)?
3012                .render_with_context(&mut context, width, self.decorator)?
3013                .into_string()?;
3014            Ok(s)
3015        }
3016
3017        /// Reads HTML from `input`, and returns text wrapped to `width` columns.
3018        /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
3019        /// of the provided text decorator's `Annotation`.  The "outer" annotation comes first in
3020        /// the `Vec`.
3021        pub fn lines_from_read<R: std::io::Read>(
3022            self,
3023            input: R,
3024            width: usize,
3025        ) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
3026            let mut context = self.make_context();
3027            self.do_parse(&mut context, input)?
3028                .render_with_context(&mut context, width, self.decorator)?
3029                .into_lines()
3030        }
3031
3032        #[cfg(feature = "css")]
3033        /// Add some CSS rules which will be used (if supported) with any
3034        /// HTML processed.
3035        pub fn add_css(mut self, css: &str) -> Result<Self> {
3036            self.style.add_user_css(css)?;
3037            Ok(self)
3038        }
3039
3040        #[cfg(feature = "css")]
3041        /// Add some agent CSS rules which will be used (if supported) with any
3042        /// HTML processed.
3043        pub fn add_agent_css(mut self, css: &str) -> Result<Self> {
3044            self.style.add_agent_css(css)?;
3045            Ok(self)
3046        }
3047
3048        #[cfg(feature = "css")]
3049        /// Parse CSS from any \<style\> elements and use supported rules.
3050        pub fn use_doc_css(mut self) -> Self {
3051            self.use_doc_css = true;
3052            self
3053        }
3054
3055        /// Pad lines out to the full render width.
3056        pub fn pad_block_width(mut self) -> Self {
3057            self.pad_block_width = true;
3058            self
3059        }
3060
3061        /// Set the maximum text wrap width.
3062        /// When set, paragraphs will be wrapped to that width even if there
3063        /// is more total width available for rendering.
3064        pub fn max_wrap_width(mut self, wrap_width: usize) -> Self {
3065            self.max_wrap_width = Some(wrap_width);
3066            self
3067        }
3068
3069        /// Allow the output to be wider than the max width.  When enabled,
3070        /// then output wider than the specified width will be returned
3071        /// instead of returning `Err(TooNarrow)` if the output wouldn't
3072        /// otherwise fit.
3073        pub fn allow_width_overflow(mut self) -> Self {
3074            self.allow_width_overflow = true;
3075            self
3076        }
3077
3078        /// Set the minimum width for text wrapping.  The default is 3.
3079        /// Blocks of text will be forced to have at least this width
3080        /// (unless the text inside is less than that).  Increasing this
3081        /// can increase the chance that the width will overflow, leading
3082        /// to a TooNarrow error unless `allow_width_overflow()` is set.
3083        pub fn min_wrap_width(mut self, min_wrap_width: usize) -> Self {
3084            self.min_wrap_width = min_wrap_width;
3085            self
3086        }
3087
3088        /// Raw extraction, ensures text in table cells ends up rendered together
3089        /// This traverses tables as if they had a single column and every cell is its own row.
3090        /// Implies `no_table_borders()`
3091        pub fn raw_mode(mut self, raw: bool) -> Self {
3092            self.raw = raw;
3093            self.draw_borders = false;
3094            self
3095        }
3096
3097        /// Do not render table borders
3098        pub fn no_table_borders(mut self) -> Self {
3099            self.draw_borders = false;
3100            self
3101        }
3102        /// Do not wrap links
3103        pub fn no_link_wrapping(mut self) -> Self {
3104            self.wrap_links = false;
3105            self
3106        }
3107
3108        /// Select whether to use Unicode combining characters to strike out text.
3109        pub fn unicode_strikeout(mut self, use_unicode: bool) -> Self {
3110            self.use_unicode_strikeout = use_unicode;
3111            self
3112        }
3113
3114        /// Make a simple "contains" type rule for an element.
3115        fn make_surround_rule(element: &str, after: bool, content: &str) -> Ruleset {
3116            Ruleset {
3117                selector: Selector {
3118                    components: vec![SelectorComponent::Element(element.into())],
3119                    pseudo_element: Some(if after {
3120                        PseudoElement::After
3121                    } else {
3122                        PseudoElement::Before
3123                    }),
3124                },
3125                styles: vec![StyleDecl {
3126                    style: Style::Content(PseudoContent {
3127                        text: content.into(),
3128                    }),
3129                    importance: Importance::Default,
3130                }],
3131            }
3132        }
3133
3134        /// Decorate <em> etc. similarly to markdown
3135        pub fn do_decorate(mut self) -> Self {
3136            self.style.add_agent_rules(&[
3137                Self::make_surround_rule("em", false, "*"),
3138                Self::make_surround_rule("em", true, "*"),
3139                Self::make_surround_rule("dt", false, "*"),
3140                Self::make_surround_rule("dt", true, "*"),
3141                Self::make_surround_rule("strong", false, "**"),
3142                Self::make_surround_rule("strong", true, "**"),
3143                Self::make_surround_rule("b", false, "**"),
3144                Self::make_surround_rule("b", true, "**"),
3145                Self::make_surround_rule("code", false, "`"),
3146                Self::make_surround_rule("code", true, "`"),
3147            ]);
3148            self
3149        }
3150
3151        /// Add footnotes for hyperlinks
3152        pub fn link_footnotes(mut self, include_footnotes: bool) -> Self {
3153            self.include_link_footnotes = include_footnotes;
3154            self
3155        }
3156
3157        /// Configure how images with no alt text are handled.
3158        pub fn empty_img_mode(mut self, img_mode: ImageRenderMode) -> Self {
3159            self.image_mode = img_mode;
3160            self
3161        }
3162
3163        #[cfg(feature = "css_ext")]
3164        /// Register a named syntax highlighter.
3165        ///
3166        /// The highlighter will be used when a `<pre>` element
3167        /// is styled with `x-syntax: name`
3168        pub fn register_highlighter(
3169            mut self,
3170            name: impl Into<String>,
3171            f: SyntaxHighlighter,
3172        ) -> Self {
3173            use std::rc::Rc;
3174
3175            self.syntax_highlighters.insert(name.into(), Rc::new(f));
3176            self
3177        }
3178    }
3179
3180    impl Config<RichDecorator> {
3181        /// Return coloured text.  `colour_map` is a function which takes
3182        /// a list of `RichAnnotation` and some text, and returns the text
3183        /// with any terminal escapes desired to indicate those annotations
3184        /// (such as colour).
3185        pub fn coloured<R, FMap>(self, input: R, width: usize, colour_map: FMap) -> Result<String>
3186        where
3187            R: std::io::Read,
3188            FMap: Fn(&[RichAnnotation], &str) -> String,
3189        {
3190            let mut context = self.make_context();
3191            let render_tree = self.do_parse(&mut context, input)?;
3192            self.render_coloured(render_tree, width, colour_map)
3193        }
3194
3195        /// Return coloured text from a RenderTree.  `colour_map` is a function which takes a list
3196        /// of `RichAnnotation` and some text, and returns the text with any terminal escapes
3197        /// desired to indicate those annotations (such as colour).
3198        pub fn render_coloured<FMap>(
3199            &self,
3200            render_tree: RenderTree,
3201            width: usize,
3202            colour_map: FMap,
3203        ) -> Result<String>
3204        where
3205            FMap: Fn(&[RichAnnotation], &str) -> String,
3206        {
3207            let lines = self.render_to_lines(render_tree, width)?;
3208
3209            let mut result = String::new();
3210            for line in lines {
3211                for ts in line.tagged_strings() {
3212                    result.push_str(&colour_map(&ts.tag, &ts.s));
3213                }
3214                result.push('\n');
3215            }
3216            Ok(result)
3217        }
3218    }
3219
3220    /// Return a Config initialized with a `RichDecorator`.
3221    pub fn rich() -> Config<RichDecorator> {
3222        with_decorator(RichDecorator::new())
3223    }
3224
3225    /// Return a Config initialized with a `PlainDecorator`.
3226    pub fn plain() -> Config<PlainDecorator> {
3227        with_decorator(PlainDecorator::new())
3228            .do_decorate()
3229            .link_footnotes(true)
3230    }
3231
3232    /// Return a Config initialized with a `PlainDecorator`.
3233    pub fn plain_no_decorate() -> Config<PlainDecorator> {
3234        with_decorator(PlainDecorator::new())
3235    }
3236
3237    /// Return a Config initialized with a custom decorator.
3238    pub fn with_decorator<D: TextDecorator>(decorator: D) -> Config<D> {
3239        Config {
3240            decorator,
3241            style: Default::default(),
3242            #[cfg(feature = "css")]
3243            use_doc_css: false,
3244            max_wrap_width: None,
3245            pad_block_width: false,
3246            allow_width_overflow: false,
3247            min_wrap_width: MIN_WIDTH,
3248            raw: false,
3249            draw_borders: true,
3250            wrap_links: true,
3251            include_link_footnotes: false,
3252            use_unicode_strikeout: true,
3253            image_mode: ImageRenderMode::IgnoreEmpty,
3254            #[cfg(feature = "css_ext")]
3255            syntax_highlighters: Default::default(),
3256        }
3257    }
3258}
3259
/// The structure of an HTML document that can be rendered using a [`TextDecorator`][].
///
/// This is a newtype wrapping a single `RenderNode`.
///
/// [`TextDecorator`]: render/text_renderer/trait.TextDecorator.html
#[derive(Clone, Debug)]
pub struct RenderTree(RenderNode);
3266
3267impl std::fmt::Display for RenderTree {
3268    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3269        writeln!(f, "Render tree:")?;
3270        self.0.write_self(f, 1)
3271    }
3272}
3273
3274impl RenderTree {
3275    /// Render this document using the given `decorator` and wrap it to `width` columns.
3276    fn render_with_context<D: TextDecorator>(
3277        self,
3278        context: &mut HtmlContext,
3279        width: usize,
3280        decorator: D,
3281    ) -> Result<RenderedText<D>> {
3282        if width == 0 {
3283            return Err(Error::TooNarrow);
3284        }
3285        let render_options = RenderOptions {
3286            wrap_width: context.max_wrap_width,
3287            pad_block_width: context.pad_block_width,
3288            allow_width_overflow: context.allow_width_overflow,
3289            raw: context.raw,
3290            draw_borders: context.draw_borders,
3291            wrap_links: context.wrap_links,
3292            include_link_footnotes: context.include_link_footnotes,
3293            use_unicode_strikeout: context.use_unicode_strikeout,
3294            img_mode: context.image_mode,
3295        };
3296        let test_decorator = decorator.make_subblock_decorator();
3297        let builder = SubRenderer::new(width, render_options, decorator);
3298        let builder =
3299            render_tree_to_string(context, builder, &test_decorator, self.0, &mut io::sink())?;
3300        Ok(RenderedText(builder))
3301    }
3302}
3303
3304/// A rendered HTML document.
3305struct RenderedText<D: TextDecorator>(SubRenderer<D>);
3306
3307impl<D: TextDecorator> RenderedText<D> {
3308    /// Convert the rendered HTML document to a string.
3309    fn into_string(self) -> render::Result<String> {
3310        self.0.into_string()
3311    }
3312
3313    /// Convert the rendered HTML document to a vector of lines with the annotations created by the
3314    /// decorator.
3315    fn into_lines(self) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
3316        Ok(self
3317            .0
3318            .into_lines()?
3319            .into_iter()
3320            .map(RenderLine::into_tagged_line)
3321            .collect())
3322    }
3323}
3324
3325/// Reads and parses HTML from `input` and prepares a render tree.
3326pub fn parse(input: impl io::Read) -> Result<RenderTree> {
3327    let cfg = config::with_decorator(TrivialDecorator::new());
3328    cfg.do_parse(&mut cfg.make_context(), input)
3329}
3330
3331/// Reads HTML from `input`, decorates it using `decorator`, and
3332/// returns a `String` with text wrapped to `width` columns.
3333pub fn from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> Result<String>
3334where
3335    R: io::Read,
3336    D: TextDecorator,
3337{
3338    config::with_decorator(decorator).string_from_read(input, width)
3339}
3340
3341/// Reads HTML from `input`, and returns a `String` with text wrapped to
3342/// `width` columns.
3343pub fn from_read<R>(input: R, width: usize) -> Result<String>
3344where
3345    R: io::Read,
3346{
3347    config::plain().string_from_read(input, width)
3348}
3349
3350/// Reads HTML from `input`, and returns text wrapped to `width` columns.
3351///
3352/// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
3353/// of `RichAnnotation`.  The "outer" annotation comes first in the `Vec`.
3354pub fn from_read_rich<R>(input: R, width: usize) -> Result<Vec<TaggedLine<Vec<RichAnnotation>>>>
3355where
3356    R: io::Read,
3357{
3358    config::rich().lines_from_read(input, width)
3359}
3360
3361mod ansi_colours;
3362
3363pub use ansi_colours::from_read_coloured;
3364
3365#[cfg(test)]
3366mod tests;
3367
3368fn calc_ol_prefix_size<D: TextDecorator>(start: i64, num_items: usize, decorator: &D) -> usize {
3369    // The prefix width could be at either end if the start is negative.
3370    let min_number = start;
3371    // Assumption: num_items can't overflow isize.
3372    let max_number = start + (num_items as i64) - 1;
3373
3374    // This assumes that the decorator gives the same width as default.
3375    let prefix_width_min = decorator.ordered_item_prefix(min_number).len();
3376    let prefix_width_max = decorator.ordered_item_prefix(max_number).len();
3377    max(prefix_width_min, prefix_width_max)
3378}