Skip to main content

html2text/
lib.rs

1//! Convert HTML to text formats.
2//!
3//! This crate renders HTML into a text format, wrapped to a specified width.
4//! This can either be plain text or with extra annotations to (for example)
5//! show in a terminal which supports colours.
6//!
7//! # Examples
8//!
9//! ```rust
10//! # use html2text::from_read;
11//! let html = b"
12//!        <ul>
13//!          <li>Item one</li>
14//!          <li>Item two</li>
15//!          <li>Item three</li>
16//!        </ul>";
17//! assert_eq!(from_read(&html[..], 20).unwrap(),
18//!            "\
19//! * Item one
20//! * Item two
21//! * Item three
22//! ");
23//! ```
24//! A couple of simple demonstration programs are included as examples:
25//!
26//! ### html2text
27//!
28//! The simplest example uses `from_read` to convert HTML on stdin into plain
29//! text:
30//!
31//! ```sh
32//! $ cargo run --example html2text < foo.html
33//! [...]
34//! ```
35//!
36//! ### html2term
37//!
38//! A very simple example of using the rich interface (`from_read_rich`) for a
39//! slightly interactive console HTML viewer is provided as `html2term`.
40//!
41//! ```sh
42//! $ cargo run --example html2term foo.html
43//! [...]
44//! ```
45//!
46//! Note that this example takes the HTML file as a parameter so that it can
47//! read keys from stdin.
48//!
49
50#![deny(missing_docs)]
51
// Check code in README.md: this item only exists when building doctests, and
// attaching the README as its documentation makes the README's code examples
// get compiled and run as doctests.
#[cfg(doctest)]
#[doc = include_str!("../README.md")]
struct ReadMe;
56
57#[macro_use]
58mod macros;
59
60pub mod css;
61pub mod render;
62
/// Character-level helpers for the special cases that come up when wrapping
/// text and handling whitespace.
trait WhitespaceExt {
    /// Returns whether this character always takes space.  That is the case
    /// for every non-whitespace character and for the non-breaking space
    /// (U+00A0).
    fn always_takes_space(&self) -> bool;

    /// Returns true if a word break is allowed at this character.  This
    /// covers the zero-width space (U+200B) and all whitespace other than the
    /// non-breaking space (U+00A0).
    fn is_wordbreak_point(&self) -> bool;
}

impl WhitespaceExt for char {
    fn always_takes_space(&self) -> bool {
        // U+00A0 is whitespace as far as `char::is_whitespace` is concerned,
        // so it has to be singled out explicitly.
        *self == '\u{A0}' || !self.is_whitespace()
    }

    fn is_wordbreak_point(&self) -> bool {
        let ch = *self;
        if ch == '\u{00A0}' {
            // Never break at a non-breaking space.
            return false;
        }
        // U+200B is listed explicitly because `is_whitespace` does not cover it.
        ch == '\u{200b}' || ch.is_whitespace()
    }
}
92
93/// Extra methods for strings
94trait StrExt {
95    /// Trims leading/trailing whitespace expect for hard spaces.
96    fn trim_collapsible_ws(&self) -> &str;
97}
98
99impl StrExt for str {
100    fn trim_collapsible_ws(&self) -> &str {
101        self.trim_matches(|c: char| !c.always_takes_space())
102    }
103}
104
#[cfg(feature = "css_ext")]
/// Text style information.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub struct TextStyle {
    /// The foreground colour
    pub fg_colour: Colour,
    /// The background colour, or None.
    pub bg_colour: Option<Colour>,
}

#[cfg(feature = "css_ext")]
impl TextStyle {
    /// Build a TextStyle with both a foreground and a background colour.
    pub fn colours(fg_colour: Colour, bg_colour: Colour) -> Self {
        let bg_colour = Some(bg_colour);
        Self {
            fg_colour,
            bg_colour,
        }
    }

    /// Build a TextStyle with only a foreground colour; the background is
    /// left as `None`.
    pub fn foreground(fg_colour: Colour) -> Self {
        Self {
            bg_colour: None,
            fg_colour,
        }
    }
}
134
#[cfg(feature = "css_ext")]
/// Syntax highlighter function.
///
/// Takes a string corresponding to some text to be highlighted, and returns
/// spans with sub-strs of that text with associated colours.
///
/// The `for<'a>` bound ties each returned `&str` to the lifetime of the input
/// string, so the spans borrow from the text that was passed in.
pub type SyntaxHighlighter = Box<dyn for<'a> Fn(&'a str) -> Vec<(TextStyle, &'a str)>>;
141
142use markup5ever_rcdom::Node;
143use render::text_renderer::{
144    RenderLine, RenderOptions, RichAnnotation, SubRenderer, TaggedLine, TextRenderer,
145};
146use render::{Renderer, TextDecorator, TrivialDecorator};
147
148use html5ever::driver::ParseOpts;
149use html5ever::parse_document;
150use html5ever::tree_builder::TreeBuilderOpts;
151mod markup5ever_rcdom;
152pub use html5ever::{expanded_name, local_name, namespace_url, ns};
153pub use markup5ever_rcdom::{
154    Handle,
155    NodeData::{Comment, Document, Element},
156    RcDom,
157};
158
159use std::cell::{Cell, RefCell};
160use std::cmp::{max, min};
161use std::collections::{BTreeSet, HashMap};
162#[cfg(feature = "css_ext")]
163use std::ops::Range;
164use std::rc::Rc;
165use unicode_width::UnicodeWidthStr;
166
167use std::io;
168use std::io::Write;
169use std::iter::{once, repeat};
170
/// How whitespace in text is treated (the value of the CSS `white-space`
/// property).
#[derive(Debug, Copy, Clone, Default, PartialEq, Eq)]
pub(crate) enum WhiteSpace {
    /// Whitespace is not preserved; wrapping is allowed.
    #[default]
    Normal,
    // NoWrap,
    /// Whitespace is preserved; no wrapping.
    Pre,
    /// Whitespace is preserved; wrapping is allowed.
    #[allow(unused)]
    PreWrap,
    // PreLine,
    // BreakSpaces,
}

impl WhiteSpace {
    /// Whether whitespace has to be kept as written (`Pre` and `PreWrap`).
    pub fn preserve_whitespace(&self) -> bool {
        matches!(self, WhiteSpace::Pre | WhiteSpace::PreWrap)
    }
    /// Whether text may be wrapped (everything except `Pre`).
    #[allow(unused)]
    pub fn do_wrap(&self) -> bool {
        matches!(self, WhiteSpace::Normal | WhiteSpace::PreWrap)
    }
}
198
/// An RGB colour value
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub struct Colour {
    /// Red value
    pub r: u8,
    /// Green value
    pub g: u8,
    /// Blue value
    pub b: u8,
}

/// Formats the colour as `#rrggbb` using two lower-case hex digits per
/// channel.
impl std::fmt::Display for Colour {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Colour { r, g, b } = *self;
        write!(f, "#{r:02x}{g:02x}{b:02x}")
    }
}
215
/// Where a style value came from.
///
/// The derived `PartialOrd` follows declaration order
/// (`None < Agent < User < Author`); `WithSpec::maybe_update` relies on that
/// ordering when choosing between values set from different origins.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default, PartialOrd)]
pub(crate) enum StyleOrigin {
    /// No origin recorded; this is the default.
    #[default]
    None,
    /// Set by the user agent.
    Agent,
    /// Set by the user.
    #[allow(unused)]
    User,
    /// Set by the document author.
    #[allow(unused)]
    Author,
}
226
/// Specificity of a style rule.
///
/// Values are ordered by `inline` first, then `id`, then `class`, then `typ`
/// (see the `PartialOrd` impl).  The three numeric fields are presumably
/// counts of id, class and type selector components.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Default)]
pub(crate) struct Specificity {
    inline: bool,
    id: u16,
    class: u16,
    typ: u16,
}

impl Specificity {
    /// The specificity used for inline styles: `inline` set, all counts zero.
    #[cfg(feature = "css")]
    fn inline() -> Self {
        Specificity {
            inline: true,
            id: 0,
            class: 0,
            typ: 0,
        }
    }
}

/// Component-wise sum of two specificities.
///
/// The counters saturate at `u16::MAX` rather than overflowing, so an
/// extremely long selector can neither panic (debug builds) nor wrap around
/// to a low specificity (release builds).
impl std::ops::Add<&Specificity> for &Specificity {
    type Output = Specificity;

    fn add(self, rhs: &Specificity) -> Self::Output {
        Specificity {
            inline: self.inline || rhs.inline,
            id: self.id.saturating_add(rhs.id),
            class: self.class.saturating_add(rhs.class),
            typ: self.typ.saturating_add(rhs.typ),
        }
    }
}

/// In-place version of `Add`, with the same saturating behaviour.
impl std::ops::AddAssign<&Specificity> for Specificity {
    fn add_assign(&mut self, rhs: &Specificity) {
        self.inline = self.inline || rhs.inline;
        self.id = self.id.saturating_add(rhs.id);
        self.class = self.class.saturating_add(rhs.class);
        self.typ = self.typ.saturating_add(rhs.typ);
    }
}

/// Lexicographic ordering over (`inline`, `id`, `class`, `typ`); always
/// returns `Some`.
impl PartialOrd for Specificity {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        let ordering = self
            .inline
            .cmp(&other.inline)
            .then(self.id.cmp(&other.id))
            .then(self.class.cmp(&other.class))
            .then(self.typ.cmp(&other.typ));
        Some(ordering)
    }
}
286
287#[derive(Clone, Copy, Debug)]
288pub(crate) struct WithSpec<T> {
289    val: Option<T>,
290    origin: StyleOrigin,
291    specificity: Specificity,
292    important: bool,
293}
294impl<T: Clone> WithSpec<T> {
295    pub(crate) fn maybe_update(
296        &mut self,
297        important: bool,
298        origin: StyleOrigin,
299        specificity: Specificity,
300        val: T,
301    ) {
302        if self.val.is_some() {
303            // We already have a value, so need to check.
304            if self.important && !important {
305                // important takes priority over not important.
306                return;
307            }
308            // importance is the same.  Next is checking the origin.
309            {
310                use StyleOrigin::*;
311                match (self.origin, origin) {
312                    (Agent, Agent) | (User, User) | (Author, Author) => {
313                        // They're the same so continue the comparison
314                    }
315                    (mine, theirs) => {
316                        if (important && theirs > mine) || (!important && mine > theirs) {
317                            return;
318                        }
319                    }
320                }
321            }
322            // We're now from the same origin an importance
323            if specificity < self.specificity {
324                return;
325            }
326        }
327        self.val = Some(val);
328        self.origin = origin;
329        self.specificity = specificity;
330        self.important = important;
331    }
332
333    pub fn val(&self) -> Option<&T> {
334        self.val.as_ref()
335    }
336}
337
338impl<T> Default for WithSpec<T> {
339    fn default() -> Self {
340        WithSpec {
341            val: None,
342            origin: StyleOrigin::None,
343            specificity: Default::default(),
344            important: false,
345        }
346    }
347}
348
/// The style values computed for a node.  Most fields are wrapped in
/// `WithSpec` so that later declarations can be weighed against the one which
/// set the current value.
#[derive(Debug, Clone, Default)]
pub(crate) struct ComputedStyle {
    #[cfg(feature = "css")]
    /// The computed foreground colour, if any
    pub(crate) colour: WithSpec<Colour>,
    #[cfg(feature = "css")]
    /// The computed background colour, if any
    pub(crate) bg_colour: WithSpec<Colour>,
    #[cfg(feature = "css")]
    /// If set, indicates whether `display: none` or something equivalent applies
    pub(crate) display: WithSpec<css::Display>,
    /// The CSS white-space property
    pub(crate) white_space: WithSpec<WhiteSpace>,
    /// The CSS content property
    pub(crate) content: WithSpec<css::PseudoContent>,
    #[cfg(feature = "css_ext")]
    // NOTE(review): presumably the syntax-highlighting settings for this node;
    // the meaning is defined by `css::SyntaxInfo`, which is not visible here.
    pub(crate) syntax: WithSpec<css::SyntaxInfo>,

    /// The CSS content property for ::before (held as a nested style)
    pub(crate) content_before: Option<Box<ComputedStyle>>,
    /// The CSS content property for ::after (held as a nested style)
    pub(crate) content_after: Option<Box<ComputedStyle>>,

    /// A non-CSS flag indicating we're inside a <pre>.
    pub(crate) internal_pre: bool,
}

impl ComputedStyle {
    /// Return the style data inherited by children.
    ///
    /// Currently this is a plain clone, so every field (including the
    /// `::before`/`::after` styles) is passed on.
    pub(crate) fn inherit(&self) -> Self {
        // TODO: clear fields that shouldn't be inherited
        self.clone()
    }
}
383
/// Errors from reading or rendering HTML
#[derive(thiserror::Error, Debug)]
#[non_exhaustive]
pub enum Error {
    /// The output width was too narrow to render to.
    #[error("Output width not wide enough.")]
    TooNarrow,
    /// CSS parse error
    #[error("Invalid CSS")]
    CssParseError,
    /// A general error was encountered.
    #[error("Unknown failure")]
    Fail,
    /// An I/O error
    #[error("I/O error")]
    IoError(#[from] io::Error),
}
401
402impl PartialEq for Error {
403    fn eq(&self, other: &Error) -> bool {
404        use Error::*;
405        match (self, other) {
406            (TooNarrow, TooNarrow) => true,
407            #[cfg(feature = "css")]
408            (CssParseError, CssParseError) => true,
409            (Fail, Fail) => true,
410            _ => false,
411        }
412    }
413}
414
415impl Eq for Error {}
416
/// Crate-local shorthand for `std::result::Result` with this crate's `Error`.
type Result<T> = std::result::Result<T, Error>;

// NOTE(review): presumably the narrowest width (in columns) that rendering
// will work with; its uses are outside this part of the file - confirm.
const MIN_WIDTH: usize = 3;
420
/// Size information/estimate
#[derive(Debug, Copy, Clone, Default)]
struct SizeEstimate {
    /// Rough overall size
    size: usize,
    /// The narrowest possible
    min_width: usize,

    /// The use is specific to the node type.
    prefix_size: usize,
}

impl SizeEstimate {
    /// Combine two estimates: sizes are added and the larger minimum width
    /// is kept.  The prefix size of the result is reset to zero.
    fn add(self, other: SizeEstimate) -> SizeEstimate {
        SizeEstimate {
            size: self.size + other.size,
            min_width: self.min_width.max(other.min_width),
            ..Default::default()
        }
    }
    /// Combine two estimates for items which need to sit side by side: both
    /// the sizes and the minimum widths are added.  The prefix size of the
    /// result is reset to zero.
    fn add_hor(self, other: SizeEstimate) -> SizeEstimate {
        let size = self.size + other.size;
        let min_width = self.min_width + other.min_width;
        SizeEstimate {
            size,
            min_width,
            ..Default::default()
        }
    }

    /// Combine two estimates by taking the larger size and the larger
    /// minimum width.  The prefix size of the result is reset to zero.
    fn max(self, other: SizeEstimate) -> SizeEstimate {
        SizeEstimate {
            size: self.size.max(other.size),
            min_width: self.min_width.max(other.min_width),
            ..Default::default()
        }
    }
}
461
462#[derive(Clone, Debug)]
463/// Render tree table cell
464struct RenderTableCell {
465    colspan: usize,
466    rowspan: usize,
467    content: Vec<RenderNode>,
468    size_estimate: Cell<Option<SizeEstimate>>,
469    col_width: Option<usize>, // Actual width to use
470    x_pos: Option<usize>,     // X location
471    style: ComputedStyle,
472    is_dummy: bool,
473}
474
475impl RenderTableCell {
476    /// Calculate or return the estimate size of the cell
477    fn get_size_estimate(&self) -> SizeEstimate {
478        let Some(size) = self.size_estimate.get() else {
479            let size = self
480                .content
481                .iter()
482                .map(|node| node.get_size_estimate())
483                .fold(Default::default(), SizeEstimate::add);
484            self.size_estimate.set(Some(size));
485            return size;
486        };
487        size
488    }
489
490    /// Make a placeholder cell to cover for a cell above with
491    /// larger rowspan.
492    pub fn dummy(colspan: usize) -> Self {
493        RenderTableCell {
494            colspan,
495            rowspan: 1,
496            content: Default::default(),
497            size_estimate: Cell::new(Some(SizeEstimate::default())),
498            col_width: None,
499            x_pos: None,
500            style: Default::default(),
501            is_dummy: true,
502        }
503    }
504}
505
506#[derive(Clone, Debug)]
507/// Render tree table row
508struct RenderTableRow {
509    cells: Vec<RenderTableCell>,
510    col_sizes: Option<Vec<usize>>,
511    style: ComputedStyle,
512}
513
514impl RenderTableRow {
515    /// Return a mutable iterator over the cells.
516    fn cells(&self) -> std::slice::Iter<'_, RenderTableCell> {
517        self.cells.iter()
518    }
519    /// Return a mutable iterator over the cells.
520    fn cells_mut(&mut self) -> std::slice::IterMut<'_, RenderTableCell> {
521        self.cells.iter_mut()
522    }
523    /// Return an iterator which returns cells by values (removing
524    /// them from the row).
525    fn cells_drain(&mut self) -> impl Iterator<Item = RenderTableCell> + use<> {
526        std::mem::take(&mut self.cells).into_iter()
527    }
528    /// Count the number of cells in the row.
529    /// Takes into account colspan.
530    fn num_cells(&self) -> usize {
531        self.cells.iter().map(|cell| cell.colspan.max(1)).sum()
532    }
533
534    /// Return the contained cells as RenderNodes, annotated with their
535    /// widths if available.  Skips cells with no width allocated.
536    fn into_cells(self, vertical: bool) -> Vec<RenderNode> {
537        let mut result = Vec::new();
538        let mut colno = 0;
539        let col_sizes = self.col_sizes.unwrap();
540        let mut x_pos = 0;
541        for mut cell in self.cells {
542            let colspan = cell.colspan;
543            let col_width = if vertical {
544                col_sizes[colno]
545            } else {
546                col_sizes[colno..colno + cell.colspan].iter().sum::<usize>()
547            };
548            // Skip any zero-width columns
549            if col_width > 0 {
550                let this_col_width = col_width + cell.colspan - 1;
551                cell.col_width = Some(this_col_width);
552                cell.x_pos = Some(x_pos);
553                x_pos += this_col_width + 1;
554                let style = cell.style.clone();
555                result.push(RenderNode::new_styled(
556                    RenderNodeInfo::TableCell(cell),
557                    style,
558                ));
559            }
560            colno += colspan;
561        }
562        result
563    }
564}
565
566#[derive(Clone, Debug)]
567/// A representation of a table render tree with metadata.
568struct RenderTable {
569    rows: Vec<RenderTableRow>,
570    num_columns: usize,
571    size_estimate: Cell<Option<SizeEstimate>>,
572}
573
574impl RenderTable {
575    /// Create a new RenderTable with the given rows
576    fn new(mut rows: Vec<RenderTableRow>) -> RenderTable {
577        // We later on want to allocate a vector sized by the column count,
578        // but occasionally we see something like colspan="1000000000".  We
579        // handle this by remapping the column ids to the smallest values
580        // possible.
581        //
582        // Tables with no explicit colspan will be unchanged, but if there
583        // are multiple columns each covered by a single <td> on every row,
584        // they will be collapsed into a single column.  For example:
585        //
586        //    <td><td colspan=1000><td>
587        //    <td colspan=1000><td><td>
588        //
589        //  becomes the equivalent:
590        //    <td><td colspan=2><td>
591        //    <td colspan=2><td><td>
592
593        // This will include 0 and the index after the last colspan.
594        let mut col_positions = BTreeSet::new();
595        // Cells which have a rowspan > 1 from previous rows.
596        // Each element is (rows_left, colpos, colspan)
597        // Before each row, the overhangs are in reverse order so that
598        // they can be popped off.
599        let mut overhang_cells: Vec<(usize, usize, usize)> = Vec::new();
600        let mut next_overhang_cells = Vec::new();
601        col_positions.insert(0);
602        for row in &mut rows {
603            let mut col = 0;
604            let mut new_cells = Vec::new();
605
606            for cell in row.cells_drain() {
607                while let Some(hanging) = overhang_cells.last() {
608                    if hanging.1 <= col {
609                        new_cells.push(RenderTableCell::dummy(hanging.2));
610                        col += hanging.2;
611                        col_positions.insert(col);
612                        let mut used = overhang_cells.pop().unwrap();
613                        if used.0 > 1 {
614                            used.0 -= 1;
615                            next_overhang_cells.push(used);
616                        }
617                    } else {
618                        break;
619                    }
620                }
621                if cell.rowspan > 1 {
622                    next_overhang_cells.push((cell.rowspan - 1, col, cell.colspan));
623                }
624                col += cell.colspan;
625                col_positions.insert(col);
626                new_cells.push(cell);
627            }
628            // Handle remaining overhanging cells
629            while let Some(mut hanging) = overhang_cells.pop() {
630                new_cells.push(RenderTableCell::dummy(hanging.2));
631                col += hanging.2;
632                col_positions.insert(col);
633                if hanging.0 > 1 {
634                    hanging.0 -= 1;
635                    next_overhang_cells.push(hanging);
636                }
637            }
638
639            row.cells = new_cells;
640            overhang_cells = std::mem::take(&mut next_overhang_cells);
641            overhang_cells.reverse();
642        }
643
644        let colmap: HashMap<_, _> = col_positions
645            .into_iter()
646            .enumerate()
647            .map(|(i, pos)| (pos, i))
648            .collect();
649
650        for row in &mut rows {
651            let mut pos = 0;
652            let mut mapped_pos = 0;
653            for cell in row.cells_mut() {
654                let nextpos = pos + cell.colspan.max(1);
655                let next_mapped_pos = *colmap.get(&nextpos).unwrap();
656                cell.colspan = next_mapped_pos - mapped_pos;
657                pos = nextpos;
658                mapped_pos = next_mapped_pos;
659            }
660        }
661
662        let num_columns = rows.iter().map(|r| r.num_cells()).max().unwrap_or(0);
663        RenderTable {
664            rows,
665            num_columns,
666            size_estimate: Cell::new(None),
667        }
668    }
669
670    /// Return an iterator over the rows.
671    fn rows(&self) -> std::slice::Iter<'_, RenderTableRow> {
672        self.rows.iter()
673    }
674
675    /// Consume this and return a `Vec<RenderNode>` containing the children;
676    /// the children know the column sizes required.
677    fn into_rows(self, col_sizes: Vec<usize>, vert: bool) -> Vec<RenderNode> {
678        self.rows
679            .into_iter()
680            .map(|mut tr| {
681                tr.col_sizes = Some(col_sizes.clone());
682                let style = tr.style.clone();
683                RenderNode::new_styled(RenderNodeInfo::TableRow(tr, vert), style)
684            })
685            .collect()
686    }
687
688    fn calc_size_estimate(&self, _context: &HtmlContext) -> SizeEstimate {
689        if self.num_columns == 0 {
690            let result = SizeEstimate {
691                size: 0,
692                min_width: 0,
693                prefix_size: 0,
694            };
695            self.size_estimate.set(Some(result));
696            return result;
697        }
698        let mut sizes: Vec<SizeEstimate> = vec![Default::default(); self.num_columns];
699
700        // For now, a simple estimate based on adding up sub-parts.
701        for row in self.rows() {
702            let mut colno = 0usize;
703            for cell in row.cells() {
704                let cellsize = cell.get_size_estimate();
705                for colnum in 0..cell.colspan {
706                    sizes[colno + colnum].size += cellsize.size / cell.colspan;
707                    sizes[colno + colnum].min_width = max(
708                        sizes[colno + colnum].min_width,
709                        cellsize.min_width / cell.colspan,
710                    );
711                }
712                colno += cell.colspan;
713            }
714        }
715        let size = sizes.iter().map(|s| s.size).sum::<usize>() + self.num_columns.saturating_sub(1);
716        let min_width = sizes.iter().map(|s| s.min_width).sum::<usize>() + self.num_columns - 1;
717        let result = SizeEstimate {
718            size,
719            min_width,
720            prefix_size: 0,
721        };
722        self.size_estimate.set(Some(result));
723        result
724    }
725}
726
/// The node-specific information distilled from the DOM.
#[derive(Clone, Debug)]
#[non_exhaustive]
enum RenderNodeInfo {
    /// Some text.
    Text(String),
    /// A group of nodes collected together.
    Container(Vec<RenderNode>),
    /// A link (target) with contained nodes
    Link(String, Vec<RenderNode>),
    /// An emphasised region
    Em(Vec<RenderNode>),
    /// A strong region
    Strong(Vec<RenderNode>),
    /// A struck out region
    Strikeout(Vec<RenderNode>),
    /// A code region
    Code(Vec<RenderNode>),
    /// An image (src, title)
    Img(String, String),
    /// An inline SVG (title)
    Svg(String),
    /// A block element with children
    Block(Vec<RenderNode>),
    /// A header (h1, h2, ...) with its level and children
    Header(usize, Vec<RenderNode>),
    /// A Div element with children
    Div(Vec<RenderNode>),
    /// A blockquote
    BlockQuote(Vec<RenderNode>),
    /// An unordered list
    Ul(Vec<RenderNode>),
    /// An ordered list.  The integer is passed to `calc_ol_prefix_size`
    /// together with the number of children when estimating the prefix width.
    Ol(i64, Vec<RenderNode>),
    /// A description list (containing Dt or Dd)
    Dl(Vec<RenderNode>),
    /// A term (from a `<dt>`)
    Dt(Vec<RenderNode>),
    /// A definition (from a `<dd>`)
    Dd(Vec<RenderNode>),
    /// A line break
    Break,
    /// A table
    Table(RenderTable),
    /// A set of table rows (from either `<thead>` or `<tbody>`)
    TableBody(Vec<RenderTableRow>),
    /// Table row (must only appear within a table body)
    /// If the boolean is true, then the cells are drawn vertically
    /// instead of horizontally (because of space).
    TableRow(RenderTableRow, bool),
    /// Table cell (must only appear within a table row)
    TableCell(RenderTableCell),
    /// Start of a named HTML fragment
    FragStart(String),
    /// A list item
    ListItem(Vec<RenderNode>),
    /// Superscript text
    Sup(Vec<RenderNode>),
}
786
/// Common fields from a node.
#[derive(Clone, Debug)]
struct RenderNode {
    /// Cached size estimate; `None` until `calc_size_estimate` has stored one.
    size_estimate: Cell<Option<SizeEstimate>>,
    /// The node-specific data.
    info: RenderNodeInfo,
    /// The computed style attached to this node.
    style: ComputedStyle,
}
794
795impl RenderNode {
796    /// Create a node from the RenderNodeInfo.
797    fn new(info: RenderNodeInfo) -> RenderNode {
798        RenderNode {
799            size_estimate: Cell::new(None),
800            info,
801            style: Default::default(),
802        }
803    }
804
805    /// Create a node from the RenderNodeInfo.
806    fn new_styled(info: RenderNodeInfo, style: ComputedStyle) -> RenderNode {
807        RenderNode {
808            size_estimate: Cell::new(None),
809            info,
810            style,
811        }
812    }
813
814    /// Get a size estimate
815    fn get_size_estimate(&self) -> SizeEstimate {
816        self.size_estimate.get().unwrap()
817    }
818
819    /// Calculate the size of this node.
820    fn calc_size_estimate<D: TextDecorator>(
821        &self,
822        context: &HtmlContext,
823        decorator: &D,
824    ) -> SizeEstimate {
825        // If it's already calculated, then just return the answer.
826        if let Some(s) = self.size_estimate.get() {
827            return s;
828        };
829
830        use RenderNodeInfo::*;
831
832        let recurse = |node: &RenderNode| node.calc_size_estimate(context, decorator);
833
834        // Otherwise, make an estimate.
835        let estimate = match self.info {
836            Text(ref t) | Img(_, ref t) | Svg(ref t) => {
837                use unicode_width::UnicodeWidthChar;
838                let mut len = 0;
839                let mut in_whitespace = false;
840                for c in t.trim_collapsible_ws().chars() {
841                    let is_collapsible_ws = !c.always_takes_space();
842                    if !is_collapsible_ws {
843                        len += UnicodeWidthChar::width(c).unwrap_or(0);
844                        // Count the preceding whitespace as one.
845                        if in_whitespace {
846                            len += 1;
847                        }
848                    }
849                    in_whitespace = is_collapsible_ws;
850                }
851                // Add one for preceding whitespace, unless the node is otherwise empty.
852                if let Some(true) = t.chars().next().map(|c| !c.always_takes_space()) {
853                    if len > 0 {
854                        len += 1;
855                    }
856                }
857                if let Img(_, _) = self.info {
858                    len += 2;
859                }
860                SizeEstimate {
861                    size: len,
862                    min_width: len.min(context.min_wrap_width),
863                    prefix_size: 0,
864                }
865            }
866
867            Container(ref v) | Em(ref v) | Strong(ref v) | Strikeout(ref v) | Code(ref v)
868            | Block(ref v) | Div(ref v) | Dl(ref v) | Dt(ref v) | ListItem(ref v) | Sup(ref v) => v
869                .iter()
870                .map(recurse)
871                .fold(Default::default(), SizeEstimate::add),
872            Link(ref _target, ref v) => v
873                .iter()
874                .map(recurse)
875                .fold(Default::default(), SizeEstimate::add)
876                .add(SizeEstimate {
877                    size: 5,
878                    min_width: 5,
879                    prefix_size: 0,
880                }),
881            Dd(ref v) | BlockQuote(ref v) | Ul(ref v) => {
882                let prefix = match self.info {
883                    Dd(_) => "  ".into(),
884                    BlockQuote(_) => decorator.quote_prefix(),
885                    Ul(_) => decorator.unordered_item_prefix(),
886                    _ => unreachable!(),
887                };
888                let prefix_width = UnicodeWidthStr::width(prefix.as_str());
889                let mut size = v
890                    .iter()
891                    .map(recurse)
892                    .fold(Default::default(), SizeEstimate::add)
893                    .add_hor(SizeEstimate {
894                        size: prefix_width,
895                        min_width: prefix_width,
896                        prefix_size: 0,
897                    });
898                size.prefix_size = prefix_width;
899                size
900            }
901            Ol(i, ref v) => {
902                let prefix_size = calc_ol_prefix_size(i, v.len(), decorator);
903                let mut result = v
904                    .iter()
905                    .map(recurse)
906                    .fold(Default::default(), SizeEstimate::add)
907                    .add_hor(SizeEstimate {
908                        size: prefix_size,
909                        min_width: prefix_size,
910                        prefix_size: 0,
911                    });
912                result.prefix_size = prefix_size;
913                result
914            }
915            Header(level, ref v) => {
916                let prefix_size = decorator.header_prefix(level).len();
917                let mut size = v
918                    .iter()
919                    .map(recurse)
920                    .fold(Default::default(), SizeEstimate::add)
921                    .add_hor(SizeEstimate {
922                        size: prefix_size,
923                        min_width: prefix_size,
924                        prefix_size: 0,
925                    });
926                size.prefix_size = prefix_size;
927                size
928            }
929            Break => SizeEstimate {
930                size: 1,
931                min_width: 1,
932                prefix_size: 0,
933            },
934            Table(ref t) => t.calc_size_estimate(context),
935            TableRow(..) | TableBody(_) | TableCell(_) => unimplemented!(),
936            FragStart(_) => Default::default(),
937        };
938        self.size_estimate.set(Some(estimate));
939        estimate
940    }
941
942    /// Return true if this node is definitely empty.  This is used to quickly
943    /// remove e.g. links with no anchor text in most cases, but can't recurse
944    /// and look more deeply.
945    fn is_shallow_empty(&self) -> bool {
946        use RenderNodeInfo::*;
947
948        // Otherwise, make an estimate.
949        match self.info {
950            Text(ref t) | Img(_, ref t) | Svg(ref t) => {
951                let len = t.trim().len();
952                len == 0
953            }
954
955            Container(ref v)
956            | Link(_, ref v)
957            | Em(ref v)
958            | Strong(ref v)
959            | Strikeout(ref v)
960            | Code(ref v)
961            | Block(ref v)
962            | ListItem(ref v)
963            | Div(ref v)
964            | BlockQuote(ref v)
965            | Dl(ref v)
966            | Dt(ref v)
967            | Dd(ref v)
968            | Ul(ref v)
969            | Ol(_, ref v)
970            | Sup(ref v) => v.is_empty(),
971            Header(_level, ref v) => v.is_empty(),
972            Break => true,
973            Table(ref _t) => false,
974            TableRow(..) | TableBody(_) | TableCell(_) => false,
975            FragStart(_) => true,
976        }
977    }
978
979    fn write_container(
980        &self,
981        name: &str,
982        items: &[RenderNode],
983        f: &mut std::fmt::Formatter,
984        indent: usize,
985    ) -> std::prelude::v1::Result<(), std::fmt::Error> {
986        writeln!(f, "{:indent$}{name}:", "")?;
987        for item in items {
988            item.write_self(f, indent + 1)?;
989        }
990        Ok(())
991    }
992    fn write_style(
993        f: &mut std::fmt::Formatter,
994        indent: usize,
995        style: &ComputedStyle,
996    ) -> std::result::Result<(), std::fmt::Error> {
997        use std::fmt::Write;
998        let mut stylestr = String::new();
999
1000        #[cfg(feature = "css")]
1001        {
1002            if let Some(col) = style.colour.val() {
1003                write!(&mut stylestr, " colour={:?}", col)?;
1004            }
1005            if let Some(col) = style.bg_colour.val() {
1006                write!(&mut stylestr, " bg_colour={:?}", col)?;
1007            }
1008            if let Some(val) = style.display.val() {
1009                write!(&mut stylestr, " disp={:?}", val)?;
1010            }
1011        }
1012        if let Some(ws) = style.white_space.val() {
1013            write!(&mut stylestr, " white_space={:?}", ws)?;
1014        }
1015        if style.internal_pre {
1016            write!(&mut stylestr, " internal_pre")?;
1017        }
1018        if !stylestr.is_empty() {
1019            writeln!(f, "{:indent$}[Style:{stylestr}", "")?;
1020        }
1021        Ok(())
1022    }
1023    fn write_self(
1024        &self,
1025        f: &mut std::fmt::Formatter,
1026        indent: usize,
1027    ) -> std::prelude::v1::Result<(), std::fmt::Error> {
1028        Self::write_style(f, indent, &self.style)?;
1029
1030        match &self.info {
1031            RenderNodeInfo::Text(s) => writeln!(f, "{:indent$}{s:?}", "")?,
1032            RenderNodeInfo::Container(v) => {
1033                self.write_container("Container", v, f, indent)?;
1034            }
1035            RenderNodeInfo::Link(targ, v) => {
1036                self.write_container(&format!("Link({})", targ), v, f, indent)?;
1037            }
1038            RenderNodeInfo::Em(v) => {
1039                self.write_container("Em", v, f, indent)?;
1040            }
1041            RenderNodeInfo::Strong(v) => {
1042                self.write_container("Strong", v, f, indent)?;
1043            }
1044            RenderNodeInfo::Strikeout(v) => {
1045                self.write_container("Strikeout", v, f, indent)?;
1046            }
1047            RenderNodeInfo::Code(v) => {
1048                self.write_container("Code", v, f, indent)?;
1049            }
1050            RenderNodeInfo::Img(src, title) => {
1051                writeln!(f, "{:indent$}Img src={:?} title={:?}:", "", src, title)?;
1052            }
1053            RenderNodeInfo::Svg(title) => {
1054                writeln!(f, "{:indent$}Svg title={:?}:", "", title)?;
1055            }
1056            RenderNodeInfo::Block(v) => {
1057                self.write_container("Block", v, f, indent)?;
1058            }
1059            RenderNodeInfo::Header(depth, v) => {
1060                self.write_container(&format!("Header({})", depth), v, f, indent)?;
1061            }
1062            RenderNodeInfo::Div(v) => {
1063                self.write_container("Div", v, f, indent)?;
1064            }
1065            RenderNodeInfo::BlockQuote(v) => {
1066                self.write_container("BlockQuote", v, f, indent)?;
1067            }
1068            RenderNodeInfo::Ul(v) => {
1069                self.write_container("Ul", v, f, indent)?;
1070            }
1071            RenderNodeInfo::Ol(start, v) => {
1072                self.write_container(&format!("Ol({})", start), v, f, indent)?;
1073            }
1074            RenderNodeInfo::Dl(v) => {
1075                self.write_container("Dl", v, f, indent)?;
1076            }
1077            RenderNodeInfo::Dt(v) => {
1078                self.write_container("Dt", v, f, indent)?;
1079            }
1080            RenderNodeInfo::Dd(v) => {
1081                self.write_container("Dd", v, f, indent)?;
1082            }
1083            RenderNodeInfo::Break => {
1084                writeln!(f, "{:indent$}Break", "", indent = indent)?;
1085            }
1086            RenderNodeInfo::Table(rows) => {
1087                writeln!(f, "{:indent$}Table ({} cols):", "", rows.num_columns)?;
1088                for rtr in &rows.rows {
1089                    Self::write_style(f, indent + 1, &rtr.style)?;
1090                    writeln!(
1091                        f,
1092                        "{:width$}Row ({} cells):",
1093                        "",
1094                        rtr.cells.len(),
1095                        width = indent + 1
1096                    )?;
1097                    for cell in &rtr.cells {
1098                        Self::write_style(f, indent + 2, &cell.style)?;
1099                        writeln!(
1100                            f,
1101                            "{:width$}Cell colspan={} width={:?}:",
1102                            "",
1103                            cell.colspan,
1104                            cell.col_width,
1105                            width = indent + 2
1106                        )?;
1107                        for node in &cell.content {
1108                            node.write_self(f, indent + 3)?;
1109                        }
1110                    }
1111                }
1112            }
1113            RenderNodeInfo::TableBody(_) => todo!(),
1114            RenderNodeInfo::TableRow(_, _) => todo!(),
1115            RenderNodeInfo::TableCell(_) => todo!(),
1116            RenderNodeInfo::FragStart(frag) => {
1117                writeln!(f, "{:indent$}FragStart({}):", "", frag)?;
1118            }
1119            RenderNodeInfo::ListItem(v) => {
1120                self.write_container("ListItem", v, f, indent)?;
1121            }
1122            RenderNodeInfo::Sup(v) => {
1123                self.write_container("Sup", v, f, indent)?;
1124            }
1125        }
1126        Ok(())
1127    }
1128}
1129
1130fn precalc_size_estimate<'a, D: TextDecorator>(
1131    node: &'a RenderNode,
1132    context: &mut HtmlContext,
1133    decorator: &'a D,
1134) -> TreeMapResult<'a, HtmlContext, &'a RenderNode, ()> {
1135    use RenderNodeInfo::*;
1136    if node.size_estimate.get().is_some() {
1137        return TreeMapResult::Nothing;
1138    }
1139    match node.info {
1140        Text(_) | Img(_, _) | Svg(_) | Break | FragStart(_) => {
1141            let _ = node.calc_size_estimate(context, decorator);
1142            TreeMapResult::Nothing
1143        }
1144
1145        Container(ref v)
1146        | Link(_, ref v)
1147        | Em(ref v)
1148        | Strong(ref v)
1149        | Strikeout(ref v)
1150        | Code(ref v)
1151        | Block(ref v)
1152        | ListItem(ref v)
1153        | Div(ref v)
1154        | BlockQuote(ref v)
1155        | Ul(ref v)
1156        | Ol(_, ref v)
1157        | Dl(ref v)
1158        | Dt(ref v)
1159        | Dd(ref v)
1160        | Sup(ref v)
1161        | Header(_, ref v) => TreeMapResult::PendingChildren {
1162            children: v.iter().collect(),
1163            cons: Box::new(move |context, _cs| {
1164                node.calc_size_estimate(context, decorator);
1165                Ok(None)
1166            }),
1167            prefn: None,
1168            postfn: None,
1169        },
1170        Table(ref t) => {
1171            /* Return all the indirect children which are RenderNodes. */
1172            let mut children = Vec::new();
1173            for row in &t.rows {
1174                for cell in &row.cells {
1175                    children.extend(cell.content.iter());
1176                }
1177            }
1178            TreeMapResult::PendingChildren {
1179                children,
1180                cons: Box::new(move |context, _cs| {
1181                    node.calc_size_estimate(context, decorator);
1182                    Ok(None)
1183                }),
1184                prefn: None,
1185                postfn: None,
1186            }
1187        }
1188        TableRow(..) | TableBody(_) | TableCell(_) => unimplemented!(),
1189    }
1190}
1191
1192/// Convert a table into a RenderNode
1193fn table_to_render_tree<'a, T: Write>(
1194    input: RenderInput,
1195    computed: ComputedStyle,
1196    _err_out: &mut T,
1197) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1198    pending(input, move |_, rowset| {
1199        let mut rows = vec![];
1200        for bodynode in rowset {
1201            if let RenderNodeInfo::TableBody(body) = bodynode.info {
1202                rows.extend(body);
1203            } else {
1204                html_trace!("Found in table: {:?}", bodynode.info);
1205            }
1206        }
1207        if rows.is_empty() {
1208            None
1209        } else {
1210            Some(RenderNode::new_styled(
1211                RenderNodeInfo::Table(RenderTable::new(rows)),
1212                computed,
1213            ))
1214        }
1215    })
1216}
1217
1218/// Add rows from a thead or tbody.
1219fn tbody_to_render_tree<'a, T: Write>(
1220    input: RenderInput,
1221    computed: ComputedStyle,
1222    _err_out: &mut T,
1223) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1224    pending_noempty(input, move |_, rowchildren| {
1225        let mut rows = rowchildren
1226            .into_iter()
1227            .flat_map(|rownode| {
1228                if let RenderNodeInfo::TableRow(row, _) = rownode.info {
1229                    Some(row)
1230                } else {
1231                    html_trace!("  [[tbody child: {:?}]]", rownode);
1232                    None
1233                }
1234            })
1235            .collect::<Vec<_>>();
1236
1237        // Handle colspan=0 by replacing it.
1238        // Get a list of (has_zero_colspan, sum_colspan)
1239        let num_columns = rows
1240            .iter()
1241            .map(|row| {
1242                row.cells()
1243                    // Treat the column as having colspan 1 for initial counting.
1244                    .map(|cell| (cell.colspan == 0, cell.colspan.max(1)))
1245                    .fold((false, 0), |a, b| (a.0 || b.0, a.1 + b.1))
1246            })
1247            .collect::<Vec<_>>();
1248
1249        let max_columns = num_columns.iter().map(|(_, span)| span).max().unwrap_or(&1);
1250
1251        for (i, &(has_zero, num_cols)) in num_columns.iter().enumerate() {
1252            // Note this won't be sensible if more than one column has colspan=0,
1253            // but that's not very well defined anyway.
1254            if has_zero {
1255                for cell in rows[i].cells_mut() {
1256                    if cell.colspan == 0 {
1257                        // +1 because we said it had 1 to start with
1258                        cell.colspan = max_columns - num_cols + 1;
1259                    }
1260                }
1261            }
1262        }
1263
1264        Some(RenderNode::new_styled(
1265            RenderNodeInfo::TableBody(rows),
1266            computed,
1267        ))
1268    })
1269}
1270
1271/// Convert a table row to a RenderTableRow
1272fn tr_to_render_tree<'a, T: Write>(
1273    input: RenderInput,
1274    computed: ComputedStyle,
1275    _err_out: &mut T,
1276) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1277    pending(input, move |_, cellnodes| {
1278        let cells = cellnodes
1279            .into_iter()
1280            .flat_map(|cellnode| {
1281                if let RenderNodeInfo::TableCell(cell) = cellnode.info {
1282                    Some(cell)
1283                } else {
1284                    html_trace!("  [[tr child: {:?}]]", cellnode);
1285                    None
1286                }
1287            })
1288            .collect();
1289        let style = computed.clone();
1290        Some(RenderNode::new_styled(
1291            RenderNodeInfo::TableRow(
1292                RenderTableRow {
1293                    cells,
1294                    col_sizes: None,
1295                    style,
1296                },
1297                false,
1298            ),
1299            computed,
1300        ))
1301    })
1302}
1303
1304/// Convert a single table cell to a render node.
1305fn td_to_render_tree<'a, T: Write>(
1306    input: RenderInput,
1307    computed: ComputedStyle,
1308    _err_out: &mut T,
1309) -> TreeMapResult<'a, HtmlContext, RenderInput, RenderNode> {
1310    let mut colspan = 1;
1311    let mut rowspan = 1;
1312    if let Element { ref attrs, .. } = input.handle.data {
1313        for attr in attrs.borrow().iter() {
1314            if &attr.name.local == "colspan" {
1315                let v: &str = &attr.value;
1316                colspan = v.parse().unwrap_or(1);
1317            }
1318            if &attr.name.local == "rowspan" {
1319                let v: &str = &attr.value;
1320                rowspan = v.parse().unwrap_or(1);
1321            }
1322        }
1323    }
1324    pending(input, move |_, children| {
1325        let style = computed.clone();
1326        Some(RenderNode::new_styled(
1327            RenderNodeInfo::TableCell(RenderTableCell {
1328                colspan,
1329                rowspan,
1330                content: children,
1331                size_estimate: Cell::new(None),
1332                col_width: None,
1333                x_pos: None,
1334                style,
1335                is_dummy: false,
1336            }),
1337            computed,
1338        ))
1339    })
1340}
1341
/// A reducer which combines results from mapping children into
/// the result for the current node.  Takes a context and a
/// vector of results and returns a new result (or nothing).
///
/// It is called at most once per node (`FnOnce`); an `Err` aborts the
/// whole tree walk.
type ResultReducer<'a, C, R> = dyn FnOnce(&mut C, Vec<R>) -> Result<Option<R>> + 'a;

/// A closure to call before processing a child node.
///
/// It is given the context and the child about to be processed; an `Err`
/// aborts the whole tree walk.
type ChildPreFn<C, N> = dyn Fn(&mut C, &N) -> Result<()>;

/// A closure to call after processing a child node,
/// before adding the result to the processed results
/// vector.
///
/// It is only called for children which produced a result; an `Err`
/// aborts the whole tree walk.
type ChildPostFn<C, R> = dyn Fn(&mut C, &R) -> Result<()>;
1354
/// The result of trying to render one node.
///
/// `C` is the context type, `N` the input node type and `R` the type of a
/// finished result.
enum TreeMapResult<'a, C, N, R> {
    /// A completed result.
    Finished(R),
    /// Deferred completion - can be turned into a result
    /// once the vector of children are processed.
    PendingChildren {
        /// The child nodes still to be processed, in order.
        children: Vec<N>,
        /// Builds this node's result from its children's results.
        cons: Box<ResultReducer<'a, C, R>>,
        /// Optional hook run before each child is processed.
        prefn: Option<Box<ChildPreFn<C, N>>>,
        /// Optional hook run on each child's result before it is stored.
        postfn: Option<Box<ChildPostFn<C, R>>>,
    },
    /// Nothing (e.g. a comment or other ignored element).
    Nothing,
}
1370
1371fn tree_map_reduce<'a, C, N, R, M>(
1372    context: &mut C,
1373    top: N,
1374    mut process_node: M,
1375) -> Result<Option<R>>
1376where
1377    M: FnMut(&mut C, N) -> Result<TreeMapResult<'a, C, N, R>>,
1378{
1379    /// A node partially decoded, waiting for its children to
1380    /// be processed.
1381    struct PendingNode<'a, C, R, N> {
1382        /// How to make the node once finished
1383        construct: Box<ResultReducer<'a, C, R>>,
1384        /// Called before processing each child
1385        prefn: Option<Box<ChildPreFn<C, N>>>,
1386        /// Called after processing each child
1387        postfn: Option<Box<ChildPostFn<C, R>>>,
1388        /// Children already processed
1389        children: Vec<R>,
1390        /// Iterator of child nodes not yet processed
1391        to_process: std::vec::IntoIter<N>,
1392    }
1393
1394    let mut last = PendingNode {
1395        // We only expect one child, which we'll just return.
1396        construct: Box::new(|_, mut cs| Ok(cs.pop())),
1397        prefn: None,
1398        postfn: None,
1399        children: Vec::new(),
1400        to_process: vec![top].into_iter(),
1401    };
1402    let mut pending_stack = Vec::new();
1403    loop {
1404        // Get the next child node to process
1405        while let Some(h) = last.to_process.next() {
1406            if let Some(f) = &last.prefn {
1407                f(context, &h)?;
1408            }
1409            match process_node(context, h)? {
1410                TreeMapResult::Finished(result) => {
1411                    if let Some(f) = &last.postfn {
1412                        f(context, &result)?;
1413                    }
1414                    last.children.push(result);
1415                }
1416                TreeMapResult::PendingChildren {
1417                    children,
1418                    cons,
1419                    prefn,
1420                    postfn,
1421                } => {
1422                    pending_stack.push(last);
1423                    last = PendingNode {
1424                        construct: cons,
1425                        prefn,
1426                        postfn,
1427                        children: Vec::new(),
1428                        to_process: children.into_iter(),
1429                    };
1430                }
1431                TreeMapResult::Nothing => {}
1432            };
1433        }
1434        // No more children, so finally construct the parent.
1435        if let Some(mut parent) = pending_stack.pop() {
1436            if let Some(node) = (last.construct)(context, last.children)? {
1437                if let Some(f) = &parent.postfn {
1438                    f(context, &node)?;
1439                }
1440                parent.children.push(node);
1441            }
1442            last = parent;
1443            continue;
1444        }
1445        // Finished the whole stack!
1446        break Ok((last.construct)(context, last.children)?);
1447    }
1448}
1449
/// A collection of syntax highlighters, each registered under a name.
#[cfg(feature = "css_ext")]
#[derive(Clone, Default)]
struct HighlighterMap {
    /// The highlighters, keyed by the name they were registered with.
    map: HashMap<String, Rc<SyntaxHighlighter>>,
}
1455
#[cfg(feature = "css_ext")]
impl HighlighterMap {
    /// Look up the highlighter registered as `name`, returning another
    /// handle to it if there is one.
    pub fn get(&self, name: &str) -> Option<Rc<SyntaxHighlighter>> {
        let found = self.map.get(name)?;
        Some(Rc::clone(found))
    }

    /// Register `f` as `name`, replacing anything already registered
    /// under that name.
    fn insert(&mut self, name: impl Into<String>, f: Rc<SyntaxHighlighter>) {
        let key: String = name.into();
        self.map.insert(key, f);
    }
}
1466
#[cfg(feature = "css_ext")]
impl std::fmt::Debug for HighlighterMap {
    /// Shows only the registered names; the highlighters themselves are
    /// not printed.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let names: Vec<&String> = self.map.keys().collect();
        let mut out = f.debug_struct("HighlighterMap");
        out.field("map", &names);
        out.finish()
    }
}
1475
#[cfg(feature = "css_ext")]
impl PartialEq for HighlighterMap {
    /// Two maps are equal when they register exactly the same names and
    /// every name refers to the very same highlighter allocation.
    ///
    /// Highlighters are compared by identity (`Rc::ptr_eq`) rather than by
    /// value, so the comparison never needs to look inside them.
    fn eq(&self, other: &Self) -> bool {
        self.map.len() == other.map.len()
            && self.map.iter().all(|(name, highlighter)| {
                other
                    .map
                    .get(name)
                    .map_or(false, |theirs| Rc::ptr_eq(highlighter, theirs))
            })
    }
}

// Identity comparison is reflexive, symmetric and transitive, so the map
// is a full equivalence relation.
#[cfg(feature = "css_ext")]
impl Eq for HighlighterMap {}
1485
/// Settings and shared state used as the context while a DOM is converted
/// into a render tree.
#[derive(Debug, PartialEq, Eq)]
struct HtmlContext {
    /// CSS style data.  When `use_doc_css` is set, the rules extracted from
    /// the document are combined with this before conversion starts.
    style_data: css::StyleData,
    /// Whether to extract and use the style rules found in the document.
    #[cfg(feature = "css")]
    use_doc_css: bool,

    // NOTE(review): the options below are not read in this part of the
    // file; their meaning is presumably what the names suggest - confirm
    // against the configuration code which sets them.
    max_wrap_width: Option<usize>,
    pad_block_width: bool,
    allow_width_overflow: bool,
    min_wrap_width: usize,
    raw: bool,
    draw_borders: bool,
    wrap_links: bool,
    include_link_footnotes: bool,
    use_unicode_strikeout: bool,
    image_mode: config::ImageRenderMode,

    #[cfg(feature = "xml")]
    xml_mode: config::XmlMode,

    /// The registered syntax highlighters, looked up by name.
    #[cfg(feature = "css_ext")]
    syntax_highlighters: HighlighterMap,
}
1509
/// Input to render tree conversion: one DOM node together with the state
/// needed to convert it.
struct RenderInput {
    /// The DOM node to convert.
    handle: Handle,
    /// The style handed down to this node; the inputs created for its
    /// children share the same `Rc`.
    parent_style: Rc<ComputedStyle>,
    /// Overlay styles from syntax highlighting, as byte ranges relative to
    /// the start of this node's text, sorted by range.
    #[cfg(feature = "css_ext")]
    extra_styles: RefCell<Vec<(Range<usize>, TextStyle)>>,
    /// Map from node to the length of enclosed text nodes.  Shared with the
    /// inputs created for child nodes.
    node_lengths: Rc<RefCell<HashMap<*const Node, usize>>>,
}
1520
1521impl RenderInput {
1522    fn new(handle: Handle, parent_style: Rc<ComputedStyle>) -> Self {
1523        RenderInput {
1524            handle,
1525            parent_style,
1526            #[cfg(feature = "css_ext")]
1527            extra_styles: Default::default(),
1528            node_lengths: Default::default(),
1529        }
1530    }
1531
1532    #[cfg(feature = "css_ext")]
1533    fn set_syntax_info(&self, full_text: &str, highlighted: Vec<(TextStyle, &str)>) {
1534        let mut node_styles = Vec::new();
1535
1536        // Turn the returned strings into offsets into full_text.  We assume
1537        // we can maintain relative offsets as we step through the tree rendering.
1538        for (style, s) in highlighted {
1539            fn get_offset(full: &str, sub: &str) -> Option<Range<usize>> {
1540                // This looks scary, but if we get this wrong the worst case is
1541                // that we end up panicking when using the offsets.
1542                let full_start = full.as_ptr() as usize;
1543                let full_end = full_start + full.len();
1544                let sub_start = sub.as_ptr() as usize;
1545                let sub_end = sub_start + sub.len();
1546
1547                if sub_start >= full_start && sub_end <= full_end {
1548                    Some((sub_start - full_start)..(sub_end - full_start))
1549                } else {
1550                    None
1551                }
1552            }
1553
1554            if let Some(offset_range) = get_offset(full_text, s) {
1555                node_styles.push((offset_range, style));
1556            } // else we ignore the highlight.
1557        }
1558        node_styles.sort_by_key(|r| (r.0.start, r.0.end));
1559        *self.extra_styles.borrow_mut() = node_styles;
1560    }
1561
1562    // Return the children in the right form
1563    #[allow(clippy::mut_range_bound)]
1564    fn children(&self) -> Vec<RenderInput> {
1565        #[cfg(feature = "css_ext")]
1566        if !self.extra_styles.borrow().is_empty() {
1567            let mut offset = 0;
1568            let mut result = Vec::new();
1569            let mut start_style_index = 0;
1570            let node_lengths = self.node_lengths.borrow();
1571            let extra_styles = self.extra_styles.borrow();
1572            for child in &*self.handle.children.borrow() {
1573                let end_offset = offset + node_lengths.get(&Rc::as_ptr(child)).unwrap();
1574                let mut child_extra_styles = Vec::new();
1575                for es_idx in start_style_index..extra_styles.len() {
1576                    let mut style_range = extra_styles[es_idx].0.clone();
1577                    if style_range.start >= end_offset {
1578                        // We've gone too far.
1579                        break;
1580                    }
1581                    if style_range.end <= offset {
1582                        // We don't need to look at this again
1583                        // Note this is here to restart this loop in a different place
1584                        // in the next run of the outer loop; hence allowing
1585                        // clippy::mut_range_bound on the function.
1586                        start_style_index = es_idx;
1587                    } else {
1588                        // This piece must overlap!
1589                        // Clip the range to this node.
1590                        style_range.start = style_range.start.max(offset) - offset;
1591                        style_range.end = style_range.end.min(end_offset) - offset;
1592
1593                        child_extra_styles.push((style_range, extra_styles[es_idx].1.clone()));
1594                    }
1595                }
1596                result.push(RenderInput {
1597                    handle: Rc::clone(child),
1598                    parent_style: Rc::clone(&self.parent_style),
1599                    extra_styles: RefCell::new(child_extra_styles),
1600                    node_lengths: self.node_lengths.clone(),
1601                });
1602                offset = end_offset;
1603            }
1604            return result;
1605        }
1606
1607        // Simple case, and we might not have the node lengths.
1608        self.handle
1609            .children
1610            .borrow()
1611            .iter()
1612            .map(|child| RenderInput {
1613                handle: child.clone(),
1614                parent_style: Rc::clone(&self.parent_style),
1615                #[cfg(feature = "css_ext")]
1616                extra_styles: Default::default(),
1617                node_lengths: self.node_lengths.clone(),
1618            })
1619            .collect()
1620    }
1621
1622    #[cfg(feature = "css_ext")]
1623    fn do_extract_text(
1624        out: &mut String,
1625        handle: &Handle,
1626        length_map: &mut HashMap<*const Node, usize>,
1627    ) {
1628        match handle.data {
1629            markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
1630                let s: &str = &tstr.borrow();
1631                out.push_str(s);
1632                length_map.entry(Rc::as_ptr(handle)).or_insert(s.len());
1633            }
1634            _ => {
1635                for child in handle.children.borrow().iter() {
1636                    let len_before = out.len();
1637                    RenderInput::do_extract_text(out, child, length_map);
1638                    let len_after = out.len();
1639                    length_map
1640                        .entry(Rc::as_ptr(child))
1641                        .or_insert(len_after - len_before);
1642                }
1643            }
1644        }
1645    }
1646
1647    #[cfg(feature = "css_ext")]
1648    /// Return a full String, and a list of where substrings came from:
1649    ///
1650    fn extract_raw_text(&self) -> String {
1651        let mut result = String::new();
1652        RenderInput::do_extract_text(
1653            &mut result,
1654            &self.handle,
1655            &mut self.node_lengths.borrow_mut(),
1656        );
1657        result
1658    }
1659}
1660
1661fn dom_to_render_tree_with_context<T: Write>(
1662    handle: Handle,
1663    err_out: &mut T,
1664    context: &mut HtmlContext,
1665) -> Result<Option<RenderNode>> {
1666    html_trace!("### dom_to_render_tree: HTML: {:?}", handle);
1667    #[cfg(feature = "css")]
1668    if context.use_doc_css {
1669        let mut doc_style_data = css::dom_extract::dom_to_stylesheet(handle.clone(), err_out)?;
1670        doc_style_data.merge(std::mem::take(&mut context.style_data));
1671        context.style_data = doc_style_data;
1672    }
1673
1674    let parent_style = Default::default();
1675    let result = tree_map_reduce(
1676        context,
1677        RenderInput::new(handle, parent_style),
1678        |context, input| process_dom_node(input, err_out, context),
1679    );
1680
1681    html_trace!("### dom_to_render_tree: out= {:#?}", result);
1682    result
1683}
1684
/// Return a string representation of the CSS rules parsed from
/// the DOM document.
///
/// Any diagnostics written while extracting the rules are discarded.
#[cfg(feature = "css")]
pub fn dom_to_parsed_style(dom: &RcDom) -> Result<String> {
    let mut discard = std::io::sink();
    let stylesheet = css::dom_extract::dom_to_stylesheet(dom.document.clone(), &mut discard)?;
    Ok(stylesheet.to_string())
}
1693
1694fn pending<F>(
1695    input: RenderInput,
1696    f: F,
1697) -> TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>
1698where
1699    F: FnOnce(&mut HtmlContext, Vec<RenderNode>) -> Option<RenderNode> + 'static,
1700{
1701    TreeMapResult::PendingChildren {
1702        children: input.children(),
1703        cons: Box::new(move |ctx, children| Ok(f(ctx, children))),
1704        prefn: None,
1705        postfn: None,
1706    }
1707}
1708
1709fn pending_noempty<F>(
1710    input: RenderInput,
1711    f: F,
1712) -> TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>
1713where
1714    F: FnOnce(&mut HtmlContext, Vec<RenderNode>) -> Option<RenderNode> + 'static,
1715{
1716    let handle = &input.handle;
1717    let style = &input.parent_style;
1718    TreeMapResult::PendingChildren {
1719        children: handle
1720            .children
1721            .borrow()
1722            .iter()
1723            .map(|child| RenderInput::new(child.clone(), Rc::clone(style)))
1724            .collect(),
1725        cons: Box::new(move |ctx, children| {
1726            if children.is_empty() {
1727                Ok(None)
1728            } else {
1729                Ok(f(ctx, children))
1730            }
1731        }),
1732        prefn: None,
1733        postfn: None,
1734    }
1735}
1736
/// Where `insert_child` should place the new child relative to the
/// existing content.
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum ChildPosition {
    /// Before the existing content.
    Start,
    /// After the existing content.
    End,
}
1742
1743/// Prepend or append a FragmentStart (or analogous) marker to an existing
1744/// RenderNode.
1745fn insert_child(
1746    new_child: RenderNode,
1747    mut orig: RenderNode,
1748    position: ChildPosition,
1749) -> RenderNode {
1750    use RenderNodeInfo::*;
1751    html_trace!("insert_child({:?}, {:?}, {:?})", new_child, orig, position);
1752
1753    match orig.info {
1754        // For block elements such as Block and Div, we need to insert
1755        // the node at the front of their children array, otherwise
1756        // the renderer is liable to drop the fragment start marker
1757        // _before_ the new line indicating the end of the previous
1758        // paragraph.
1759        //
1760        // For Container, we do the same thing just to make the data
1761        // less pointlessly nested.
1762        Block(ref mut children)
1763        | ListItem(ref mut children)
1764        | Dd(ref mut children)
1765        | Dt(ref mut children)
1766        | Dl(ref mut children)
1767        | Div(ref mut children)
1768        | BlockQuote(ref mut children)
1769        | Container(ref mut children)
1770        | TableCell(RenderTableCell {
1771            content: ref mut children,
1772            ..
1773        }) => {
1774            match position {
1775                ChildPosition::Start => children.insert(0, new_child),
1776                ChildPosition::End => children.push(new_child),
1777            }
1778            // Now return orig, but we do that outside the match so
1779            // that we've given back the borrowed ref 'children'.
1780        }
1781
1782        // For table rows and tables, push down if there's any content.
1783        TableRow(ref mut rrow, _) => {
1784            // If the row is empty, then there isn't really anything
1785            // to attach the fragment start to.
1786            if let Some(cell) = rrow.cells.first_mut() {
1787                match position {
1788                    ChildPosition::Start => cell.content.insert(0, new_child),
1789                    ChildPosition::End => cell.content.push(new_child),
1790                }
1791            }
1792        }
1793
1794        TableBody(ref mut rows) | Table(RenderTable { ref mut rows, .. }) => {
1795            // If the row is empty, then there isn't really anything
1796            // to attach the fragment start to.
1797            if let Some(rrow) = rows.first_mut() {
1798                if let Some(cell) = rrow.cells.first_mut() {
1799                    match position {
1800                        ChildPosition::Start => cell.content.insert(0, new_child),
1801                        ChildPosition::End => cell.content.push(new_child),
1802                    }
1803                }
1804            }
1805        }
1806
1807        // For anything else, just make a new Container with the
1808        // new_child node and the original one.
1809        _ => {
1810            let result = match position {
1811                ChildPosition::Start => RenderNode::new(Container(vec![new_child, orig])),
1812                ChildPosition::End => RenderNode::new(Container(vec![orig, new_child])),
1813            };
1814            html_trace!("insert_child() -> {:?}", result);
1815            return result;
1816        }
1817    }
1818    html_trace!("insert_child() -> {:?}", &orig);
1819    orig
1820}
1821
1822fn process_dom_node<T: Write>(
1823    input: RenderInput,
1824    err_out: &mut T,
1825    #[allow(unused)] // Used with css feature
1826    context: &mut HtmlContext,
1827) -> Result<TreeMapResult<'static, HtmlContext, RenderInput, RenderNode>> {
1828    use RenderNodeInfo::*;
1829    use TreeMapResult::*;
1830
1831    Ok(match input.handle.clone().data {
1832        Document => pending(input, |_context, cs| Some(RenderNode::new(Container(cs)))),
1833        Comment { .. } => Nothing,
1834        Element {
1835            ref name,
1836            ref attrs,
1837            ..
1838        } => {
1839            let mut frag_from_name_attr = false;
1840
1841            let RenderInput {
1842                ref handle,
1843                ref parent_style,
1844                ..
1845            } = input;
1846
1847            #[cfg(feature = "css")]
1848            let use_doc_css = context.use_doc_css;
1849            #[cfg(not(feature = "css"))]
1850            let use_doc_css = false;
1851
1852            let computed = {
1853                let computed = context
1854                    .style_data
1855                    .computed_style(parent_style, handle, use_doc_css);
1856                #[cfg(feature = "css")]
1857                match computed.display.val() {
1858                    Some(css::Display::None) => return Ok(Nothing),
1859                    #[cfg(feature = "css_ext")]
1860                    Some(css::Display::ExtRawDom) => {
1861                        use html5ever::interface::{NodeOrText, TreeSink};
1862                        use html5ever::{LocalName, QualName};
1863                        let mut html_bytes: Vec<u8> = Default::default();
1864                        handle.serialize(&mut html_bytes)?;
1865
1866                        // Make a new DOM object so that we can easily create new
1867                        // nodes.  They will be independent.
1868                        let dom = RcDom::default();
1869
1870                        // We'll enclose it in a `<pre>`, so that we have an element in the right
1871                        // shape to process.
1872                        let html_string = String::from_utf8_lossy(&html_bytes).into_owned();
1873                        let pre_node = dom.create_element(
1874                            QualName::new(None, ns!(html), LocalName::from("pre")),
1875                            vec![],
1876                            Default::default(),
1877                        );
1878                        dom.append(&pre_node, NodeOrText::AppendText(html_string.into()));
1879
1880                        // Remove the RawDom setting; we don't want to be recursively converting to
1881                        // raw DOM.
1882                        let mut my_computed = computed;
1883                        my_computed.display = Default::default();
1884                        // Preformat it
1885                        my_computed.white_space.maybe_update(
1886                            false,
1887                            StyleOrigin::Agent,
1888                            Default::default(),
1889                            WhiteSpace::Pre,
1890                        );
1891                        my_computed.internal_pre = true;
1892
1893                        let new_input = RenderInput {
1894                            handle: pre_node,
1895                            parent_style: Rc::new(my_computed.clone()),
1896                            extra_styles: Default::default(),
1897                            node_lengths: Default::default(),
1898                        };
1899
1900                        if let Some(syntax_info) = my_computed.syntax.val() {
1901                            if let Some(highlighter) =
1902                                context.syntax_highlighters.get(&syntax_info.language)
1903                            {
1904                                // Do the highlighting here.
1905                                let text = new_input.extract_raw_text();
1906                                let highlighted = highlighter(&text);
1907                                new_input.set_syntax_info(&text, highlighted);
1908                            }
1909                        }
1910                        return Ok(pending(new_input, move |_, cs| {
1911                            Some(RenderNode::new_styled(Container(cs), my_computed))
1912                        }));
1913                    }
1914                    _ => (),
1915                }
1916                #[cfg(feature = "css_ext")]
1917                if let Some(syntax_info) = computed.syntax.val() {
1918                    if let Some(highlighter) =
1919                        context.syntax_highlighters.get(&syntax_info.language)
1920                    {
1921                        let extracted_text = input.extract_raw_text();
1922                        let highlighted = highlighter(&extracted_text);
1923                        input.set_syntax_info(&extracted_text, highlighted);
1924                    }
1925                }
1926
1927                computed
1928            };
1929
1930            let computed_before = computed.content_before.clone();
1931            let computed_after = computed.content_after.clone();
1932
1933            let result = match name.expanded() {
1934                expanded_name!(html "html") | expanded_name!(html "body") => {
1935                    /* process children, but don't add anything */
1936                    pending(input, move |_, cs| {
1937                        Some(RenderNode::new_styled(Container(cs), computed))
1938                    })
1939                }
1940                expanded_name!(html "link")
1941                | expanded_name!(html "meta")
1942                | expanded_name!(html "hr")
1943                | expanded_name!(html "script")
1944                | expanded_name!(html "style")
1945                | expanded_name!(html "head") => {
1946                    /* Ignore the head and its children */
1947                    Nothing
1948                }
1949                expanded_name!(html "span") => {
1950                    /* process children, but don't add anything */
1951                    pending_noempty(input, move |_, cs| {
1952                        Some(RenderNode::new_styled(Container(cs), computed))
1953                    })
1954                }
1955                expanded_name!(html "a") => {
1956                    let borrowed = attrs.borrow();
1957                    let mut target = None;
1958                    frag_from_name_attr = true;
1959                    for attr in borrowed.iter() {
1960                        if &attr.name.local == "href" {
1961                            target = Some(&*attr.value);
1962                            break;
1963                        }
1964                    }
1965                    PendingChildren {
1966                        children: input.children(),
1967                        cons: if let Some(href) = target {
1968                            let href: String = href.into();
1969                            Box::new(move |_, cs: Vec<RenderNode>| {
1970                                if cs.iter().any(|c| !c.is_shallow_empty()) {
1971                                    Ok(Some(RenderNode::new_styled(Link(href, cs), computed)))
1972                                } else {
1973                                    Ok(None)
1974                                }
1975                            })
1976                        } else {
1977                            Box::new(move |_, cs| {
1978                                Ok(Some(RenderNode::new_styled(Container(cs), computed)))
1979                            })
1980                        },
1981                        prefn: None,
1982                        postfn: None,
1983                    }
1984                }
1985                expanded_name!(html "em")
1986                | expanded_name!(html "i")
1987                | expanded_name!(html "ins") => pending(input, move |_, cs| {
1988                    Some(RenderNode::new_styled(Em(cs), computed))
1989                }),
1990                expanded_name!(html "strong") | expanded_name!(html "b") => {
1991                    pending(input, move |_, cs| {
1992                        Some(RenderNode::new_styled(Strong(cs), computed))
1993                    })
1994                }
1995                expanded_name!(html "s") | expanded_name!(html "del") => {
1996                    pending(input, move |_, cs| {
1997                        Some(RenderNode::new_styled(Strikeout(cs), computed))
1998                    })
1999                }
2000                expanded_name!(html "code") => pending(input, move |_, cs| {
2001                    Some(RenderNode::new_styled(Code(cs), computed))
2002                }),
2003                expanded_name!(html "img") => {
2004                    let borrowed = attrs.borrow();
2005                    let mut title = None;
2006                    let mut src = None;
2007                    for attr in borrowed.iter() {
2008                        if &attr.name.local == "alt" && !attr.value.is_empty() {
2009                            title = Some(&*attr.value);
2010                        }
2011                        if &attr.name.local == "src" && !attr.value.is_empty() {
2012                            src = Some(&*attr.value);
2013                        }
2014                        if title.is_some() && src.is_some() {
2015                            break;
2016                        }
2017                    }
2018                    // Ignore `<img>` without src.
2019                    if let Some(src) = src {
2020                        Finished(RenderNode::new_styled(
2021                            Img(src.into(), title.unwrap_or("").into()),
2022                            computed,
2023                        ))
2024                    } else {
2025                        Nothing
2026                    }
2027                }
2028                expanded_name!(svg "svg") => {
2029                    // Inline SVG: look for a <title> child for the title.
2030                    let mut title = None;
2031
2032                    for node in input.handle.children.borrow().iter() {
2033                        if let markup5ever_rcdom::NodeData::Element { ref name, .. } = node.data {
2034                            if matches!(name.expanded(), expanded_name!(svg "title")) {
2035                                let mut title_str = String::new();
2036                                for subnode in node.children.borrow().iter() {
2037                                    if let markup5ever_rcdom::NodeData::Text { ref contents } =
2038                                        subnode.data
2039                                    {
2040                                        title_str.push_str(&contents.borrow());
2041                                    }
2042                                }
2043                                title = Some(title_str);
2044                            } else {
2045                                // The first item has to be <title>
2046                                break;
2047                            }
2048                        }
2049                    }
2050
2051                    Finished(RenderNode::new_styled(
2052                        Svg(title.unwrap_or_else(String::new)),
2053                        computed,
2054                    ))
2055                }
2056                expanded_name!(html "h1")
2057                | expanded_name!(html "h2")
2058                | expanded_name!(html "h3")
2059                | expanded_name!(html "h4")
2060                | expanded_name!(html "h5")
2061                | expanded_name!(html "h6") => {
2062                    let level: usize = name.local[1..].parse().unwrap();
2063                    pending(input, move |_, cs| {
2064                        Some(RenderNode::new_styled(Header(level, cs), computed))
2065                    })
2066                }
2067                expanded_name!(html "p") => pending_noempty(input, move |_, cs| {
2068                    Some(RenderNode::new_styled(Block(cs), computed))
2069                }),
2070                expanded_name!(html "li") => pending(input, move |_, cs| {
2071                    Some(RenderNode::new_styled(ListItem(cs), computed))
2072                }),
2073                expanded_name!(html "sup") => pending(input, move |_, cs| {
2074                    Some(RenderNode::new_styled(Sup(cs), computed))
2075                }),
2076                expanded_name!(html "div") => pending_noempty(input, move |_, cs| {
2077                    Some(RenderNode::new_styled(Div(cs), computed))
2078                }),
2079                expanded_name!(html "pre") => pending(input, move |_, cs| {
2080                    let mut computed = computed;
2081                    computed.white_space.maybe_update(
2082                        false,
2083                        StyleOrigin::Agent,
2084                        Default::default(),
2085                        WhiteSpace::Pre,
2086                    );
2087                    computed.internal_pre = true;
2088                    Some(RenderNode::new_styled(Block(cs), computed))
2089                }),
2090                expanded_name!(html "br") => Finished(RenderNode::new_styled(Break, computed)),
2091                expanded_name!(html "wbr") => {
2092                    Finished(RenderNode::new_styled(Text("\u{200b}".into()), computed))
2093                }
2094                expanded_name!(html "table") => table_to_render_tree(input, computed, err_out),
2095                expanded_name!(html "thead") | expanded_name!(html "tbody") => {
2096                    tbody_to_render_tree(input, computed, err_out)
2097                }
2098                expanded_name!(html "tr") => tr_to_render_tree(input, computed, err_out),
2099                expanded_name!(html "th") | expanded_name!(html "td") => {
2100                    td_to_render_tree(input, computed, err_out)
2101                }
2102                expanded_name!(html "blockquote") => pending_noempty(input, move |_, cs| {
2103                    Some(RenderNode::new_styled(BlockQuote(cs), computed))
2104                }),
2105                expanded_name!(html "ul") => pending_noempty(input, move |_, cs| {
2106                    Some(RenderNode::new_styled(Ul(cs), computed))
2107                }),
2108                expanded_name!(html "ol") => {
2109                    let borrowed = attrs.borrow();
2110                    let mut start = 1;
2111                    for attr in borrowed.iter() {
2112                        if &attr.name.local == "start" {
2113                            start = attr.value.parse().ok().unwrap_or(1);
2114                            break;
2115                        }
2116                    }
2117
2118                    pending_noempty(input, move |_, cs| {
2119                        // There can be extra nodes which aren't ListItem (like whitespace text
2120                        // nodes).  We need to filter those out to avoid messing up the rendering.
2121                        let cs = cs
2122                            .into_iter()
2123                            .filter(|n| matches!(n.info, RenderNodeInfo::ListItem(..)))
2124                            .collect();
2125                        Some(RenderNode::new_styled(Ol(start, cs), computed))
2126                    })
2127                }
2128                expanded_name!(html "dl") => {
2129                    pending_noempty(input, move |_, cs| {
2130                        // There can be extra nodes which aren't Dt or Dd (like whitespace text
2131                        // nodes).  We need to filter those out to avoid messing up the rendering.
2132                        let cs = cs
2133                            .into_iter()
2134                            .filter(|n| {
2135                                matches!(n.info, RenderNodeInfo::Dt(..) | RenderNodeInfo::Dd(..))
2136                            })
2137                            .collect();
2138                        Some(RenderNode::new_styled(Dl(cs), computed))
2139                    })
2140                }
2141                expanded_name!(html "dt") => pending(input, move |_, cs| {
2142                    Some(RenderNode::new_styled(Dt(cs), computed))
2143                }),
2144                expanded_name!(html "dd") => pending(input, move |_, cs| {
2145                    Some(RenderNode::new_styled(Dd(cs), computed))
2146                }),
2147                _ => {
2148                    html_trace!("Unhandled element: {:?}\n", name.local);
2149                    pending_noempty(input, move |_, cs| {
2150                        Some(RenderNode::new_styled(Container(cs), computed))
2151                    })
2152                }
2153            };
2154
2155            let mut fragment = None;
2156            let borrowed = attrs.borrow();
2157            for attr in borrowed.iter() {
2158                if &attr.name.local == "id" || (frag_from_name_attr && &attr.name.local == "name") {
2159                    fragment = Some(attr.value.to_string());
2160                    break;
2161                }
2162            }
2163
2164            let result = if computed_before.is_some() || computed_after.is_some() {
2165                let wrap_nodes = move |mut node: RenderNode| {
2166                    if let Some(ref content) = computed_before {
2167                        if let Some(pseudo_content) = content.content.val() {
2168                            node = insert_child(
2169                                RenderNode::new(Text(pseudo_content.text.clone())),
2170                                node,
2171                                ChildPosition::Start,
2172                            );
2173                        }
2174                    }
2175                    if let Some(ref content) = computed_after {
2176                        if let Some(pseudo_content) = content.content.val() {
2177                            node = insert_child(
2178                                RenderNode::new(Text(pseudo_content.text.clone())),
2179                                node,
2180                                ChildPosition::End,
2181                            );
2182                        }
2183                    }
2184                    node
2185                };
2186                // Insert extra content nodes
2187                match result {
2188                    Finished(node) => Finished(wrap_nodes(node)),
2189                    // Do we need to wrap a Nothing?
2190                    Nothing => Nothing,
2191                    PendingChildren {
2192                        children,
2193                        cons,
2194                        prefn,
2195                        postfn,
2196                    } => PendingChildren {
2197                        children,
2198                        prefn,
2199                        postfn,
2200                        cons: Box::new(move |ctx, ch| match cons(ctx, ch)? {
2201                            None => Ok(None),
2202                            Some(node) => Ok(Some(wrap_nodes(node))),
2203                        }),
2204                    },
2205                }
2206            } else {
2207                result
2208            };
2209
2210            let Some(fragname) = fragment else {
2211                return Ok(result);
2212            };
2213            match result {
2214                Finished(node) => Finished(insert_child(
2215                    RenderNode::new(FragStart(fragname)),
2216                    node,
2217                    ChildPosition::Start,
2218                )),
2219                Nothing => Finished(RenderNode::new(FragStart(fragname))),
2220                PendingChildren {
2221                    children,
2222                    cons,
2223                    prefn,
2224                    postfn,
2225                } => PendingChildren {
2226                    children,
2227                    prefn,
2228                    postfn,
2229                    cons: Box::new(move |ctx, ch| {
2230                        let fragnode = RenderNode::new(FragStart(fragname));
2231                        match cons(ctx, ch)? {
2232                            None => Ok(Some(fragnode)),
2233                            Some(node) => {
2234                                Ok(Some(insert_child(fragnode, node, ChildPosition::Start)))
2235                            }
2236                        }
2237                    }),
2238                },
2239            }
2240        }
2241        markup5ever_rcdom::NodeData::Text { contents: ref tstr } => {
2242            #[cfg(feature = "css_ext")]
2243            if !input.extra_styles.borrow().is_empty() {
2244                let mut nodes = Vec::new();
2245                let mut offset = 0;
2246                for part in &*input.extra_styles.borrow() {
2247                    let (start, end) = (part.0.start, part.0.end);
2248                    if start > offset {
2249                        // Handle the unstyled bit at the start
2250                        nodes.push(RenderNode::new(Text((tstr.borrow()[offset..start]).into())));
2251                    }
2252                    let mut cstyle = input.parent_style.inherit();
2253                    cstyle.colour.maybe_update(
2254                        // TODO: use the right specificity
2255                        cstyle.syntax.important,
2256                        cstyle.syntax.origin,
2257                        cstyle.syntax.specificity,
2258                        part.1.fg_colour,
2259                    );
2260                    if let Some(bgcol) = part.1.bg_colour {
2261                        cstyle.bg_colour.maybe_update(
2262                            // TODO: use the right specificity
2263                            cstyle.syntax.important,
2264                            cstyle.syntax.origin,
2265                            cstyle.syntax.specificity,
2266                            bgcol,
2267                        );
2268                    }
2269                    // Now the styled part
2270                    nodes.push(RenderNode::new_styled(
2271                        Text((tstr.borrow()[start..end]).into()),
2272                        cstyle,
2273                    ));
2274                    offset = end;
2275                }
2276                // the final bit
2277                if offset < tstr.borrow().len() {
2278                    nodes.push(RenderNode::new(Text((tstr.borrow()[offset..]).into())));
2279                }
2280                if nodes.len() == 1 {
2281                    return Ok(Finished(nodes.pop().unwrap()));
2282                } else {
2283                    return Ok(Finished(RenderNode::new(RenderNodeInfo::Container(nodes))));
2284                }
2285            }
2286
2287            Finished(RenderNode::new(Text((&*tstr.borrow()).into())))
2288        }
2289        _ => {
2290            // NodeData doesn't have a Debug impl.
2291            writeln!(err_out, "Unhandled node type.").unwrap();
2292            Nothing
2293        }
2294    })
2295}
2296
2297fn render_tree_to_string<T: Write, D: TextDecorator>(
2298    context: &mut HtmlContext,
2299    renderer: SubRenderer<D>,
2300    decorator: &D,
2301    tree: RenderNode,
2302    err_out: &mut T,
2303) -> Result<SubRenderer<D>> {
2304    /* Phase 1: get size estimates. */
2305    // can't actually error, but Ok-wrap to satisfy tree_map_reduce signature
2306    tree_map_reduce(context, &tree, |context, node| {
2307        Ok(precalc_size_estimate(node, context, decorator))
2308    })?;
2309    /* Phase 2: actually render. */
2310    let mut renderer = TextRenderer::new(renderer);
2311    tree_map_reduce(&mut renderer, tree, |renderer, node| {
2312        Ok(do_render_node(renderer, node, err_out)?)
2313    })?;
2314    let (mut renderer, links) = renderer.into_inner();
2315    let lines = renderer.finalise(links);
2316    // And add the links
2317    if !lines.is_empty() {
2318        renderer.start_block()?;
2319        renderer.fmt_links(lines);
2320    }
2321    Ok(renderer)
2322}
2323
2324fn pending2<
2325    D: TextDecorator,
2326    F: FnOnce(
2327            &mut TextRenderer<D>,
2328            Vec<Option<SubRenderer<D>>>,
2329        ) -> Result<Option<Option<SubRenderer<D>>>>
2330        + 'static,
2331>(
2332    children: Vec<RenderNode>,
2333    f: F,
2334) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2335    TreeMapResult::PendingChildren {
2336        children,
2337        cons: Box::new(f),
2338        prefn: None,
2339        postfn: None,
2340    }
2341}
2342
2343/// Keep track of what style state has been applied to a renderer so that we
2344/// can undo it.
2345#[derive(Default)]
2346struct PushedStyleInfo {
2347    colour: bool,
2348    bgcolour: bool,
2349    white_space: bool,
2350    preformat: bool,
2351}
2352
2353impl PushedStyleInfo {
2354    fn apply<D: TextDecorator>(render: &mut TextRenderer<D>, style: &ComputedStyle) -> Self {
2355        #[allow(unused_mut)]
2356        let mut result: PushedStyleInfo = Default::default();
2357        #[cfg(feature = "css")]
2358        if let Some(col) = style.colour.val() {
2359            render.push_colour(*col);
2360            result.colour = true;
2361        }
2362        #[cfg(feature = "css")]
2363        if let Some(col) = style.bg_colour.val() {
2364            render.push_bgcolour(*col);
2365            result.bgcolour = true;
2366        }
2367        if let Some(ws) = style.white_space.val() {
2368            if let WhiteSpace::Pre | WhiteSpace::PreWrap = ws {
2369                render.push_ws(*ws);
2370                result.white_space = true;
2371            }
2372        }
2373        if style.internal_pre {
2374            render.push_preformat();
2375            result.preformat = true;
2376        }
2377        result
2378    }
2379    fn unwind<D: TextDecorator>(self, renderer: &mut TextRenderer<D>) {
2380        if self.bgcolour {
2381            renderer.pop_bgcolour();
2382        }
2383        if self.colour {
2384            renderer.pop_colour();
2385        }
2386        if self.white_space {
2387            renderer.pop_ws();
2388        }
2389        if self.preformat {
2390            renderer.pop_preformat();
2391        }
2392    }
2393}
2394
2395fn do_render_node<T: Write, D: TextDecorator>(
2396    renderer: &mut TextRenderer<D>,
2397    tree: RenderNode,
2398    err_out: &mut T,
2399) -> render::Result<TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>>> {
2400    html_trace!("do_render_node({:?}", tree);
2401    use RenderNodeInfo::*;
2402    use TreeMapResult::*;
2403
2404    let size_estimate = tree.size_estimate.get().unwrap_or_default();
2405
2406    let pushed_style = PushedStyleInfo::apply(renderer, &tree.style);
2407
2408    Ok(match tree.info {
2409        Text(ref tstr) => {
2410            renderer.add_inline_text(tstr)?;
2411            pushed_style.unwind(renderer);
2412            Finished(None)
2413        }
2414        Container(children) => pending2(children, |renderer, _| {
2415            pushed_style.unwind(renderer);
2416            Ok(Some(None))
2417        }),
2418        Link(href, children) => {
2419            renderer.start_link(&href)?;
2420            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2421                renderer.end_link()?;
2422                pushed_style.unwind(renderer);
2423                Ok(Some(None))
2424            })
2425        }
2426        Em(children) => {
2427            renderer.start_emphasis()?;
2428            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2429                renderer.end_emphasis()?;
2430                pushed_style.unwind(renderer);
2431                Ok(Some(None))
2432            })
2433        }
2434        Strong(children) => {
2435            renderer.start_strong()?;
2436            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2437                renderer.end_strong()?;
2438                pushed_style.unwind(renderer);
2439                Ok(Some(None))
2440            })
2441        }
2442        Strikeout(children) => {
2443            renderer.start_strikeout()?;
2444            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2445                renderer.end_strikeout()?;
2446                pushed_style.unwind(renderer);
2447                Ok(Some(None))
2448            })
2449        }
2450        Code(children) => {
2451            renderer.start_code()?;
2452            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2453                renderer.end_code()?;
2454                pushed_style.unwind(renderer);
2455                Ok(Some(None))
2456            })
2457        }
2458        Img(src, title) => {
2459            renderer.add_image(&src, &title)?;
2460            pushed_style.unwind(renderer);
2461            Finished(None)
2462        }
2463        Svg(title) => {
2464            renderer.add_image("", &title)?;
2465            pushed_style.unwind(renderer);
2466            Finished(None)
2467        }
2468        Block(children) | ListItem(children) => {
2469            renderer.start_block()?;
2470            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2471                renderer.end_block();
2472                pushed_style.unwind(renderer);
2473                Ok(Some(None))
2474            })
2475        }
2476        Header(level, children) => {
2477            let prefix = renderer.header_prefix(level);
2478            let prefix_size = size_estimate.prefix_size;
2479            debug_assert!(prefix.len() == prefix_size);
2480            let min_width = size_estimate.min_width;
2481            let inner_width = min_width.saturating_sub(prefix_size);
2482            let sub_builder =
2483                renderer.new_sub_renderer(renderer.width_minus(prefix_size, inner_width)?)?;
2484            renderer.push(sub_builder);
2485            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2486                let sub_builder = renderer.pop();
2487
2488                renderer.start_block()?;
2489                renderer.append_subrender(sub_builder, repeat(&prefix[..]))?;
2490                renderer.end_block();
2491                pushed_style.unwind(renderer);
2492                Ok(Some(None))
2493            })
2494        }
2495        Div(children) => {
2496            renderer.new_line()?;
2497            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2498                renderer.new_line()?;
2499                pushed_style.unwind(renderer);
2500                Ok(Some(None))
2501            })
2502        }
2503        BlockQuote(children) => {
2504            let prefix = renderer.quote_prefix();
2505            debug_assert!(size_estimate.prefix_size == prefix.len());
2506            let inner_width = size_estimate.min_width - prefix.len();
2507            let sub_builder =
2508                renderer.new_sub_renderer(renderer.width_minus(prefix.len(), inner_width)?)?;
2509            renderer.push(sub_builder);
2510            pending2(children, move |renderer: &mut TextRenderer<D>, _| {
2511                let sub_builder = renderer.pop();
2512
2513                renderer.start_block()?;
2514                renderer.append_subrender(sub_builder, repeat(&prefix[..]))?;
2515                renderer.end_block();
2516                pushed_style.unwind(renderer);
2517                Ok(Some(None))
2518            })
2519        }
2520        Ul(items) => {
2521            let prefix = renderer.unordered_item_prefix();
2522            let prefix_len = prefix.len();
2523
2524            TreeMapResult::PendingChildren {
2525                children: items,
2526                cons: Box::new(|renderer, _| {
2527                    pushed_style.unwind(renderer);
2528                    Ok(Some(None))
2529                }),
2530                prefn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2531                    let inner_width = size_estimate.min_width - prefix_len;
2532                    let sub_builder = renderer
2533                        .new_sub_renderer(renderer.width_minus(prefix_len, inner_width)?)?;
2534                    renderer.push(sub_builder);
2535                    Ok(())
2536                })),
2537                postfn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2538                    let sub_builder = renderer.pop();
2539
2540                    let indent = " ".repeat(prefix.len());
2541
2542                    renderer.append_subrender(
2543                        sub_builder,
2544                        once(&prefix[..]).chain(repeat(&indent[..])),
2545                    )?;
2546                    Ok(())
2547                })),
2548            }
2549        }
2550        Ol(start, items) => {
2551            let num_items = items.len();
2552
2553            // The prefix width could be at either end if the start is negative.
2554            let min_number = start;
2555            // Assumption: num_items can't overflow isize.
2556            let max_number = start + (num_items as i64) - 1;
2557            let prefix_width_min = renderer.ordered_item_prefix(min_number).len();
2558            let prefix_width_max = renderer.ordered_item_prefix(max_number).len();
2559            let prefix_width = max(prefix_width_min, prefix_width_max);
2560            let prefixn = format!("{: <width$}", "", width = prefix_width);
2561            let i: Cell<_> = Cell::new(start);
2562
2563            TreeMapResult::PendingChildren {
2564                children: items,
2565                cons: Box::new(|renderer, _| {
2566                    pushed_style.unwind(renderer);
2567                    Ok(Some(None))
2568                }),
2569                prefn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2570                    let inner_min = size_estimate.min_width - size_estimate.prefix_size;
2571                    let sub_builder = renderer
2572                        .new_sub_renderer(renderer.width_minus(prefix_width, inner_min)?)?;
2573                    renderer.push(sub_builder);
2574                    Ok(())
2575                })),
2576                postfn: Some(Box::new(move |renderer: &mut TextRenderer<D>, _| {
2577                    let sub_builder = renderer.pop();
2578                    let prefix1 = renderer.ordered_item_prefix(i.get());
2579                    let prefix1 = format!("{: <width$}", prefix1, width = prefix_width);
2580
2581                    renderer.append_subrender(
2582                        sub_builder,
2583                        once(prefix1.as_str()).chain(repeat(prefixn.as_str())),
2584                    )?;
2585                    i.set(i.get() + 1);
2586                    Ok(())
2587                })),
2588            }
2589        }
2590        Dl(items) => {
2591            renderer.start_block()?;
2592
2593            TreeMapResult::PendingChildren {
2594                children: items,
2595                cons: Box::new(|renderer, _| {
2596                    pushed_style.unwind(renderer);
2597                    Ok(Some(None))
2598                }),
2599                prefn: None,
2600                postfn: None,
2601            }
2602        }
2603        Dt(children) => {
2604            renderer.new_line()?;
2605            renderer.start_emphasis()?;
2606            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2607                renderer.end_emphasis()?;
2608                pushed_style.unwind(renderer);
2609                Ok(Some(None))
2610            })
2611        }
2612        Dd(children) => {
2613            let inner_min = size_estimate.min_width - 2;
2614            let sub_builder = renderer.new_sub_renderer(renderer.width_minus(2, inner_min)?)?;
2615            renderer.push(sub_builder);
2616            pending2(children, |renderer: &mut TextRenderer<D>, _| {
2617                let sub_builder = renderer.pop();
2618                renderer.append_subrender(sub_builder, repeat("  "))?;
2619                pushed_style.unwind(renderer);
2620                Ok(Some(None))
2621            })
2622        }
2623        Break => {
2624            renderer.new_line_hard()?;
2625            pushed_style.unwind(renderer);
2626            Finished(None)
2627        }
2628        Table(tab) => render_table_tree(renderer, tab, err_out)?,
2629        TableRow(row, false) => render_table_row(renderer, row, pushed_style, err_out),
2630        TableRow(row, true) => render_table_row_vert(renderer, row, pushed_style, err_out),
2631        TableBody(_) => unimplemented!("Unexpected TableBody while rendering"),
2632        TableCell(cell) => render_table_cell(renderer, cell, pushed_style, err_out),
2633        FragStart(fragname) => {
2634            renderer.record_frag_start(&fragname);
2635            pushed_style.unwind(renderer);
2636            Finished(None)
2637        }
2638        Sup(children) => {
2639            // Special case for digit-only superscripts - use superscript
2640            // characters.
2641            fn sup_digits(children: &[RenderNode]) -> Option<String> {
2642                let [node] = children else {
2643                    return None;
2644                };
2645                if let Text(s) = &node.info {
2646                    if s.chars().all(|d| d.is_ascii_digit()) {
2647                        // It's just a string of digits - replace by superscript characters.
2648                        const SUPERSCRIPTS: [char; 10] =
2649                            ['⁰', '¹', '²', '³', '⁴', '⁵', '⁶', '⁷', '⁸', '⁹'];
2650                        return Some(
2651                            s.bytes()
2652                                .map(|b| SUPERSCRIPTS[(b - b'0') as usize])
2653                                .collect(),
2654                        );
2655                    }
2656                }
2657                None
2658            }
2659            if let Some(digitstr) = sup_digits(&children) {
2660                renderer.add_inline_text(&digitstr)?;
2661                pushed_style.unwind(renderer);
2662                Finished(None)
2663            } else {
2664                renderer.start_superscript()?;
2665                pending2(children, |renderer: &mut TextRenderer<D>, _| {
2666                    renderer.end_superscript()?;
2667                    pushed_style.unwind(renderer);
2668                    Ok(Some(None))
2669                })
2670            }
2671        }
2672    })
2673}
2674
/// Lay out a table and start rendering it.
///
/// Derives a width for every column from the size estimates of the table's
/// cells, fits those widths to `renderer.width()`, emits the table start (and a
/// top border when borders are enabled and the table is not zero-width), and
/// returns the rows as pending children carrying the chosen column widths.
///
/// Rows are laid out in the "vertical" form (`vert_row`) when raw mode is on,
/// when the minimum column widths plus the gaps between columns exceed the
/// available width, or when the available width is 0.
///
/// # Errors
///
/// Propagates errors from `start_table` and `add_horizontal_border_width`.
fn render_table_tree<T: Write, D: TextDecorator>(
    renderer: &mut TextRenderer<D>,
    table: RenderTable,
    _err_out: &mut T,
) -> render::Result<TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>>> {
    /* Now lay out the table. */
    let num_columns = table.num_columns;

    /* Heuristic: scale the column widths according to how much content there is. */
    let mut col_sizes: Vec<SizeEstimate> = vec![Default::default(); num_columns];

    for row in table.rows() {
        let mut colno = 0;
        for cell in row.cells() {
            // FIXME: get_size_estimate is still recursive.
            let mut estimate = cell.get_size_estimate();

            // If the cell has a colspan>1, then spread its size between the
            // columns.
            // NOTE(review): this divides by `colspan`, so it assumes
            // `colspan >= 1` - confirm where cells are constructed.
            estimate.size /= cell.colspan;
            estimate.min_width /= cell.colspan;
            // NOTE(review): indexing assumes `num_columns` covers the summed
            // colspans of every row - confirm against `RenderTable`.
            for i in 0..cell.colspan {
                col_sizes[colno + i] = (col_sizes[colno + i]).max(estimate);
            }
            colno += cell.colspan;
        }
    }
    let tot_size: usize = col_sizes.iter().map(|est| est.size).sum();
    // Smallest total: every column at its minimum width, plus one extra column
    // for each gap between adjacent columns.
    let min_size: usize = col_sizes.iter().map(|est| est.min_width).sum::<usize>()
        + col_sizes.len().saturating_sub(1);
    let width = renderer.width();

    // Note that `width == 0` forces `vert_row`, so the divisions by `width`
    // below are only evaluated for a non-zero width.
    let vert_row = renderer.options.raw || (min_size > width || width == 0);

    let mut col_widths: Vec<usize> = if !vert_row {
        col_sizes
            .iter()
            .map(|sz| {
                if sz.size == 0 {
                    // No content in this column: give it no width.
                    0
                } else {
                    // Share `width` in proportion to the column's size, but not
                    // below its minimum width and not above its estimated size.
                    min(
                        sz.size,
                        if usize::MAX / width <= sz.size {
                            // The provided width is too large to multiply by width,
                            // so do it the other way around.
                            max((width / tot_size) * sz.size, sz.min_width)
                        } else {
                            max(sz.size * width / tot_size, sz.min_width)
                        },
                    )
                }
            })
            .collect()
    } else {
        // In the vertical form every column entry gets the full width.
        col_sizes.iter().map(|_| width).collect()
    };

    if !vert_row {
        let num_cols = col_widths.len();
        if num_cols > 0 {
            // Shrink by one character at a time until the columns plus the
            // gaps between them fit into `width`.
            loop {
                let cur_width = col_widths.iter().sum::<usize>() + num_cols - 1;
                if cur_width <= width {
                    break;
                }
                // Pick the column to shrink: most slack above its minimum
                // width first, then the widest, then (via `usize::MAX - colno`)
                // the lowest column index.
                let (i, _) = col_widths
                    .iter()
                    .enumerate()
                    .max_by_key(|&(colno, width)| {
                        (
                            width.saturating_sub(col_sizes[colno].min_width),
                            width,
                            usize::MAX - colno,
                        )
                    })
                    .unwrap();
                col_widths[i] -= 1;
            }
        }
    }

    // Width used for the top border: the full width in the vertical form,
    // otherwise the column widths plus one per gap between non-empty columns.
    let table_width = if vert_row {
        width
    } else {
        col_widths.iter().cloned().sum::<usize>()
            + col_widths
                .iter()
                .filter(|&w| w > &0)
                .count()
                .saturating_sub(1)
    };

    renderer.start_table()?;

    if table_width != 0 && renderer.options.draw_borders {
        renderer.add_horizontal_border_width(table_width)?;
    }

    Ok(TreeMapResult::PendingChildren {
        children: table.into_rows(col_widths, vert_row),
        cons: Box::new(|_, _| Ok(Some(None))),
        prefn: None,
        postfn: None,
    })
}
2781
2782fn render_table_row<T: Write, D: TextDecorator>(
2783    _renderer: &mut TextRenderer<D>,
2784    row: RenderTableRow,
2785    pushed_style: PushedStyleInfo,
2786    _err_out: &mut T,
2787) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2788    let rowspans: Vec<usize> = row.cells().map(|cell| cell.rowspan).collect();
2789    let have_overhang = row.cells().any(|cell| cell.is_dummy);
2790    TreeMapResult::PendingChildren {
2791        children: row.into_cells(false),
2792        cons: Box::new(move |builders, children| {
2793            let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
2794            if have_overhang || children.iter().any(|c| !c.empty()) {
2795                builders.append_columns_with_borders(
2796                    children.into_iter().zip(rowspans.into_iter()),
2797                    true,
2798                )?;
2799            }
2800            pushed_style.unwind(builders);
2801            Ok(Some(None))
2802        }),
2803        prefn: Some(Box::new(|renderer: &mut TextRenderer<D>, node| {
2804            if let RenderNodeInfo::TableCell(ref cell) = node.info {
2805                let sub_builder = renderer.new_sub_renderer(cell.col_width.unwrap())?;
2806                renderer.push(sub_builder);
2807                Ok(())
2808            } else {
2809                panic!()
2810            }
2811        })),
2812        postfn: Some(Box::new(|_renderer: &mut TextRenderer<D>, _| Ok(()))),
2813    }
2814}
2815
2816fn render_table_row_vert<T: Write, D: TextDecorator>(
2817    _renderer: &mut TextRenderer<D>,
2818    row: RenderTableRow,
2819    pushed_style: PushedStyleInfo,
2820    _err_out: &mut T,
2821) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2822    TreeMapResult::PendingChildren {
2823        children: row.into_cells(true),
2824        cons: Box::new(|builders, children| {
2825            let children: Vec<_> = children.into_iter().map(Option::unwrap).collect();
2826            builders.append_vert_row(children)?;
2827            pushed_style.unwind(builders);
2828            Ok(Some(None))
2829        }),
2830        prefn: Some(Box::new(|renderer: &mut TextRenderer<D>, node| {
2831            if let RenderNodeInfo::TableCell(ref cell) = node.info {
2832                let sub_builder = renderer.new_sub_renderer(cell.col_width.unwrap())?;
2833                renderer.push(sub_builder);
2834                Ok(())
2835            } else {
2836                Err(Error::Fail)
2837            }
2838        })),
2839        postfn: Some(Box::new(|_renderer: &mut TextRenderer<D>, _| Ok(()))),
2840    }
2841}
2842
2843fn render_table_cell<T: Write, D: TextDecorator>(
2844    _renderer: &mut TextRenderer<D>,
2845    cell: RenderTableCell,
2846    pushed_style: PushedStyleInfo,
2847    _err_out: &mut T,
2848) -> TreeMapResult<'static, TextRenderer<D>, RenderNode, Option<SubRenderer<D>>> {
2849    pending2(cell.content, |renderer: &mut TextRenderer<D>, _| {
2850        pushed_style.unwind(renderer);
2851        let sub_builder = renderer.pop();
2852
2853        Ok(Some(Some(sub_builder)))
2854    })
2855}
2856
2857pub mod config {
2858    //! Configure the HTML to text translation using the `Config` type, which can be
2859    //! constructed using one of the functions in this module.
2860    use std::io;
2861
2862    use super::Error;
2863    use crate::css::types::Importance;
2864    use crate::css::{Ruleset, Selector, SelectorComponent, Style, StyleData};
2865    #[cfg(feature = "css_ext")]
2866    use crate::{HighlighterMap, SyntaxHighlighter};
2867    use crate::{
2868        HtmlContext, MIN_WIDTH, RenderTree, Result,
2869        css::{PseudoContent, PseudoElement, StyleDecl},
2870        render::text_renderer::{
2871            PlainDecorator, RichAnnotation, RichDecorator, TaggedLine, TextDecorator,
2872        },
2873    };
2874
    /// Specify how images with missing or empty alt text are handled.
    ///
    /// Selected with [`Config::empty_img_mode`]; the default is
    /// [`ImageRenderMode::IgnoreEmpty`].
    #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum ImageRenderMode {
        /// Ignore `<img>` without alt, or `<svg>` without `<title>`.
        #[default]
        IgnoreEmpty,
        /// Always process images (will be handled by the decorator).
        ShowAlways,
        /// Use a fixed replacement text (e.g. emoji).
        Replace(&'static str),
        /// Replace with the last component of the link filename, if any.
        Filename,
    }
2889
    #[cfg(feature = "xml")]
    /// Specify HTML vs XHTML handling.
    ///
    /// Selected with [`Config::xml_mode`]; the default is [`XmlMode::Auto`].
    #[derive(Copy, Clone, Debug, Default, PartialEq, Eq)]
    #[non_exhaustive]
    pub enum XmlMode {
        /// Treat as HTML unless the document starts with an XML declaration
        /// (`<?xml ...?>`).
        #[default]
        Auto,
        /// Always treat as HTML.
        Html,
        /// Always treat as XHTML.
        Xhtml,
    }
2904
    /// Configure the HTML processing.
    ///
    /// Values are created by the constructor functions of this module and
    /// adjusted through the builder-style methods, each of which consumes and
    /// returns the configuration.
    pub struct Config<D: TextDecorator> {
        /// The text decorator handed to the renderer.
        decorator: D,

        /// Maximum text wrap width, if one was set with `max_wrap_width()`.
        max_wrap_width: Option<usize>,

        /// CSS style data; user and agent rules are added to this.
        style: StyleData,
        /// Whether CSS from `<style>` elements in the document is used.
        #[cfg(feature = "css")]
        use_doc_css: bool,

        /// Whether lines are padded out to the full render width.
        pad_block_width: bool,

        /// Whether output wider than the requested width is allowed instead
        /// of failing with `TooNarrow`.
        allow_width_overflow: bool,
        /// Minimum width for text wrapping.
        min_wrap_width: usize,
        /// Raw extraction mode (see `raw_mode()`).
        raw: bool,
        /// Whether table borders are rendered.
        draw_borders: bool,
        /// Whether links may be wrapped.
        wrap_links: bool,
        /// Whether footnotes are added for hyperlinks.
        include_link_footnotes: bool,
        /// Whether Unicode combining characters are used to strike out text.
        use_unicode_strikeout: bool,
        /// How images with no alt text are handled.
        image_mode: ImageRenderMode,

        /// HTML vs XHTML parsing mode.
        #[cfg(feature = "xml")]
        xml_mode: XmlMode,

        /// Named syntax highlighters registered with `register_highlighter()`.
        #[cfg(feature = "css_ext")]
        syntax_highlighters: HighlighterMap,
    }
2932
2933    impl<D: TextDecorator> Config<D> {
2934        /// Make the HtmlContext from self.
2935        pub(crate) fn make_context(&self) -> HtmlContext {
2936            HtmlContext {
2937                style_data: self.style.clone(),
2938                #[cfg(feature = "css")]
2939                use_doc_css: self.use_doc_css,
2940
2941                max_wrap_width: self.max_wrap_width,
2942                pad_block_width: self.pad_block_width,
2943                allow_width_overflow: self.allow_width_overflow,
2944                min_wrap_width: self.min_wrap_width,
2945                raw: self.raw,
2946                draw_borders: self.draw_borders,
2947                wrap_links: self.wrap_links,
2948                include_link_footnotes: self.include_link_footnotes,
2949                use_unicode_strikeout: self.use_unicode_strikeout,
2950                image_mode: self.image_mode,
2951
2952                #[cfg(feature = "xml")]
2953                xml_mode: self.xml_mode,
2954
2955                #[cfg(feature = "css_ext")]
2956                syntax_highlighters: self.syntax_highlighters.clone(),
2957            }
2958        }
2959        /// Parse with context.
2960        pub(crate) fn do_parse<R>(&self, context: &mut HtmlContext, input: R) -> Result<RenderTree>
2961        where
2962            R: io::Read,
2963        {
2964            #[cfg(feature = "xml")]
2965            let dom = {
2966                match context.xml_mode {
2967                    XmlMode::Html => self.parse_html(input)?,
2968                    XmlMode::Xhtml => self.parse_xml(input)?,
2969                    XmlMode::Auto => {
2970                        const XML_CHECK: &[u8] = b"<?xml";
2971                        let mut input = input;
2972                        let mut firstbuf = [0u8; XML_CHECK.len()];
2973                        let bytes_read = input.read(&mut firstbuf)?;
2974                        let first_slice = &firstbuf[..bytes_read];
2975                        if bytes_read == XML_CHECK.len() && &firstbuf == XML_CHECK {
2976                            self.parse_xml(std::io::Read::chain(first_slice, input))?
2977                        } else {
2978                            self.parse_html(std::io::Read::chain(first_slice, input))?
2979                        }
2980                    }
2981                }
2982            };
2983
2984            #[cfg(not(feature = "xml"))]
2985            let dom = self.parse_html(input)?;
2986
2987            let render_tree = super::dom_to_render_tree_with_context(
2988                dom.document.clone(),
2989                &mut io::sink(),
2990                context,
2991            )?
2992            .ok_or(Error::Fail)?;
2993            Ok(RenderTree(render_tree))
2994        }
2995
2996        /// Parse the HTML into a DOM structure.
2997        pub fn parse_html<R: io::Read>(&self, mut input: R) -> Result<super::RcDom> {
2998            use html5ever::tendril::TendrilSink;
2999            let opts = super::ParseOpts {
3000                tree_builder: super::TreeBuilderOpts {
3001                    scripting_enabled: false,
3002                    ..Default::default()
3003                },
3004                ..Default::default()
3005            };
3006            Ok(super::parse_document(super::RcDom::default(), opts)
3007                .from_utf8()
3008                .read_from(&mut input)?)
3009        }
3010
3011        #[cfg(feature = "xml")]
3012        /// Parse document as XML into a DOM structure.
3013        pub fn parse_xml<R: io::Read>(&self, mut input: R) -> Result<super::RcDom> {
3014            use ::xml5ever::{driver::parse_document, tendril::TendrilSink};
3015            let opts = Default::default();
3016            Ok(parse_document(super::RcDom::default(), opts)
3017                .from_utf8()
3018                .read_from(&mut input)?)
3019        }
3020
3021        /// Convert an HTML DOM into a RenderTree.
3022        pub fn dom_to_render_tree(&self, dom: &super::RcDom) -> Result<RenderTree> {
3023            Ok(RenderTree(
3024                super::dom_to_render_tree_with_context(
3025                    dom.document.clone(),
3026                    &mut io::sink(),
3027                    &mut self.make_context(),
3028                )?
3029                .ok_or(Error::Fail)?,
3030            ))
3031        }
3032
3033        /// Render an existing RenderTree into a string.
3034        pub fn render_to_string(&self, render_tree: RenderTree, width: usize) -> Result<String> {
3035            let s = render_tree
3036                .render_with_context(
3037                    &mut self.make_context(),
3038                    width,
3039                    self.decorator.make_subblock_decorator(),
3040                )?
3041                .into_string()?;
3042            Ok(s)
3043        }
3044
3045        /// Take an existing RenderTree, and returns text wrapped to `width` columns.
3046        /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
3047        /// of the provided text decorator's `Annotation`.  The "outer" annotation comes first in
3048        /// the `Vec`.
3049        pub fn render_to_lines(
3050            &self,
3051            render_tree: RenderTree,
3052            width: usize,
3053        ) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
3054            render_tree
3055                .render_with_context(
3056                    &mut self.make_context(),
3057                    width,
3058                    self.decorator.make_subblock_decorator(),
3059                )?
3060                .into_lines()
3061        }
3062
3063        /// Reads HTML from `input`, and returns a `String` with text wrapped to
3064        /// `width` columns.
3065        pub fn string_from_read<R: std::io::Read>(self, input: R, width: usize) -> Result<String> {
3066            let mut context = self.make_context();
3067            let s = self
3068                .do_parse(&mut context, input)?
3069                .render_with_context(&mut context, width, self.decorator)?
3070                .into_string()?;
3071            Ok(s)
3072        }
3073
3074        /// Reads HTML from `input`, and returns text wrapped to `width` columns.
3075        /// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
3076        /// of the provided text decorator's `Annotation`.  The "outer" annotation comes first in
3077        /// the `Vec`.
3078        pub fn lines_from_read<R: std::io::Read>(
3079            self,
3080            input: R,
3081            width: usize,
3082        ) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
3083            let mut context = self.make_context();
3084            self.do_parse(&mut context, input)?
3085                .render_with_context(&mut context, width, self.decorator)?
3086                .into_lines()
3087        }
3088
3089        #[cfg(feature = "css")]
3090        /// Add some CSS rules which will be used (if supported) with any
3091        /// HTML processed.
3092        pub fn add_css(mut self, css: &str) -> Result<Self> {
3093            self.style.add_user_css(css)?;
3094            Ok(self)
3095        }
3096
3097        #[cfg(feature = "css")]
3098        /// Add some agent CSS rules which will be used (if supported) with any
3099        /// HTML processed.
3100        pub fn add_agent_css(mut self, css: &str) -> Result<Self> {
3101            self.style.add_agent_css(css)?;
3102            Ok(self)
3103        }
3104
3105        #[cfg(feature = "css")]
3106        /// Parse CSS from any \<style\> elements and use supported rules.
3107        pub fn use_doc_css(mut self) -> Self {
3108            self.use_doc_css = true;
3109            self
3110        }
3111
3112        /// Pad lines out to the full render width.
3113        pub fn pad_block_width(mut self) -> Self {
3114            self.pad_block_width = true;
3115            self
3116        }
3117
3118        /// Set the maximum text wrap width.
3119        /// When set, paragraphs will be wrapped to that width even if there
3120        /// is more total width available for rendering.
3121        pub fn max_wrap_width(mut self, wrap_width: usize) -> Self {
3122            self.max_wrap_width = Some(wrap_width);
3123            self
3124        }
3125
3126        /// Allow the output to be wider than the max width.  When enabled,
3127        /// then output wider than the specified width will be returned
3128        /// instead of returning `Err(TooNarrow)` if the output wouldn't
3129        /// otherwise fit.
3130        pub fn allow_width_overflow(mut self) -> Self {
3131            self.allow_width_overflow = true;
3132            self
3133        }
3134
3135        /// Set the minimum width for text wrapping.  The default is 3.
3136        /// Blocks of text will be forced to have at least this width
3137        /// (unless the text inside is less than that).  Increasing this
3138        /// can increase the chance that the width will overflow, leading
3139        /// to a TooNarrow error unless `allow_width_overflow()` is set.
3140        pub fn min_wrap_width(mut self, min_wrap_width: usize) -> Self {
3141            self.min_wrap_width = min_wrap_width;
3142            self
3143        }
3144
3145        /// Raw extraction, ensures text in table cells ends up rendered together
3146        /// This traverses tables as if they had a single column and every cell is its own row.
3147        /// Implies `no_table_borders()`
3148        pub fn raw_mode(mut self, raw: bool) -> Self {
3149            self.raw = raw;
3150            self.draw_borders = false;
3151            self
3152        }
3153
3154        /// Do not render table borders
3155        pub fn no_table_borders(mut self) -> Self {
3156            self.draw_borders = false;
3157            self
3158        }
3159        /// Do not wrap links
3160        pub fn no_link_wrapping(mut self) -> Self {
3161            self.wrap_links = false;
3162            self
3163        }
3164
3165        /// Select whether to use Unicode combining characters to strike out text.
3166        pub fn unicode_strikeout(mut self, use_unicode: bool) -> Self {
3167            self.use_unicode_strikeout = use_unicode;
3168            self
3169        }
3170
3171        /// Make a simple "contains" type rule for an element.
3172        fn make_surround_rule(element: &str, after: bool, content: &str) -> Ruleset {
3173            Ruleset {
3174                selector: Selector {
3175                    components: vec![SelectorComponent::Element(element.into())],
3176                    pseudo_element: Some(if after {
3177                        PseudoElement::After
3178                    } else {
3179                        PseudoElement::Before
3180                    }),
3181                },
3182                styles: vec![StyleDecl {
3183                    style: Style::Content(PseudoContent {
3184                        text: content.into(),
3185                    }),
3186                    importance: Importance::Default,
3187                }],
3188            }
3189        }
3190
3191        /// Decorate <em> etc. similarly to markdown
3192        pub fn do_decorate(mut self) -> Self {
3193            self.style.add_agent_rules(&[
3194                Self::make_surround_rule("em", false, "*"),
3195                Self::make_surround_rule("em", true, "*"),
3196                Self::make_surround_rule("dt", false, "*"),
3197                Self::make_surround_rule("dt", true, "*"),
3198                Self::make_surround_rule("strong", false, "**"),
3199                Self::make_surround_rule("strong", true, "**"),
3200                Self::make_surround_rule("b", false, "**"),
3201                Self::make_surround_rule("b", true, "**"),
3202                Self::make_surround_rule("code", false, "`"),
3203                Self::make_surround_rule("code", true, "`"),
3204            ]);
3205            self
3206        }
3207
3208        /// Add footnotes for hyperlinks
3209        pub fn link_footnotes(mut self, include_footnotes: bool) -> Self {
3210            self.include_link_footnotes = include_footnotes;
3211            self
3212        }
3213
3214        /// Configure how images with no alt text are handled.
3215        pub fn empty_img_mode(mut self, img_mode: ImageRenderMode) -> Self {
3216            self.image_mode = img_mode;
3217            self
3218        }
3219
3220        #[cfg(feature = "xml")]
3221        /// Configure the HTML vs XHTML parsing mode.
3222        pub fn xml_mode(mut self, xml_mode: XmlMode) -> Self {
3223            self.xml_mode = xml_mode;
3224            self
3225        }
3226
3227        #[cfg(feature = "css_ext")]
3228        /// Register a named syntax highlighter.
3229        ///
3230        /// The highlighter will be used when a `<pre>` element
3231        /// is styled with `x-syntax: name`
3232        pub fn register_highlighter(
3233            mut self,
3234            name: impl Into<String>,
3235            f: SyntaxHighlighter,
3236        ) -> Self {
3237            use std::rc::Rc;
3238
3239            self.syntax_highlighters.insert(name.into(), Rc::new(f));
3240            self
3241        }
3242    }
3243
3244    impl Config<RichDecorator> {
3245        /// Return coloured text.  `colour_map` is a function which takes
3246        /// a list of `RichAnnotation` and some text, and returns the text
3247        /// with any terminal escapes desired to indicate those annotations
3248        /// (such as colour).
3249        pub fn coloured<R, FMap>(self, input: R, width: usize, colour_map: FMap) -> Result<String>
3250        where
3251            R: std::io::Read,
3252            FMap: Fn(&[RichAnnotation], &str) -> String,
3253        {
3254            let mut context = self.make_context();
3255            let render_tree = self.do_parse(&mut context, input)?;
3256            self.render_coloured(render_tree, width, colour_map)
3257        }
3258
3259        /// Return coloured text from a RenderTree.  `colour_map` is a function which takes a list
3260        /// of `RichAnnotation` and some text, and returns the text with any terminal escapes
3261        /// desired to indicate those annotations (such as colour).
3262        pub fn render_coloured<FMap>(
3263            &self,
3264            render_tree: RenderTree,
3265            width: usize,
3266            colour_map: FMap,
3267        ) -> Result<String>
3268        where
3269            FMap: Fn(&[RichAnnotation], &str) -> String,
3270        {
3271            let lines = self.render_to_lines(render_tree, width)?;
3272
3273            let mut result = String::new();
3274            for line in lines {
3275                for ts in line.tagged_strings() {
3276                    result.push_str(&colour_map(&ts.tag, &ts.s));
3277                }
3278                result.push('\n');
3279            }
3280            Ok(result)
3281        }
3282    }
3283
3284    /// Return a Config initialized with a `RichDecorator`.
3285    pub fn rich() -> Config<RichDecorator> {
3286        with_decorator(RichDecorator::new())
3287    }
3288
3289    /// Return a Config initialized with a `PlainDecorator`.
3290    pub fn plain() -> Config<PlainDecorator> {
3291        with_decorator(PlainDecorator::new())
3292            .do_decorate()
3293            .link_footnotes(true)
3294    }
3295
3296    /// Return a Config initialized with a `PlainDecorator`.
3297    pub fn plain_no_decorate() -> Config<PlainDecorator> {
3298        with_decorator(PlainDecorator::new())
3299    }
3300
3301    /// Return a Config initialized with a custom decorator.
3302    pub fn with_decorator<D: TextDecorator>(decorator: D) -> Config<D> {
3303        Config {
3304            decorator,
3305            style: Default::default(),
3306            #[cfg(feature = "css")]
3307            use_doc_css: false,
3308            max_wrap_width: None,
3309            pad_block_width: false,
3310            allow_width_overflow: false,
3311            min_wrap_width: MIN_WIDTH,
3312            raw: false,
3313            draw_borders: true,
3314            wrap_links: true,
3315            include_link_footnotes: false,
3316            use_unicode_strikeout: true,
3317            image_mode: ImageRenderMode::IgnoreEmpty,
3318            #[cfg(feature = "xml")]
3319            xml_mode: XmlMode::Auto,
3320            #[cfg(feature = "css_ext")]
3321            syntax_highlighters: Default::default(),
3322        }
3323    }
3324}
3325
3326/// The structure of an HTML document that can be rendered using a [`TextDecorator`][].
3327///
3328/// [`TextDecorator`]: render/text_renderer/trait.TextDecorator.html
3329
3330#[derive(Clone, Debug)]
3331pub struct RenderTree(RenderNode);
3332
3333impl std::fmt::Display for RenderTree {
3334    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
3335        writeln!(f, "Render tree:")?;
3336        self.0.write_self(f, 1)
3337    }
3338}
3339
3340impl RenderTree {
3341    /// Render this document using the given `decorator` and wrap it to `width` columns.
3342    fn render_with_context<D: TextDecorator>(
3343        self,
3344        context: &mut HtmlContext,
3345        width: usize,
3346        decorator: D,
3347    ) -> Result<RenderedText<D>> {
3348        if width == 0 {
3349            return Err(Error::TooNarrow);
3350        }
3351        let render_options = RenderOptions {
3352            wrap_width: context.max_wrap_width,
3353            pad_block_width: context.pad_block_width,
3354            allow_width_overflow: context.allow_width_overflow,
3355            raw: context.raw,
3356            draw_borders: context.draw_borders,
3357            wrap_links: context.wrap_links,
3358            include_link_footnotes: context.include_link_footnotes,
3359            use_unicode_strikeout: context.use_unicode_strikeout,
3360            img_mode: context.image_mode,
3361        };
3362        let test_decorator = decorator.make_subblock_decorator();
3363        let builder = SubRenderer::new(width, render_options, decorator);
3364        let builder =
3365            render_tree_to_string(context, builder, &test_decorator, self.0, &mut io::sink())?;
3366        Ok(RenderedText(builder))
3367    }
3368}
3369
3370/// A rendered HTML document.
3371struct RenderedText<D: TextDecorator>(SubRenderer<D>);
3372
3373impl<D: TextDecorator> RenderedText<D> {
3374    /// Convert the rendered HTML document to a string.
3375    fn into_string(self) -> render::Result<String> {
3376        self.0.into_string()
3377    }
3378
3379    /// Convert the rendered HTML document to a vector of lines with the annotations created by the
3380    /// decorator.
3381    fn into_lines(self) -> Result<Vec<TaggedLine<Vec<D::Annotation>>>> {
3382        Ok(self
3383            .0
3384            .into_lines()?
3385            .into_iter()
3386            .map(RenderLine::into_tagged_line)
3387            .collect())
3388    }
3389}
3390
3391/// Reads and parses HTML from `input` and prepares a render tree.
3392pub fn parse(input: impl io::Read) -> Result<RenderTree> {
3393    let cfg = config::with_decorator(TrivialDecorator::new());
3394    cfg.do_parse(&mut cfg.make_context(), input)
3395}
3396
3397/// Reads HTML from `input`, decorates it using `decorator`, and
3398/// returns a `String` with text wrapped to `width` columns.
3399pub fn from_read_with_decorator<R, D>(input: R, width: usize, decorator: D) -> Result<String>
3400where
3401    R: io::Read,
3402    D: TextDecorator,
3403{
3404    config::with_decorator(decorator).string_from_read(input, width)
3405}
3406
3407/// Reads HTML from `input`, and returns a `String` with text wrapped to
3408/// `width` columns.
3409pub fn from_read<R>(input: R, width: usize) -> Result<String>
3410where
3411    R: io::Read,
3412{
3413    config::plain().string_from_read(input, width)
3414}
3415
3416/// Reads HTML from `input`, and returns text wrapped to `width` columns.
3417///
3418/// The text is returned as a `Vec<TaggedLine<_>>`; the annotations are vectors
3419/// of `RichAnnotation`.  The "outer" annotation comes first in the `Vec`.
3420pub fn from_read_rich<R>(input: R, width: usize) -> Result<Vec<TaggedLine<Vec<RichAnnotation>>>>
3421where
3422    R: io::Read,
3423{
3424    config::rich().lines_from_read(input, width)
3425}
3426
3427mod ansi_colours;
3428
3429pub use ansi_colours::from_read_coloured;
3430
3431#[cfg(test)]
3432mod tests;
3433
3434fn calc_ol_prefix_size<D: TextDecorator>(start: i64, num_items: usize, decorator: &D) -> usize {
3435    // The prefix width could be at either end if the start is negative.
3436    let min_number = start;
3437    // Assumption: num_items can't overflow isize.
3438    let max_number = start + (num_items as i64) - 1;
3439
3440    // This assumes that the decorator gives the same width as default.
3441    let prefix_width_min = decorator.ordered_item_prefix(min_number).len();
3442    let prefix_width_max = decorator.ordered_item_prefix(max_number).len();
3443    max(prefix_width_min, prefix_width_max)
3444}