Skip to main content

verso/reader/
styled.rs

1use scraper::{Html, Node};
2
/// Inline formatting flags that apply to one run of text.
///
/// `Style::default()` is plain text: every flag is `false` and `heading` is
/// `None`. While walking the document, a child element starts from a copy of
/// its parent's style, so flags accumulate through nested elements.
#[derive(Clone, Debug, Default, Eq, PartialEq)]
pub struct Style {
    /// Set for text inside `<strong>` or `<b>`.
    pub bold: bool,
    /// Set for text inside `<em>` or `<i>`.
    pub italic: bool,
    /// Set for text inside `<code>`, `<kbd>` or `<samp>`.
    pub code: bool,
    /// Set for text inside `<a>`.
    pub link: bool,
    /// Heading level `1..=6` for text inside `<h1>`..`<h6>`; `None` otherwise.
    pub heading: Option<u8>,
}
11
12#[derive(Debug, Clone, PartialEq, Eq)]
13pub struct Span {
14    pub text: String,
15    pub style: Style,
16    /// character offset into the plain-text extraction of this spine item.
17    pub char_offset: usize,
18}
19
20pub fn to_spans(html: &str) -> Vec<Span> {
21    let doc = Html::parse_document(html);
22    let mut offset = 0usize;
23    let mut out = Vec::new();
24    walk(doc.root_element(), Style::default(), &mut offset, &mut out);
25    out
26}
27
28fn walk(node: scraper::ElementRef, style: Style, offset: &mut usize, out: &mut Vec<Span>) {
29    for child in node.children() {
30        match child.value() {
31            Node::Text(t) => {
32                let text = t.to_string();
33                if text.is_empty() {
34                    continue;
35                }
36                let len = text.chars().count();
37                out.push(Span {
38                    text,
39                    style: style.clone(),
40                    char_offset: *offset,
41                });
42                *offset += len;
43            }
44            Node::Element(el) => {
45                let name = el.name();
46                if matches!(name, "script" | "style" | "iframe" | "object" | "embed") {
47                    continue;
48                }
49                let mut s = style.clone();
50                match name {
51                    "em" | "i" => s.italic = true,
52                    "strong" | "b" => s.bold = true,
53                    "code" | "kbd" | "samp" => s.code = true,
54                    "a" => s.link = true,
55                    "h1" => s.heading = Some(1),
56                    "h2" => s.heading = Some(2),
57                    "h3" => s.heading = Some(3),
58                    "h4" => s.heading = Some(4),
59                    "h5" => s.heading = Some(5),
60                    "h6" => s.heading = Some(6),
61                    _ => {}
62                }
63                if let Some(er) = scraper::ElementRef::wrap(child) {
64                    walk(er, s, offset, out);
65                }
66            }
67            _ => {}
68        }
69    }
70}