verso/reader/
book.rs

1//! Pure helpers for navigating an open EPUB: spine-item loading and
2//! table-of-contents resolution. Kept free of any TUI dependency so
3//! they can be unit-tested without a terminal.
4
5use anyhow::{anyhow, Result};
6use rbook::Epub;
7
8use super::{page, page::Page, sanitize, styled};
9
10/// Raw data describing a single spine item.
11pub struct SpineData {
12    /// Paginated rows at the given width/height.
13    pub pages: Vec<Page>,
14    /// Flat plain text of the sanitised chapter HTML.
15    pub plain_text: String,
16    /// Char count of `plain_text`.
17    pub plain_text_chars: usize,
18}
19
20/// Collect manifest hrefs for every spine item, in order.
21pub fn spine_hrefs(book: &Epub) -> Result<Vec<String>> {
22    let spine = book.spine().elements();
23    let mut out: Vec<String> = Vec::with_capacity(spine.len());
24    for el in &spine {
25        let idref = el.name();
26        let item = book
27            .manifest()
28            .by_id(idref)
29            .ok_or_else(|| anyhow!("manifest missing idref {}", idref))?;
30        out.push(item.value().to_string());
31    }
32    Ok(out)
33}
34
35/// Compute human-readable titles per spine item.
36///
37/// Looks up each spine href in the TOC (NAV/NCX). Fragments are stripped
38/// when matching (`ch04.xhtml#heading` matches `ch04.xhtml`). Spine items
39/// without a TOC entry fall back to `"Chapter {i+1}"`.
40pub fn chapter_titles_from_book(book: &Epub) -> Vec<String> {
41    let spine_hrefs_result = spine_hrefs(book);
42    let hrefs = match spine_hrefs_result {
43        Ok(h) => h,
44        Err(_) => return Vec::new(),
45    };
46
47    // Build TOC href -> label, matching on the path portion only.
48    let toc_entries: Vec<(String, String)> = book
49        .toc()
50        .elements_flat()
51        .into_iter()
52        .filter_map(|e| {
53            let label = e.name().trim().to_string();
54            let href = e.value().trim().to_string();
55            if label.is_empty() || href.is_empty() {
56                None
57            } else {
58                Some((strip_fragment(&href).to_string(), label))
59            }
60        })
61        .collect();
62
63    hrefs
64        .iter()
65        .enumerate()
66        .map(|(i, href)| {
67            let href_path = strip_fragment(href);
68            // Prefer full-path match; fall back to basename match for TOC
69            // entries that use relative hrefs without directory prefixes.
70            let exact = toc_entries
71                .iter()
72                .find(|(h, _)| h == href_path)
73                .map(|(_, l)| l.clone());
74            let by_base = exact.or_else(|| {
75                let target_base = basename(href_path);
76                toc_entries
77                    .iter()
78                    .find(|(h, _)| basename(h) == target_base)
79                    .map(|(_, l)| l.clone())
80            });
81            by_base.unwrap_or_else(|| format!("Chapter {}", i + 1))
82        })
83        .collect()
84}
85
86/// Read the spine item at `idx`, sanitise and paginate to `width` x `height`.
87pub fn load_spine_data(book: &Epub, idx: usize, width: u16, height: u16) -> Result<SpineData> {
88    let hrefs = spine_hrefs(book)?;
89    let href = hrefs
90        .get(idx)
91        .ok_or_else(|| anyhow!("spine index {} out of bounds (len {})", idx, hrefs.len()))?;
92    let html = book.read_file(href)?;
93    Ok(load_spine_from_html(&html, width, height))
94}
95
96/// Same as `load_spine_data` but takes pre-fetched HTML. Used by the
97/// html-only entry point in the CLI.
98pub fn load_spine_from_html(html: &str, width: u16, height: u16) -> SpineData {
99    let safe = sanitize::clean(html);
100    let spans = styled::to_spans(&safe);
101    let plain_text: String = spans
102        .iter()
103        .map(|s| s.text.as_str())
104        .collect::<Vec<_>>()
105        .concat();
106    let pages = page::paginate(&spans, width, height.saturating_sub(2));
107    let plain_text_chars = plain_text.chars().count();
108    SpineData {
109        pages,
110        plain_text,
111        plain_text_chars,
112    }
113}
114
/// Return the part of `s` before its first `#`, or all of `s` when it
/// contains no `#`.
fn strip_fragment(s: &str) -> &str {
    // `split` always yields at least one piece, so the fallback is never hit.
    s.split('#').next().unwrap_or(s)
}
121
/// Return the part of `s` after its last `/`, or all of `s` when it
/// contains no `/`. A trailing `/` therefore yields an empty string.
fn basename(s: &str) -> &str {
    // `rsplit` always yields at least one piece, so the fallback is never hit.
    s.rsplit('/').next().unwrap_or(s)
}
verso/reader/book.rs

verso/reader/
book.rs