Skip to main content

verso/library/
epub_meta.rs

1use anyhow::Result;
2use rbook::Ebook;
3use std::path::Path;
4
/// Descriptive metadata gathered from a single EPUB file.
///
/// All textual fields are stored exactly as the package metadata provides them;
/// no parsing or normalisation is applied.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Meta {
    /// Book title; an empty string when the metadata carries no title.
    pub title: String,
    /// First listed creator, if any.
    pub author: Option<String>,
    /// Language value from the metadata, if present.
    pub language: Option<String>,
    /// First listed publisher, if any.
    pub publisher: Option<String>,
    /// Publication date as the raw metadata string, if present.
    pub published_at: Option<String>,
    /// The package's unique identifier, if present.
    pub stable_id: Option<String>,
    /// Estimated number of words across the readable spine documents.
    pub word_count: Option<u64>,
    /// Number of entries in the spine.
    pub spine_items: usize,
}
16
17pub fn extract(path: &Path) -> Result<Meta> {
18    let book = rbook::Epub::new(path)?;
19    let m = book.metadata();
20
21    let title = m.title().map(|s| s.value().to_string()).unwrap_or_default();
22    let author = m.creators().first().map(|c| c.value().to_string());
23    let language = m.language().map(|s| s.value().to_string());
24    let publisher = m.publisher().first().map(|s| s.value().to_string());
25    let published_at = m.date().map(|s| s.value().to_string());
26    let stable_id = m.unique_identifier().map(|s| s.value().to_string());
27
28    let spine_elements = book.spine().elements();
29    let spine_items = spine_elements.len();
30
31    let mut words: u64 = 0;
32    for el in &spine_elements {
33        if let Some(item) = book.manifest().by_id(el.name()) {
34            if let Ok(content) = book.read_file(item.value()) {
35                words += count_words(&content);
36            }
37        }
38    }
39
40    Ok(Meta {
41        title,
42        author,
43        language,
44        publisher,
45        published_at,
46        stable_id,
47        word_count: Some(words),
48        spine_items,
49    })
50}
51
52fn count_words(html: &str) -> u64 {
53    // Cheap estimate: strip tags, whitespace-split.
54    let text = strip_tags(html);
55    text.split_whitespace().count() as u64
56}
57
/// Returns `html` with everything between a `<` and the next `>` removed.
///
/// This is a single-pass character scan, not a parser: the text on either side
/// of a tag is joined with nothing inserted in between, and entities are left
/// untouched. A `>` that appears outside any tag is ordinary text and is kept.
fn strip_tags(html: &str) -> String {
    let mut out = String::with_capacity(html.len());
    let mut in_tag = false;
    for ch in html.chars() {
        match ch {
            '<' => in_tag = true,
            // Only a `>` that closes an open tag is markup; a stray one is content.
            '>' if in_tag => in_tag = false,
            _ if !in_tag => out.push(ch),
            _ => {}
        }
    }
    out
}