verso/library/
epub_meta.rs1use anyhow::Result;
2use rbook::Ebook;
3use std::path::Path;
4
/// Descriptive metadata collected from a single EPUB file by [`extract`].
///
/// All fields have total equality, so the type derives `Eq` in addition to
/// `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Meta {
    /// Book title; [`extract`] stores an empty string when the package
    /// declares no title.
    pub title: String,
    /// First creator listed in the package metadata, if any.
    pub author: Option<String>,
    /// Language value from the package metadata, if any.
    pub language: Option<String>,
    /// First publisher listed in the package metadata, if any.
    pub publisher: Option<String>,
    /// Date value from the package metadata, kept as the raw string.
    pub published_at: Option<String>,
    /// Value of the package's unique identifier, if any.
    pub stable_id: Option<String>,
    /// Total number of whitespace-separated words counted across the spine
    /// documents that could be read.
    pub word_count: Option<u64>,
    /// Number of entries in the spine.
    pub spine_items: usize,
}
16
17pub fn extract(path: &Path) -> Result<Meta> {
18 let book = rbook::Epub::new(path)?;
19 let m = book.metadata();
20
21 let title = m.title().map(|s| s.value().to_string()).unwrap_or_default();
22 let author = m.creators().first().map(|c| c.value().to_string());
23 let language = m.language().map(|s| s.value().to_string());
24 let publisher = m.publisher().first().map(|s| s.value().to_string());
25 let published_at = m.date().map(|s| s.value().to_string());
26 let stable_id = m.unique_identifier().map(|s| s.value().to_string());
27
28 let spine_elements = book.spine().elements();
29 let spine_items = spine_elements.len();
30
31 let mut words: u64 = 0;
32 for el in &spine_elements {
33 if let Some(item) = book.manifest().by_id(el.name()) {
34 if let Ok(content) = book.read_file(item.value()) {
35 words += count_words(&content);
36 }
37 }
38 }
39
40 Ok(Meta {
41 title,
42 author,
43 language,
44 publisher,
45 published_at,
46 stable_id,
47 word_count: Some(words),
48 spine_items,
49 })
50}
51
52fn count_words(html: &str) -> u64 {
53 let text = strip_tags(html);
55 text.split_whitespace().count() as u64
56}
57
/// Removes markup from `html`, returning only the text between tags.
///
/// Block-level and line-breaking tags (`<p>`, `<div>`, `<br/>`, headings, list
/// and table parts, ...) are replaced by a single space so that text from
/// adjacent elements does not fuse into one word (`<p>one</p><p>two</p>` must
/// not become `onetwo`). Every other tag is removed without a replacement, so
/// inline markup inside a word (`<span>T</span>he`) keeps the word intact.
///
/// A `>` that appears outside any tag is dropped, and an unterminated `<`
/// swallows the rest of the input. Entity references are left as written.
fn strip_tags(html: &str) -> String {
    /// Element names whose start or end tag separates words.
    const BREAKING_TAGS: &[&str] = &[
        "address", "article", "aside", "blockquote", "body", "br", "caption", "dd", "details",
        "div", "dl", "dt", "fieldset", "figcaption", "figure", "footer", "form", "h1", "h2",
        "h3", "h4", "h5", "h6", "header", "hr", "li", "main", "nav", "ol", "p", "pre",
        "section", "summary", "table", "tbody", "td", "tfoot", "th", "thead", "title", "tr",
        "ul",
    ];

    /// Reports whether the tag body (the text between `<` and `>`) names a
    /// word-separating element.
    fn breaks_word(tag: &str) -> bool {
        // Skip the leading '/' of an end tag, then take the element name up to
        // the first character that cannot be part of it (space, '/', ':', ...).
        let name = tag.trim_start().trim_start_matches('/');
        let end = name
            .find(|c: char| !c.is_ascii_alphanumeric())
            .unwrap_or(name.len());
        let name = &name[..end];
        BREAKING_TAGS
            .iter()
            .any(|known| known.eq_ignore_ascii_case(name))
    }

    let mut out = String::with_capacity(html.len());
    // Byte offset just past the '<' of the tag currently being skipped.
    let mut tag_start: Option<usize> = None;

    for (idx, ch) in html.char_indices() {
        match (ch, tag_start) {
            ('<', None) => tag_start = Some(idx + ch.len_utf8()),
            ('>', Some(start)) => {
                if breaks_word(&html[start..idx]) {
                    out.push(' ');
                }
                tag_start = None;
            }
            // A stray '>' outside a tag is discarded.
            ('>', None) => {}
            (_, None) => out.push(ch),
            // Everything inside a tag, including a nested '<', is skipped.
            (_, Some(_)) => {}
        }
    }
    out
}