// rig/loaders/epub/text_processors.rs
use std::{convert::Infallible, error::Error};

use quick_xml::Reader;
use quick_xml::escape::resolve_predefined_entity;
use quick_xml::events::Event;
5
/// Turns a piece of text into a processed `String`.
///
/// `process` is an associated function without a receiver, so a processor is
/// chosen by type rather than by value.
pub trait TextProcessor {
    /// Error reported when processing fails.
    type Error: Error + 'static;

    /// Processes `text` and returns the resulting string.
    ///
    /// # Errors
    ///
    /// Returns [`Self::Error`] when the implementation cannot process `text`.
    fn process(text: &str) -> Result<String, Self::Error>;
}
15
/// Processor that hands text back unchanged.
#[derive(Debug, Clone, Copy, Default)]
pub struct RawTextProcessor;
17
18impl TextProcessor for RawTextProcessor {
19 type Error = Infallible;
20
21 fn process(text: &str) -> Result<String, Self::Error> {
22 Ok(text.to_string())
23 }
24}
25
26#[derive(thiserror::Error, Debug)]
27pub enum XmlProcessingError {
28 #[error("XML parsing error: {0}")]
29 Xml(#[from] quick_xml::Error),
30
31 #[error("Failed to unescape XML entity: {0}")]
32 Encoding(#[from] quick_xml::encoding::EncodingError),
33
34 #[error("Invalid UTF-8 sequence: {0}")]
35 Utf8(#[from] std::string::FromUtf8Error),
36}
37
/// Processor that removes XML markup and keeps only the character data.
#[derive(Debug, Clone, Copy, Default)]
pub struct StripXmlProcessor;
39
40impl TextProcessor for StripXmlProcessor {
41 type Error = XmlProcessingError;
42
43 fn process(xml: &str) -> Result<String, Self::Error> {
44 let mut reader = Reader::from_str(xml.trim());
45
46 let mut result = String::with_capacity(xml.len() / 2); let mut last_was_text = false;
48
49 loop {
50 match reader.read_event()? {
51 Event::Text(e) => {
52 let text = e.decode()?;
53 if !text.trim().is_empty() {
54 if last_was_text {
55 result.push(' ');
56 }
57 result.push_str(&text);
58 last_was_text = true;
59 }
60 }
61 Event::CData(e) => {
62 let text = String::from_utf8(e.into_inner().into_owned())?;
63 if !text.trim().is_empty() {
64 if last_was_text {
65 result.push(' ');
66 }
67 result.push_str(&text);
68 last_was_text = true;
69 }
70 }
71 Event::Eof => break,
72 _ => {
73 last_was_text = false;
74 }
75 }
76 }
77
78 Ok(result)
79 }
80}