Skip to main content

epub_parser/utils/
xml.rs

1//! XML parsing utilities for the EPUB parser.
2//!
3//! This module provides helper functions for common XML parsing tasks,
4//! particularly for extracting text content from XML elements.
5
6use quick_xml::events::Event;
7
/// Namespace type for the XML helper functions used by the EPUB parser.
///
/// `XmlParser` carries no state; its associated functions implement the
/// XML parsing operations shared by the code that reads EPUB data
/// (OPF, NCX, and HTML content).
///
/// The derived traits make the type usable in debug output and allow it to
/// be freely copied or default-constructed, at no runtime cost.
#[derive(Debug, Clone, Copy, Default)]
pub struct XmlParser;
13
14impl XmlParser {
15    /// Extracts text content from an XML reader.
16    ///
17    /// Reads events from the XML reader until a text event is found or
18    /// the element ends. This is useful for extracting the text content
19    /// of XML elements like `<title>`, `<creator>`, etc.
20    ///
21    /// # Arguments
22    ///
23    /// * `reader` - The XML reader to read events from.
24    /// * `buf` - A buffer for reading events (will be cleared automatically).
25    ///
26    /// # Returns
27    ///
28    /// Returns `Ok(Some(String))` if text was found, `Ok(None)` if no
29    /// text was found before the element ended, or an error if parsing fails.
30    ///
31    /// # Errors
32    ///
33    /// Returns an error if there is an XML parsing error.
34    ///
35    /// # Example
36    ///
37    /// ```
38    /// use quick_xml::Reader;
39    /// use epub_parser::utils::XmlParser;
40    ///
41    /// let xml = r#"<title>My Book</title>"#;
42    /// let mut reader = Reader::from_str(xml);
43    /// let mut buf = Vec::new();
44    ///
45    /// // Skip the Start event
46    /// reader.read_event_into(&mut buf).unwrap();
47    ///
48    /// let text = XmlParser::extract_text(&mut reader, &mut buf).unwrap();
49    /// assert_eq!(text, Some("My Book".to_string()));
50    /// ```
51    pub fn extract_text<R: std::io::BufRead>(
52        reader: &mut quick_xml::Reader<R>,
53        buf: &mut Vec<u8>,
54    ) -> Result<Option<String>, Box<dyn std::error::Error>> {
55        let mut text = String::new();
56
57        loop {
58            match reader.read_event_into(buf) {
59                Ok(Event::Text(e)) => {
60                    text = e.unescape()?.into_owned();
61                    text = text.trim().to_string();
62                    if !text.is_empty() {
63                        break;
64                    }
65                }
66                Ok(Event::End(_)) => break,
67                Ok(Event::Eof) => break,
68                Err(e) => return Err(e.into()),
69                _ => {}
70            }
71            buf.clear();
72        }
73
74        Ok(if text.is_empty() { None } else { Some(text) })
75    }
76}