epub_parser/utils/xml.rs
//! XML parsing utilities for the EPUB parser.
//!
//! This module provides helper functions for common XML parsing tasks,
//! particularly for extracting text content from XML elements.
5
6use quick_xml::events::Event;
7
/// Namespace type for the XML helper routines of the EPUB parser.
///
/// `XmlParser` is a unit struct: it holds no data and is never meant to be
/// instantiated for its state. Its associated functions bundle the XML
/// reading operations needed while pulling data out of EPUB documents
/// (OPF, NCX, and HTML content).
pub struct XmlParser;
13
14impl XmlParser {
15 /// Extracts text content from an XML reader.
16 ///
17 /// Reads events from the XML reader until a text event is found or
18 /// the element ends. This is useful for extracting the text content
19 /// of XML elements like `<title>`, `<creator>`, etc.
20 ///
21 /// # Arguments
22 ///
23 /// * `reader` - The XML reader to read events from.
24 /// * `buf` - A buffer for reading events (will be cleared automatically).
25 ///
26 /// # Returns
27 ///
28 /// Returns `Ok(Some(String))` if text was found, `Ok(None)` if no
29 /// text was found before the element ended, or an error if parsing fails.
30 ///
31 /// # Errors
32 ///
33 /// Returns an error if there is an XML parsing error.
34 ///
35 /// # Example
36 ///
37 /// ```
38 /// use quick_xml::Reader;
39 /// use epub_parser::utils::XmlParser;
40 ///
41 /// let xml = r#"<title>My Book</title>"#;
42 /// let mut reader = Reader::from_str(xml);
43 /// let mut buf = Vec::new();
44 ///
45 /// // Skip the Start event
46 /// reader.read_event_into(&mut buf).unwrap();
47 ///
48 /// let text = XmlParser::extract_text(&mut reader, &mut buf).unwrap();
49 /// assert_eq!(text, Some("My Book".to_string()));
50 /// ```
51 pub fn extract_text<R: std::io::BufRead>(
52 reader: &mut quick_xml::Reader<R>,
53 buf: &mut Vec<u8>,
54 ) -> Result<Option<String>, Box<dyn std::error::Error>> {
55 let mut text = String::new();
56
57 loop {
58 match reader.read_event_into(buf) {
59 Ok(Event::Text(e)) => {
60 text = e.unescape()?.into_owned();
61 text = text.trim().to_string();
62 if !text.is_empty() {
63 break;
64 }
65 }
66 Ok(Event::End(_)) => break,
67 Ok(Event::Eof) => break,
68 Err(e) => return Err(e.into()),
69 _ => {}
70 }
71 buf.clear();
72 }
73
74 Ok(if text.is_empty() { None } else { Some(text) })
75 }
76}