epub_parser/
types.rs

1//! Type definitions for EPUB book components.
2//!
3//! This module contains all the data structures used to represent
4//! the extracted content from an EPUB file:
5//! - `Metadata`: Dublin Core metadata
6//! - `Page`: Text content pages
7//! - `Image`: Images including cover
8//! - `TocEntry`: Table of contents navigation
9
/// Dublin Core metadata extracted from an EPUB file.
///
/// This struct contains standard Dublin Core metadata fields as defined
/// in the EPUB specification. All fields are optional as not all EPUBs
/// contain complete metadata.
///
/// Two `Metadata` values compare equal when every field is equal.
///
/// # Example
///
/// ```
/// use epub_parser::Metadata;
///
/// let metadata = Metadata {
///     title: Some("Example Book".to_string()),
///     author: Some("John Doe".to_string()),
///     publisher: Some("Example Press".to_string()),
///     language: Some("en".to_string()),
///     ..Default::default()
/// };
///
/// assert_ne!(metadata, Metadata::new());
/// println!("Book: {}", metadata.title.unwrap_or_default());
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Metadata {
    /// The title of the book.
    ///
    /// Maps to the Dublin Core `dc:title` element.
    pub title: Option<String>,

    /// The author or creator of the book.
    ///
    /// Maps to the Dublin Core `dc:creator` element.
    pub author: Option<String>,

    /// The publisher of the book.
    ///
    /// Maps to the Dublin Core `dc:publisher` element.
    pub publisher: Option<String>,

    /// The language code (e.g., "en", "fr", "zh").
    ///
    /// Maps to the Dublin Core `dc:language` element.
    pub language: Option<String>,

    /// A unique identifier for the book (e.g., ISBN, UUID).
    ///
    /// Maps to the Dublin Core `dc:identifier` element.
    pub identifier: Option<String>,

    /// The publication date.
    ///
    /// Maps to the Dublin Core `dc:date` element.
    /// Typically in YYYY-MM-DD format.
    pub date: Option<String>,

    /// The copyright or rights statement.
    ///
    /// Maps to the Dublin Core `dc:rights` element.
    pub rights: Option<String>,
}

impl Metadata {
    /// Creates a new, empty Metadata instance.
    ///
    /// Equivalent to [`Metadata::default`].
    ///
    /// # Returns
    ///
    /// A `Metadata` struct with all fields set to `None`.
    pub fn new() -> Self {
        Self::default()
    }
}
80
/// A single page of text content from an EPUB book.
///
/// Pages are extracted from the EPUB's HTML/XHTML content files in the
/// order defined by the spine element in the OPF file. Each page
/// contains the plain text content with HTML tags stripped.
///
/// Two `Page` values compare equal when both their index and content match.
///
/// # Example
///
/// ```
/// use epub_parser::Page;
///
/// let page = Page::new(0, "Chapter 1\n\nIt was a dark and stormy night...".to_string());
/// // `String::len` counts bytes; use `chars().count()` for a character count.
/// println!("Page {}: {} characters", page.index, page.content.chars().count());
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Page {
    /// The position of this page in the reading order (0-indexed).
    pub index: usize,

    /// The plain text content of the page.
    ///
    /// HTML tags are stripped, and the text is cleaned of control characters.
    /// Paragraphs and other block elements are separated by newlines.
    pub content: String,
}

impl Page {
    /// Creates a new Page with the given index and content.
    ///
    /// # Arguments
    ///
    /// * `index` - The position in the reading order.
    /// * `content` - The plain text content of the page.
    ///
    /// # Returns
    ///
    /// A new `Page` instance holding exactly the given values.
    ///
    /// # Example
    ///
    /// ```
    /// use epub_parser::Page;
    ///
    /// let page = Page::new(5, "Some text content".to_string());
    /// assert_eq!(page.index, 5);
    /// assert_eq!(page.content, "Some text content");
    /// ```
    pub fn new(index: usize, content: String) -> Self {
        Self { index, content }
    }
}
131
/// An image extracted from an EPUB file.
///
/// Images include both the metadata (ID, href, media type) and optionally
/// the binary content. The first image in the EPUB's images vector is
/// typically the cover image.
///
/// Two `Image` values compare equal when all fields, including the
/// optional binary content, are equal.
///
/// # Example
///
/// ```
/// use epub_parser::Image;
///
/// let image = Image {
///     id: "cover".to_string(),
///     href: "images/cover.jpg".to_string(),
///     media_type: "image/jpeg".to_string(),
///     content: None,
/// };
///
/// println!("Image: {} ({})", image.href, image.media_type);
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Image {
    /// The unique identifier for this image from the manifest.
    ///
    /// This corresponds to the `id` attribute in the OPF manifest.
    pub id: String,

    /// The path to the image within the EPUB archive.
    ///
    /// This is a relative path that can be used to locate the image file
    /// within the EPUB's ZIP structure.
    pub href: String,

    /// The MIME type of the image.
    ///
    /// Common values include "image/jpeg", "image/png", "image/gif", etc.
    pub media_type: String,

    /// The binary content of the image.
    ///
    /// This is `Some(Vec<u8>)` if the image was successfully read,
    /// or `None` if the image file could not be found or read.
    pub content: Option<Vec<u8>>,
}
176
/// An entry in the EPUB table of contents.
///
/// EPUB navigation uses NCX (Navigation Control for XML) files which define
/// a hierarchical structure of navigation points. Each entry can have
/// child entries, creating a tree structure.
///
/// Equality is structural: two entries are equal when their label, href,
/// and all children (recursively) are equal.
///
/// # Example
///
/// ```
/// use epub_parser::TocEntry;
///
/// let mut toc_entry = TocEntry::new("Chapter 1".to_string(), "chapter1.xhtml".to_string());
/// toc_entry.children.push(TocEntry::new(
///     "Section 1.1".to_string(),
///     "chapter1.xhtml#section1".to_string()
/// ));
///
/// println!("Entry: {} -> {}", toc_entry.label, toc_entry.href);
/// for child in &toc_entry.children {
///     println!("  Child: {} -> {}", child.label, child.href);
/// }
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TocEntry {
    /// The display label or title for this navigation point.
    ///
    /// This is the text that would be shown in a table of contents.
    pub label: String,

    /// The target URL for this navigation point.
    ///
    /// This is a relative path within the EPUB, often with an anchor
    /// (e.g., "chapter1.xhtml" or "chapter1.xhtml#section1").
    pub href: String,

    /// Child navigation entries.
    ///
    /// The NCX format supports hierarchical navigation, so each entry
    /// can have nested sub-entries.
    pub children: Vec<TocEntry>,
}

impl TocEntry {
    /// Creates a new TOC entry with the given label and href.
    ///
    /// The children vector is initialized as empty.
    ///
    /// # Arguments
    ///
    /// * `label` - The display text for this entry.
    /// * `href` - The target URL/path for this entry.
    ///
    /// # Returns
    ///
    /// A new `TocEntry` instance with empty children.
    ///
    /// # Example
    ///
    /// ```
    /// use epub_parser::TocEntry;
    ///
    /// let entry = TocEntry::new("Introduction".to_string(), "intro.xhtml".to_string());
    /// assert_eq!(entry.label, "Introduction");
    /// assert_eq!(entry.href, "intro.xhtml");
    /// assert!(entry.children.is_empty());
    /// ```
    pub fn new(label: String, href: String) -> Self {
        Self {
            label,
            href,
            children: Vec::new(),
        }
    }
}
epub_parser/types.rs

epub_parser/
types.rs