epub_parser/types.rs
//! Type definitions for EPUB book components.
//!
//! This module contains the data structures used to represent the
//! content extracted from an EPUB file:
//! - `Metadata`: Dublin Core metadata
//! - `Page`: plain-text content pages
//! - `Image`: images, including the cover
//! - `TocEntry`: table of contents navigation entries
9
/// Dublin Core metadata extracted from an EPUB file.
///
/// Holds the standard Dublin Core fields defined by the EPUB specification.
/// Every field is an `Option` because not all EPUBs carry complete metadata.
///
/// The type derives `PartialEq` and `Eq`, so two values can be compared
/// directly (for example with `assert_eq!` in tests).
///
/// # Examples
///
/// ```
/// use epub_parser::Metadata;
///
/// let metadata = Metadata {
///     title: Some("Example Book".to_string()),
///     author: Some("John Doe".to_string()),
///     publisher: Some("Example Press".to_string()),
///     language: Some("en".to_string()),
///     ..Default::default()
/// };
///
/// assert_eq!(metadata.identifier, None);
/// println!("Book: {}", metadata.title.unwrap_or_default());
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Metadata {
    /// The title of the book.
    ///
    /// Maps to the Dublin Core `dc:title` element.
    pub title: Option<String>,

    /// The author or creator of the book.
    ///
    /// Maps to the Dublin Core `dc:creator` element.
    pub author: Option<String>,

    /// The publisher of the book.
    ///
    /// Maps to the Dublin Core `dc:publisher` element.
    pub publisher: Option<String>,

    /// The language code (e.g., "en", "fr", "zh").
    ///
    /// Maps to the Dublin Core `dc:language` element.
    pub language: Option<String>,

    /// A unique identifier for the book (e.g., ISBN, UUID).
    ///
    /// Maps to the Dublin Core `dc:identifier` element.
    pub identifier: Option<String>,

    /// The publication date.
    ///
    /// Maps to the Dublin Core `dc:date` element.
    /// Typically in YYYY-MM-DD format.
    pub date: Option<String>,

    /// The copyright or rights statement.
    ///
    /// Maps to the Dublin Core `dc:rights` element.
    pub rights: Option<String>,
}
69
70impl Metadata {
71 /// Creates a new, empty Metadata instance.
72 ///
73 /// # Returns
74 ///
75 /// A `Metadata` struct with all fields set to `None`.
76 pub fn new() -> Self {
77 Self::default()
78 }
79}
80
/// A single page of text content from an EPUB book.
///
/// Pages are extracted from the EPUB's HTML/XHTML content files in the
/// order defined by the spine element in the OPF file. Each page holds
/// the plain text content with HTML tags stripped.
///
/// The type derives `PartialEq` and `Eq`, so pages can be compared
/// directly.
///
/// # Examples
///
/// ```
/// use epub_parser::Page;
///
/// let page = Page::new(0, "Chapter 1\n\nIt was a dark and stormy night...".to_string());
/// println!("Page {}: {} characters", page.index, page.content.len());
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Page {
    /// The position of this page in the reading order (0-indexed).
    pub index: usize,

    /// The plain text content of the page.
    ///
    /// HTML tags are stripped, and the text is cleaned of control characters.
    /// Paragraphs and other block elements are separated by newlines.
    pub content: String,
}
106
107impl Page {
108 /// Creates a new Page with the given index and content.
109 ///
110 /// # Arguments
111 ///
112 /// * `index` - The position in the reading order.
113 /// * `content` - The plain text content of the page.
114 ///
115 /// # Returns
116 ///
117 /// A new `Page` instance.
118 ///
119 /// # Example
120 ///
121 /// ```
122 /// use epub_parser::Page;
123 ///
124 /// let page = Page::new(5, "Some text content".to_string());
125 /// assert_eq!(page.index, 5);
126 /// ```
127 pub fn new(index: usize, content: String) -> Self {
128 Page { index, content }
129 }
130}
131
/// An image extracted from an EPUB file.
///
/// An image carries its manifest metadata (ID, href, media type) and,
/// optionally, its binary content. The first image in the EPUB's images
/// vector is typically the cover image.
///
/// The type derives `PartialEq` and `Eq`, so images can be compared
/// directly; the comparison includes the raw bytes in `content`.
///
/// # Examples
///
/// ```
/// use epub_parser::Image;
///
/// let image = Image {
///     id: "cover".to_string(),
///     href: "images/cover.jpg".to_string(),
///     media_type: "image/jpeg".to_string(),
///     content: None,
/// };
///
/// println!("Image: {} ({})", image.href, image.media_type);
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct Image {
    /// The unique identifier for this image from the manifest.
    ///
    /// This corresponds to the `id` attribute in the OPF manifest.
    pub id: String,

    /// The path to the image within the EPUB archive.
    ///
    /// This is a relative path that can be used to locate the image file
    /// within the EPUB's ZIP structure.
    pub href: String,

    /// The MIME type of the image.
    ///
    /// Common values include "image/jpeg", "image/png", "image/gif", etc.
    pub media_type: String,

    /// The binary content of the image.
    ///
    /// This is `Some(Vec<u8>)` if the image was successfully read,
    /// or `None` if the image file could not be found or read.
    pub content: Option<Vec<u8>>,
}
176
/// An entry in the EPUB table of contents.
///
/// EPUB navigation uses NCX (Navigation Control for XML) files which define
/// a hierarchical structure of navigation points. Each entry can have
/// child entries, creating a tree structure.
///
/// The type derives `PartialEq` and `Eq`, so entries can be compared
/// directly; the comparison recurses into `children`.
///
/// # Examples
///
/// ```
/// use epub_parser::TocEntry;
///
/// let mut toc_entry = TocEntry::new("Chapter 1".to_string(), "chapter1.xhtml".to_string());
/// toc_entry.children.push(TocEntry::new(
///     "Section 1.1".to_string(),
///     "chapter1.xhtml#section1".to_string(),
/// ));
///
/// println!("Entry: {} -> {}", toc_entry.label, toc_entry.href);
/// for child in &toc_entry.children {
///     println!("  Child: {} -> {}", child.label, child.href);
/// }
/// ```
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct TocEntry {
    /// The display label or title for this navigation point.
    ///
    /// This is the text that would be shown in a table of contents.
    pub label: String,

    /// The target URL for this navigation point.
    ///
    /// This is a relative path within the EPUB, often with an anchor
    /// (e.g., "chapter1.xhtml" or "chapter1.xhtml#section1").
    pub href: String,

    /// Child navigation entries.
    ///
    /// The NCX format supports hierarchical navigation, so each entry
    /// can have nested sub-entries.
    pub children: Vec<TocEntry>,
}
218
219impl TocEntry {
220 /// Creates a new TOC entry with the given label and href.
221 ///
222 /// The children vector is initialized as empty.
223 ///
224 /// # Arguments
225 ///
226 /// * `label` - The display text for this entry.
227 /// * `href` - The target URL/path for this entry.
228 ///
229 /// # Returns
230 ///
231 /// A new `TocEntry` instance with empty children.
232 ///
233 /// # Example
234 ///
235 /// ```
236 /// use epub_parser::TocEntry;
237 ///
238 /// let entry = TocEntry::new("Introduction".to_string(), "intro.xhtml".to_string());
239 /// assert_eq!(entry.label, "Introduction");
240 /// assert_eq!(entry.href, "intro.xhtml");
241 /// assert!(entry.children.is_empty());
242 /// ```
243 pub fn new(label: String, href: String) -> Self {
244 TocEntry {
245 label,
246 href,
247 children: Vec::new(),
248 }
249 }
250}