epub_parser/types.rs
//! Type definitions for EPUB book components.
//!
//! This module contains the data structures used to represent
//! the content extracted from an EPUB file:
//! - `Metadata`: Dublin Core metadata
//! - `Page`: Plain-text content pages
//! - `Image`: Images, including the cover
//! - `TocEntry`: Table of contents navigation entries
9
/// Bibliographic metadata read from an EPUB package.
///
/// The fields mirror the Dublin Core elements used by the EPUB
/// specification. Every field is an `Option` because an EPUB is not
/// guaranteed to carry a complete set of metadata.
///
/// # Examples
///
/// ```
/// use epub_parser::Metadata;
///
/// let metadata = Metadata {
///     title: Some("Example Book".to_string()),
///     author: Some("John Doe".to_string()),
///     publisher: Some("Example Press".to_string()),
///     language: Some("en".to_string()),
///     ..Default::default()
/// };
///
/// println!("Book: {}", metadata.title.unwrap_or_default());
/// ```
#[derive(Clone, Debug, Default)]
pub struct Metadata {
    /// Book title, taken from the Dublin Core `dc:title` element.
    pub title: Option<String>,

    /// Author or creator of the book, taken from the Dublin Core
    /// `dc:creator` element.
    pub author: Option<String>,

    /// Publisher of the book, taken from the Dublin Core `dc:publisher`
    /// element.
    pub publisher: Option<String>,

    /// Language code such as "en", "fr" or "zh", taken from the Dublin Core
    /// `dc:language` element.
    pub language: Option<String>,

    /// Unique identifier of the book (for example an ISBN or a UUID), taken
    /// from the Dublin Core `dc:identifier` element.
    pub identifier: Option<String>,

    /// Publication date, taken from the Dublin Core `dc:date` element.
    ///
    /// The value is stored as found; it is typically in YYYY-MM-DD format.
    pub date: Option<String>,

    /// Copyright or rights statement, taken from the Dublin Core `dc:rights`
    /// element.
    pub rights: Option<String>,
}
69
70impl Metadata {
71 /// Creates a new, empty Metadata instance.
72 ///
73 /// # Returns
74 ///
75 /// A `Metadata` struct with all fields set to `None`.
76 pub fn new() -> Self {
77 Self::default()
78 }
79}
80
/// One page of plain-text content taken from an EPUB book.
///
/// Pages come from the EPUB's HTML/XHTML content files, in the order given
/// by the spine element of the OPF file. The stored text has its HTML tags
/// stripped.
///
/// # Examples
///
/// ```
/// use epub_parser::Page;
///
/// let page = Page::new(0, "Chapter 1\n\nIt was a dark and stormy night...".to_string());
/// println!("Page {}: {} characters", page.index, page.content.len());
/// ```
#[derive(Clone, Debug, Default)]
pub struct Page {
    /// Zero-based position of this page in the reading order.
    pub index: usize,

    /// Plain-text content of the page.
    ///
    /// HTML tags are stripped and control characters are cleaned out.
    /// Paragraphs and other block elements are separated by newlines.
    pub content: String,
}
106
107impl Page {
108 /// Creates a new Page with the given index and content.
109 ///
110 /// # Arguments
111 ///
112 /// * `index` - The position in the reading order.
113 /// * `content` - The plain text content of the page.
114 ///
115 /// # Returns
116 ///
117 /// A new `Page` instance.
118 ///
119 /// # Example
120 ///
121 /// ```
122 /// use epub_parser::Page;
123 ///
124 /// let page = Page::new(5, "Some text content".to_string());
125 /// assert_eq!(page.index, 5);
126 /// ```
127 pub fn new(index: usize, content: String) -> Self {
128 Page { index, content }
129 }
130}
131
/// An image extracted from an EPUB file.
///
/// Each image carries its manifest metadata (ID, href, media type) together
/// with the raw bytes of the image file. The first image in the EPUB's
/// images vector is typically the cover image.
///
/// # Examples
///
/// ```
/// use epub_parser::Image;
///
/// let image = Image {
///     id: "cover".to_string(),
///     href: "images/cover.jpg".to_string(),
///     media_type: "image/jpeg".to_string(),
///     // `content` is a plain `Vec<u8>` (not an `Option`), so an image
///     // without bytes is expressed as an empty vector.
///     content: Vec::new(),
/// };
///
/// println!("Image: {} ({})", image.href, image.media_type);
/// assert!(image.content.is_empty());
/// ```
#[derive(Debug, Clone, Default)]
pub struct Image {
    /// The unique identifier for this image from the manifest.
    ///
    /// This corresponds to the `id` attribute in the OPF manifest.
    pub id: String,

    /// The path to the image within the EPUB archive.
    ///
    /// This is a relative path that can be used to locate the image file
    /// within the EPUB's ZIP structure.
    pub href: String,

    /// The MIME type of the image.
    ///
    /// Common values include "image/jpeg", "image/png", "image/gif", etc.
    pub media_type: String,

    /// The binary content of the image.
    ///
    /// This is the raw bytes of the image file. The derived `Default`
    /// implementation leaves this vector empty.
    pub content: Vec<u8>,
}
175
/// One navigation point in an EPUB table of contents.
///
/// EPUB navigation is described by NCX (Navigation Control file for XML
/// applications) documents, which arrange navigation points
/// hierarchically. An entry may therefore own child entries, so a table of
/// contents forms a tree.
///
/// # Examples
///
/// ```
/// use epub_parser::TocEntry;
///
/// let mut toc_entry = TocEntry::new("Chapter 1".to_string(), "chapter1.xhtml".to_string());
/// toc_entry.children.push(TocEntry::new(
///     "Section 1.1".to_string(),
///     "chapter1.xhtml#section1".to_string(),
/// ));
///
/// println!("Entry: {} -> {}", toc_entry.label, toc_entry.href);
/// for child in &toc_entry.children {
///     println!("  Child: {} -> {}", child.label, child.href);
/// }
/// ```
#[derive(Clone, Debug, Default)]
pub struct TocEntry {
    /// Display label (title) of this navigation point, i.e. the text a
    /// table of contents would show.
    pub label: String,

    /// Target of this navigation point.
    ///
    /// A relative path inside the EPUB, often carrying an anchor
    /// (e.g., "chapter1.xhtml" or "chapter1.xhtml#section1").
    pub href: String,

    /// Nested sub-entries of this navigation point.
    ///
    /// The NCX format is hierarchical, so any entry may contain further
    /// entries.
    pub children: Vec<TocEntry>,
}
217
218impl TocEntry {
219 /// Creates a new TOC entry with the given label and href.
220 ///
221 /// The children vector is initialized as empty.
222 ///
223 /// # Arguments
224 ///
225 /// * `label` - The display text for this entry.
226 /// * `href` - The target URL/path for this entry.
227 ///
228 /// # Returns
229 ///
230 /// A new `TocEntry` instance with empty children.
231 ///
232 /// # Example
233 ///
234 /// ```
235 /// use epub_parser::TocEntry;
236 ///
237 /// let entry = TocEntry::new("Introduction".to_string(), "intro.xhtml".to_string());
238 /// assert_eq!(entry.label, "Introduction");
239 /// assert_eq!(entry.href, "intro.xhtml");
240 /// assert!(entry.children.is_empty());
241 /// ```
242 pub fn new(label: String, href: String) -> Self {
243 TocEntry {
244 label,
245 href,
246 children: Vec::new(),
247 }
248 }
249}