1use regex;
39use serde::Deserialize;
40use std::collections::HashMap;
41use std::error;
42use std::fs::File;
43use std::io::Read;
44use zip::read::ZipArchive;
45
/// A single content document taken from an EPUB archive.
#[derive(Debug, Clone)]
pub struct EpubFile {
    /// Identifier of the manifest item this file came from.
    pub id: String,
    /// Location of the file as written in the manifest.
    pub href: String,
    /// Title associated with the file, when one is known.
    pub title: Option<String>,
    /// Full text of the file.
    pub content: String,
    /// Media type declared for the file in the manifest.
    pub media_type: String,
}

impl EpubFile {
    /// Manifest identifier of this file.
    pub fn get_id(&self) -> &str {
        self.id.as_str()
    }

    /// Manifest location of this file.
    pub fn get_href(&self) -> &str {
        self.href.as_str()
    }

    /// Title of this file, or `None` when it has none.
    pub fn get_title(&self) -> Option<&str> {
        self.title.as_deref()
    }

    /// Text content of this file.
    pub fn get_content(&self) -> &str {
        self.content.as_str()
    }

    /// Declared media type of this file.
    pub fn get_media_type(&self) -> &str {
        self.media_type.as_str()
    }

    /// The content as raw UTF-8 bytes.
    pub fn get_html_bytes(&self) -> &[u8] {
        self.get_content().as_bytes()
    }

    /// `true` when the declared media type is exactly `application/xhtml+xml`.
    pub fn is_html(&self) -> bool {
        self.get_media_type() == "application/xhtml+xml"
    }

    /// The content as a string slice; currently the same value as
    /// [`EpubFile::get_content`].
    pub fn get_parsable_html(&self) -> &str {
        self.get_content()
    }
}
92
93pub struct Chapter {
95 title: String,
96 files: Vec<EpubFile>,
97}
98
99impl Chapter {
100 pub fn get_title(&self) -> &str {
101 &self.title
102 }
103
104 pub fn get_files(&self) -> &[EpubFile] {
105 &self.files
106 }
107
108 pub fn get_file_count(&self) -> usize {
109 self.files.len()
110 }
111}
112
/// One line of a table of contents.
#[derive(Debug, Clone)]
pub struct TocEntry {
    /// Text shown for the entry.
    pub title: String,
    /// Target of the entry.
    pub href: String,
    /// Nesting depth of the entry.
    pub level: usize,
}

impl TocEntry {
    /// Text shown for the entry.
    pub fn get_title(&self) -> &str {
        self.title.as_str()
    }

    /// Target of the entry.
    pub fn get_href(&self) -> &str {
        self.href.as_str()
    }

    /// Nesting depth of the entry.
    pub fn get_level(&self) -> usize {
        let TocEntry { level, .. } = self;
        *level
    }
}
134
135pub struct TableOfContents {
137 entries: Vec<TocEntry>,
138}
139
140impl TableOfContents {
141 pub fn new() -> Self {
142 TableOfContents {
143 entries: Vec::new(),
144 }
145 }
146
147 pub fn add_entry(&mut self, title: String, href: String, level: usize) {
148 self.entries.push(TocEntry { title, href, level });
149 }
150
151 pub fn get_entries(&self) -> &[TocEntry] {
152 &self.entries
153 }
154
155 pub fn get_entry_count(&self) -> usize {
156 self.entries.len()
157 }
158}
159
160#[derive(Debug, Deserialize)]
162struct Container {
163 #[serde(rename = "rootfiles")]
164 rootfiles: RootFiles,
165}
166
167#[derive(Debug, Deserialize)]
168struct RootFiles {
169 #[serde(rename = "rootfile")]
170 rootfile: Vec<RootFile>,
171}
172
173#[derive(Debug, Deserialize)]
174struct RootFile {
175 #[serde(rename = "@full-path", default)]
176 full_path: String,
177 #[serde(rename = "@media-type", default)]
178 media_type: String,
179}
180
181#[derive(Debug, Deserialize)]
183struct Package {
184 metadata: OpfMetadata,
185 manifest: Manifest,
186 spine: Spine,
187}
188
189#[derive(Debug, Deserialize)]
190struct OpfMetadata {
191 #[serde(rename = "dc:identifier", default)]
192 identifier: Vec<String>,
193 #[serde(rename = "dc:title")]
194 title: String,
195 #[serde(rename = "dc:creator")]
196 creator: String,
197 #[serde(rename = "dc:language")]
198 language: String,
199 #[serde(rename = "dc:date")]
200 date: String,
201 #[serde(rename = "dc:description")]
202 description: Option<String>,
203 #[serde(rename = "meta", default)]
204 meta: Vec<Meta>,
205}
206
207#[derive(Debug, Deserialize)]
208struct Meta {
209 #[serde(rename = "name")]
210 name: Option<String>,
211 #[serde(rename = "content")]
212 content: Option<String>,
213 #[serde(rename = "property")]
214 property: Option<String>,
215 #[serde(rename = "$text")]
216 value: Option<String>,
217}
218
219#[derive(Debug, Deserialize)]
220struct Manifest {
221 #[serde(rename = "item")]
222 item: Vec<ManifestItem>,
223}
224
225#[derive(Debug, Deserialize)]
226struct ManifestItem {
227 #[serde(rename = "@id")]
228 id: String,
229 #[serde(rename = "@href")]
230 href: String,
231 #[serde(rename = "@media-type")]
232 media_type: String,
233 #[serde(rename = "@properties")]
234 properties: Option<String>,
235}
236
237#[derive(Debug, Deserialize)]
238struct Spine {
239 #[serde(rename = "itemref")]
240 itemref: Vec<ItemRef>,
241}
242
243#[derive(Debug, Deserialize)]
244struct ItemRef {
245 #[serde(rename = "@idref")]
246 idref: String,
247}
248
/// Descriptive information about a publication.
struct Metadata {
    /// Title of the publication.
    title: String,
    /// Creator of the publication.
    creator: String,
    /// Language of the publication.
    language: String,
    /// Identifier of the publication.
    identifier: String,
    /// Date of the publication.
    date: String,
    /// Publisher, when known.
    publisher: Option<String>,
    /// Description, when known.
    description: Option<String>,
    /// Rights statement, when known.
    rights: Option<String>,
    /// Cover reference, when known.
    cover: Option<String>,
    /// Tags attached to the publication.
    tags: Vec<String>,
}

impl Metadata {
    /// Builds metadata from the five required values.
    ///
    /// Every optional field starts out as `None` and `tags` starts out empty.
    pub fn new(
        title: String,
        creator: String,
        language: String,
        identifier: String,
        date: String,
    ) -> Self {
        Self {
            publisher: None,
            description: None,
            rights: None,
            cover: None,
            tags: Vec::new(),
            title,
            creator,
            language,
            identifier,
            date,
        }
    }
}
285
286pub struct Epub {
288 metadata: Metadata,
289 chapters: Vec<Chapter>,
290 table_of_contents: TableOfContents,
291 all_files: Vec<EpubFile>,
292}
293
294impl Epub {
295 pub fn new(file_path: String) -> Result<Epub, Box<dyn error::Error>> {
303 let file = File::open(file_path)?;
304 let mut archive = ZipArchive::new(file)?;
305
306 let container = {
308 let mut container_file = archive.by_name("META-INF/container.xml")?;
309 let mut xml = String::new();
310 container_file.read_to_string(&mut xml)?;
311 parse_container_xml(&xml)?
312 };
313
314 let opf_path = &container.rootfiles.rootfile[0].full_path;
316 let package = {
317 let mut opf_file = archive.by_name(&opf_path)?;
318 let mut xml = String::new();
319 opf_file.read_to_string(&mut xml)?;
320 parse_opf_xml(&xml)?
321 };
322
323 let nav_titles = Self::parse_navigation(&mut archive, &package, &opf_path)?;
325
326 let mut metadata = Metadata::new(
328 package.metadata.title.clone(),
329 package.metadata.creator.clone(),
330 package.metadata.language.clone(),
331 package
332 .metadata
333 .identifier
334 .first()
335 .unwrap_or(&String::new())
336 .clone(),
337 package.metadata.date.clone(),
338 );
339
340 metadata.description = package.metadata.description.clone();
342
343 for meta in &package.metadata.meta {
345 if let (Some(name), Some(content)) = (&meta.name, &meta.content) {
346 if name == "cover" {
347 metadata.cover = Some(content.clone());
348 }
349 }
350 }
351
352 let all_files = Self::parse_all_files(&mut archive, &package, &nav_titles, &opf_path)?;
354
355 let table_of_contents = Self::create_table_of_contents(&nav_titles, &all_files);
357
358 let chapters = Self::group_files_into_chapters(&all_files, &package.spine);
360
361 Ok(Epub {
362 metadata,
363 chapters,
364 table_of_contents,
365 all_files,
366 })
367 }
368
369 pub fn get_title(&self) -> &str {
371 &self.metadata.title
372 }
373
374 pub fn get_creator(&self) -> &str {
375 &self.metadata.creator
376 }
377
378 pub fn get_language(&self) -> &str {
379 &self.metadata.language
380 }
381
382 pub fn get_identifier(&self) -> &str {
383 &self.metadata.identifier
384 }
385
386 pub fn get_date(&self) -> &str {
387 &self.metadata.date
388 }
389
390 pub fn get_publisher(&self) -> Option<&str> {
391 self.metadata.publisher.as_deref()
392 }
393
394 pub fn get_description(&self) -> Option<&str> {
395 self.metadata.description.as_deref()
396 }
397
398 pub fn get_rights(&self) -> Option<&str> {
399 self.metadata.rights.as_deref()
400 }
401
402 pub fn get_cover(&self) -> Option<&str> {
403 self.metadata.cover.as_deref()
404 }
405
406 pub fn get_tags(&self) -> &[String] {
407 &self.metadata.tags
408 }
409
410 pub fn get_chapters(&self) -> &[Chapter] {
411 &self.chapters
412 }
413
414 pub fn get_chapter_count(&self) -> usize {
415 self.chapters.len()
416 }
417
418 pub fn get_table_of_contents(&self) -> &TableOfContents {
419 &self.table_of_contents
420 }
421
422 pub fn get_all_files(&self) -> &[EpubFile] {
423 &self.all_files
424 }
425
426 pub fn get_file_count(&self) -> usize {
427 self.all_files.len()
428 }
429
430 fn parse_navigation(
431 archive: &mut ZipArchive<File>,
432 package: &Package,
433 opf_path: &str,
434 ) -> Result<HashMap<String, String>, Box<dyn error::Error>> {
435 let mut nav_titles = HashMap::new();
436
437 if let Some(nav_item) = package.manifest.item.iter().find(|item| {
439 item.properties
440 .as_ref()
441 .map_or(false, |props| props.contains("nav"))
442 }) {
443 let opf_dir = if let Some(slash_pos) = opf_path.rfind('/') {
445 &opf_path[..slash_pos + 1] } else {
447 "" };
449 let nav_path = format!("{}{}", opf_dir, nav_item.href);
450
451 match archive.by_name(&nav_path) {
453 Ok(mut nav_file) => {
454 let mut html = String::new();
455 nav_file.read_to_string(&mut html)?;
456
457 let pattern = r#"<a\s+href="([^"]+)"[^>]*>([^<]+)</a>"#;
460 if let Ok(re) = regex::Regex::new(pattern) {
461 for cap in re.captures_iter(&html) {
462 if let (Some(href), Some(text)) = (cap.get(1), cap.get(2)) {
463 let href_str = href.as_str().to_string();
464 let text_str = text.as_str().trim().to_string();
465 nav_titles.insert(href_str, text_str);
466 }
467 }
468 }
469 }
470 Err(_) => {
471 }
473 }
474 }
475
476 Ok(nav_titles)
477 }
478
479 fn parse_all_files(
480 archive: &mut ZipArchive<File>,
481 package: &Package,
482 nav_titles: &HashMap<String, String>,
483 opf_path: &str,
484 ) -> Result<Vec<EpubFile>, Box<dyn error::Error>> {
485 let mut files = Vec::new();
486
487 let opf_dir = if let Some(slash_pos) = opf_path.rfind('/') {
489 &opf_path[..slash_pos + 1] } else {
491 "" };
493
494 for manifest_item in &package.manifest.item {
495 if manifest_item.media_type == "application/xhtml+xml" {
496 let is_nav = manifest_item
498 .properties
499 .as_ref()
500 .map_or(false, |props| props.contains("nav"));
501
502 if is_nav {
503 continue;
504 }
505
506 let file_path = format!("{}{}", opf_dir, manifest_item.href);
508
509 match archive.by_name(&file_path) {
510 Ok(mut file) => {
511 let mut content = String::new();
512 file.read_to_string(&mut content)?;
513
514 let epub_file = EpubFile {
515 id: manifest_item.id.clone(),
516 href: manifest_item.href.clone(),
517 title: nav_titles.get(&manifest_item.href).cloned(),
518 content,
519 media_type: manifest_item.media_type.clone(),
520 };
521
522 files.push(epub_file);
523 }
524 Err(_) => {
525 continue;
527 }
528 }
529 }
530 }
531
532 Ok(files)
533 }
534
535 fn create_table_of_contents(
536 _nav_titles: &HashMap<String, String>,
537 all_files: &[EpubFile],
538 ) -> TableOfContents {
539 let mut toc = TableOfContents::new();
540
541 for file in all_files {
543 let title = file.title.clone().unwrap_or_else(|| file.id.clone());
544 toc.add_entry(title, file.href.clone(), 0);
545 }
546
547 toc
548 }
549
550 fn group_files_into_chapters(all_files: &[EpubFile], spine: &Spine) -> Vec<Chapter> {
551 let mut chapters = Vec::new();
552 let mut current_chapter_files = Vec::new();
553 let mut current_chapter_title = String::new();
554
555 let file_map: HashMap<String, &EpubFile> = all_files
557 .iter()
558 .map(|file| (file.id.clone(), file))
559 .collect();
560
561 for (_index, itemref) in spine.itemref.iter().enumerate() {
562 if let Some(file) = file_map.get(&itemref.idref) {
563 let should_start_new_chapter = if current_chapter_files.is_empty() {
565 true
566 } else {
567 file.title.is_some()
571 && !Self::files_belong_to_same_chapter(¤t_chapter_files[0], file)
572 };
573
574 if should_start_new_chapter && !current_chapter_files.is_empty() {
575 let chapter = Chapter {
577 title: current_chapter_title.clone(),
578 files: current_chapter_files.clone(),
579 };
580 chapters.push(chapter);
581 current_chapter_files.clear();
582 }
583
584 if current_chapter_files.is_empty() {
585 current_chapter_title = file.title.clone().unwrap_or_else(|| file.id.clone());
587 }
588
589 current_chapter_files.push((*file).clone());
590 }
591 }
592
593 if !current_chapter_files.is_empty() {
595 let chapter = Chapter {
596 title: current_chapter_title,
597 files: current_chapter_files,
598 };
599 chapters.push(chapter);
600 }
601
602 chapters
603 }
604
605 fn files_belong_to_same_chapter(file1: &EpubFile, file2: &EpubFile) -> bool {
606 let base1 = Self::extract_chapter_base(&file1.id);
608 let base2 = Self::extract_chapter_base(&file2.id);
609 base1 == base2
610 }
611
612 fn extract_chapter_base(id: &str) -> String {
613 if let Some(last_underscore) = id.rfind('_') {
616 let after_underscore = &id[last_underscore + 1..];
617 if after_underscore.starts_with("part") {
620 return id[..last_underscore].to_string();
621 }
622 }
623 id.to_string()
624 }
625
626 fn get_zip_archive(file_path: &str) -> Result<ZipArchive<File>, Box<dyn error::Error>> {
627 let file = File::open(file_path)?;
628 let archive = ZipArchive::new(file)?;
629 Ok(archive)
630 }
631}
632
633fn parse_container_xml(xml: &str) -> Result<Container, Box<dyn std::error::Error>> {
635 let container: Container = serde_xml_rs::from_str(xml)?;
636 Ok(container)
637}
638
639fn parse_opf_xml(xml: &str) -> Result<Package, Box<dyn std::error::Error>> {
641 let package: Package = serde_xml_rs::from_str(xml)?;
642 Ok(package)
643}