Skip to main content

epub_parser/
lib.rs

1//! A Rust library for parsing EPUB e-book files.
2//!
3//! This library provides functionality to extract metadata, table of contents,
4//! text content, and images from EPUB files. It follows the EPUB specification
5//! and parses both OPF and NCX files to provide a complete representation
6//! of the e-book's structure.
7//!
8//! # Features
9//!
10//! - Parse EPUB container and locate OPF file
11//! - Extract Dublin Core metadata (title, author, publisher, language, etc.)
12//! - Parse NCX table of contents with hierarchical structure
13//! - Extract text from HTML/XHTML content files
14//! - Extract cover image and all images from EPUB
15//! - Follow reading order from OPF spine
16//! - Clean text extraction (strips HTML, handles line breaks)
17//!
18//! # Example
19//!
20//! ```ignore
21//! use epub_parser::Epub;
22//! use std::path::Path;
23//!
24//! let epub = Epub::parse(Path::new("book.epub"))?;
25//!
26//! // Access metadata
27//! if let Some(title) = &epub.metadata.title {
28//!     println!("Title: {}", title);
29//! }
30//!
31//! // Access table of contents
32//! for entry in &epub.toc {
33//!     println!("- {} ({})", entry.label, entry.href);
34//! }
35//!
36//! // Access page content
37//! for page in &epub.pages {
38//!     println!("Page {}: {} characters", page.index, page.content.len());
39//! }
40//!
41//! // Access images
42//! for image in &epub.images {
43//!     println!("Image: {} ({})", image.href, image.media_type);
44//! }
45//! ```
46
47pub mod epub;
48pub mod types;
49pub mod utils;
50
51pub use epub::Epub;
52pub use types::{Image, Metadata, Page, TocEntry};
53pub use utils::{XmlParser, ZipHandler};