1extern crate alloc;
7
8use alloc::format;
9use alloc::string::{String, ToString};
10use alloc::vec::Vec;
11use std::collections::{BTreeMap, BTreeSet};
12use std::fs::File;
13use std::io::{Read, Seek};
14use std::path::Path;
15
16use quick_xml::events::Event;
17use quick_xml::reader::Reader;
18
19use crate::metadata::{parse_container_xml, parse_opf, EpubMetadata};
20use crate::navigation::{parse_nav_xhtml, parse_ncx};
21use crate::spine::Spine;
22use crate::zip::{StreamingZip, ZipLimits};
23
/// Severity level attached to a [`ValidationDiagnostic`].
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum ValidationSeverity {
    /// Counted by `ValidationReport::error_count`; any error makes the report invalid.
    Error,
    /// Counted by `ValidationReport::warning_count`; does not affect validity.
    Warning,
}
32
/// A single finding produced while validating an EPUB archive.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ValidationDiagnostic {
    /// Fixed identifier for the kind of finding (for example `"ZIP_INVALID_ARCHIVE"`).
    pub code: &'static str,
    /// Whether the finding is an error or a warning.
    pub severity: ValidationSeverity,
    /// Human-readable description of the finding.
    pub message: String,
    /// Archive path the finding relates to, when one is known (for example `"mimetype"`).
    pub path: Option<String>,
    /// Logical area the finding belongs to; this file uses values such as
    /// `"manifest"`, `"spine"`, `"navigation"` and `"ocf"`.
    pub location: Option<String>,
    /// Short label naming the related part of the specification, when set.
    pub spec_ref: Option<&'static str>,
    /// Optional suggestion on how to fix the problem.
    pub hint: Option<String>,
}
51
52impl ValidationDiagnostic {
53 fn error(code: &'static str, message: impl Into<String>) -> Self {
54 Self {
55 code,
56 severity: ValidationSeverity::Error,
57 message: message.into(),
58 path: None,
59 location: None,
60 spec_ref: None,
61 hint: None,
62 }
63 }
64
65 fn warning(code: &'static str, message: impl Into<String>) -> Self {
66 Self {
67 code,
68 severity: ValidationSeverity::Warning,
69 message: message.into(),
70 path: None,
71 location: None,
72 spec_ref: None,
73 hint: None,
74 }
75 }
76}
77
/// Collection of diagnostics gathered during one validation run.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct ValidationReport {
    /// Diagnostics in the order they were pushed.
    diagnostics: Vec<ValidationDiagnostic>,
}
83
84impl ValidationReport {
85 pub fn new() -> Self {
87 Self::default()
88 }
89
90 pub fn diagnostics(&self) -> &[ValidationDiagnostic] {
92 &self.diagnostics
93 }
94
95 pub fn error_count(&self) -> usize {
97 self.diagnostics
98 .iter()
99 .filter(|d| d.severity == ValidationSeverity::Error)
100 .count()
101 }
102
103 pub fn warning_count(&self) -> usize {
105 self.diagnostics
106 .iter()
107 .filter(|d| d.severity == ValidationSeverity::Warning)
108 .count()
109 }
110
111 pub fn is_valid(&self) -> bool {
113 self.error_count() == 0
114 }
115
116 fn push(&mut self, diagnostic: ValidationDiagnostic) {
117 self.diagnostics.push(diagnostic);
118 }
119}
120
/// Options that tune a validation run.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct ValidationOptions {
    /// Limits handed to `StreamingZip::new_with_limits` when opening the archive;
    /// `None` is passed through unchanged.
    pub zip_limits: Option<ZipLimits>,
}
127
128pub fn validate_epub_file<P: AsRef<Path>>(path: P) -> Result<ValidationReport, crate::EpubError> {
130 validate_epub_file_with_options(path, ValidationOptions::default())
131}
132
133pub fn validate_epub_file_with_options<P: AsRef<Path>>(
135 path: P,
136 options: ValidationOptions,
137) -> Result<ValidationReport, crate::EpubError> {
138 let file = File::open(path).map_err(|e| crate::EpubError::Io(e.to_string()))?;
139 Ok(validate_epub_reader_with_options(file, options))
140}
141
142pub fn validate_epub_reader<R: Read + Seek>(reader: R) -> ValidationReport {
144 validate_epub_reader_with_options(reader, ValidationOptions::default())
145}
146
147pub fn validate_epub_reader_with_options<R: Read + Seek>(
149 reader: R,
150 options: ValidationOptions,
151) -> ValidationReport {
152 let mut report = ValidationReport::new();
153 let mut zip = match StreamingZip::new_with_limits(reader, options.zip_limits) {
154 Ok(zip) => zip,
155 Err(err) => {
156 let mut d = ValidationDiagnostic::error(
157 "ZIP_INVALID_ARCHIVE",
158 format!("Failed to parse ZIP container: {}", err),
159 );
160 d.spec_ref = Some("OCF ZIP container");
161 report.push(d);
162 return report;
163 }
164 };
165
166 if let Err(err) = zip.validate_mimetype() {
167 let mut d = ValidationDiagnostic::error(
168 "OCF_INVALID_MIMETYPE",
169 format!("Invalid or missing mimetype entry: {}", err),
170 );
171 d.path = Some("mimetype".to_string());
172 d.spec_ref = Some("OCF mimetype");
173 d.hint = Some("Ensure `mimetype` exists and equals `application/epub+zip`.".to_string());
174 report.push(d);
175 }
176
177 let container_entry = match zip.get_entry("META-INF/container.xml").cloned() {
178 Some(entry) => entry,
179 None => {
180 let mut d = ValidationDiagnostic::error(
181 "OCF_CONTAINER_XML_MISSING",
182 "Missing required `META-INF/container.xml`.",
183 );
184 d.path = Some("META-INF/container.xml".to_string());
185 d.spec_ref = Some("OCF container.xml");
186 report.push(d);
187 return report;
188 }
189 };
190
191 let container_xml = match read_entry(&mut zip, container_entry.local_header_offset) {
192 Ok(bytes) => bytes,
193 Err(err) => {
194 let mut d = ValidationDiagnostic::error(
195 "OCF_CONTAINER_XML_UNREADABLE",
196 format!("Failed to read `container.xml`: {}", err),
197 );
198 d.path = Some("META-INF/container.xml".to_string());
199 report.push(d);
200 return report;
201 }
202 };
203
204 let opf_path = match parse_container_xml(&container_xml) {
205 Ok(path) => path,
206 Err(err) => {
207 let mut d = ValidationDiagnostic::error(
208 "OPF_ROOTFILE_MISSING",
209 format!("container.xml does not declare a usable rootfile: {}", err),
210 );
211 d.path = Some("META-INF/container.xml".to_string());
212 d.spec_ref = Some("EPUB package document discovery");
213 report.push(d);
214 return report;
215 }
216 };
217
218 let opf_entry = match zip.get_entry(&opf_path).cloned() {
219 Some(entry) => entry,
220 None => {
221 let mut d = ValidationDiagnostic::error(
222 "OPF_FILE_MISSING",
223 format!(
224 "Rootfile path '{}' from container.xml is missing.",
225 opf_path
226 ),
227 );
228 d.path = Some(opf_path.clone());
229 d.spec_ref = Some("Package document");
230 report.push(d);
231 return report;
232 }
233 };
234
235 let opf_bytes = match read_entry(&mut zip, opf_entry.local_header_offset) {
236 Ok(bytes) => bytes,
237 Err(err) => {
238 let mut d = ValidationDiagnostic::error(
239 "OPF_FILE_UNREADABLE",
240 format!("Failed to read package document '{}': {}", opf_path, err),
241 );
242 d.path = Some(opf_path.clone());
243 report.push(d);
244 return report;
245 }
246 };
247
248 let metadata = match parse_opf(&opf_bytes) {
249 Ok(metadata) => metadata,
250 Err(err) => {
251 let mut d = ValidationDiagnostic::error(
252 "OPF_PARSE_ERROR",
253 format!("Failed to parse package document '{}': {}", opf_path, err),
254 );
255 d.path = Some(opf_path.clone());
256 d.spec_ref = Some("OPF package document");
257 report.push(d);
258 return report;
259 }
260 };
261
262 let spine = match crate::spine::parse_spine(&opf_bytes) {
263 Ok(spine) => spine,
264 Err(err) => {
265 let mut d = ValidationDiagnostic::error(
266 "SPINE_PARSE_ERROR",
267 format!("Failed to parse `<spine>` in '{}': {}", opf_path, err),
268 );
269 d.path = Some(opf_path.clone());
270 d.location = Some("spine".to_string());
271 report.push(d);
272 return report;
273 }
274 };
275
276 validate_manifest_integrity(&metadata, &mut report);
277 validate_manifest_fallbacks(&opf_bytes, &mut report);
278 validate_manifest_resources_exist(&zip, &metadata, &opf_path, &mut report);
279 validate_spine_integrity(&metadata, &spine, &mut report);
280 validate_navigation_integrity(&mut zip, &metadata, &spine, &opf_path, &mut report);
281 validate_container_sidecars(&mut zip, &mut report);
282
283 report
284}
285
/// Attributes of one manifest `<item>` as read directly from the OPF XML.
#[derive(Clone, Debug)]
struct OpfManifestAttrs {
    /// Value of the `id` attribute.
    id: String,
    /// Value of the `href` attribute.
    href: String,
    /// Value of the `media-type` attribute.
    media_type: String,
    /// Value of the `fallback` attribute, if the item declares one.
    fallback: Option<String>,
}
293
294fn parse_opf_manifest_attrs(opf_bytes: &[u8]) -> Result<Vec<OpfManifestAttrs>, String> {
295 let mut reader = Reader::from_reader(opf_bytes);
296 reader.config_mut().trim_text(true);
297 let mut buf = Vec::with_capacity(0);
298 let mut in_manifest = false;
299 let mut out = Vec::with_capacity(0);
300
301 loop {
302 match reader.read_event_into(&mut buf) {
303 Ok(Event::Start(e)) => {
304 let name = reader
305 .decoder()
306 .decode(e.name().as_ref())
307 .map_err(|e| format!("decode error: {:?}", e))?
308 .to_string();
309 if name == "manifest" {
310 in_manifest = true;
311 } else if in_manifest && name == "item" {
312 if let Some(attrs) = parse_manifest_item_attrs(&reader, &e)? {
313 out.push(attrs);
314 }
315 }
316 }
317 Ok(Event::Empty(e)) => {
318 let name = reader
319 .decoder()
320 .decode(e.name().as_ref())
321 .map_err(|e| format!("decode error: {:?}", e))?
322 .to_string();
323 if in_manifest && name == "item" {
324 if let Some(attrs) = parse_manifest_item_attrs(&reader, &e)? {
325 out.push(attrs);
326 }
327 }
328 }
329 Ok(Event::End(e)) => {
330 let name = reader
331 .decoder()
332 .decode(e.name().as_ref())
333 .map_err(|e| format!("decode error: {:?}", e))?
334 .to_string();
335 if name == "manifest" {
336 in_manifest = false;
337 }
338 }
339 Ok(Event::Eof) => break,
340 Err(e) => return Err(format!("XML parse error: {:?}", e)),
341 _ => {}
342 }
343 buf.clear();
344 }
345
346 Ok(out)
347}
348
349fn parse_manifest_item_attrs(
350 reader: &Reader<&[u8]>,
351 e: &quick_xml::events::BytesStart<'_>,
352) -> Result<Option<OpfManifestAttrs>, String> {
353 let mut id = None;
354 let mut href = None;
355 let mut media_type = None;
356 let mut fallback = None;
357
358 for attr in e.attributes() {
359 let attr = attr.map_err(|err| format!("attr error: {:?}", err))?;
360 let key = reader
361 .decoder()
362 .decode(attr.key.as_ref())
363 .map_err(|err| format!("decode error: {:?}", err))?;
364 let value = reader
365 .decoder()
366 .decode(&attr.value)
367 .map_err(|err| format!("decode error: {:?}", err))?
368 .to_string();
369 match key.as_ref() {
370 "id" => id = Some(value),
371 "href" => href = Some(value),
372 "media-type" => media_type = Some(value),
373 "fallback" => fallback = Some(value),
374 _ => {}
375 }
376 }
377
378 match (id, href, media_type) {
379 (Some(id), Some(href), Some(media_type)) => Ok(Some(OpfManifestAttrs {
380 id,
381 href,
382 media_type,
383 fallback,
384 })),
385 _ => Ok(None),
386 }
387}
388
/// Returns `true` when `media_type` is one this validator accepts in a
/// manifest without requiring a `fallback`.
///
/// The list follows the EPUB 3.3 core media types table. It also keeps
/// `video/mp4`, which earlier versions of this function accepted. Matching
/// ignores ASCII case and surrounding whitespace, because media type names
/// are case-insensitive.
fn is_epub_core_media_type(media_type: &str) -> bool {
    const CORE_MEDIA_TYPES: &[&str] = &[
        // Content documents and navigation
        "application/xhtml+xml",
        "application/x-dtbncx+xml",
        "application/smil+xml",
        // Styles and scripts
        "text/css",
        "text/javascript",
        "application/javascript",
        "application/ecmascript",
        // Images
        "image/gif",
        "image/jpeg",
        "image/png",
        "image/svg+xml",
        "image/webp",
        // Fonts
        "font/otf",
        "font/ttf",
        "font/woff",
        "font/woff2",
        "application/font-sfnt",
        "application/font-woff",
        "application/vnd.ms-opentype",
        // Audio
        "audio/mpeg",
        "audio/mp4",
        "audio/ogg; codecs=opus",
        // Video (accepted by the original implementation; kept for compatibility)
        "video/mp4",
    ];

    let candidate = media_type.trim();
    CORE_MEDIA_TYPES
        .iter()
        .any(|core| core.eq_ignore_ascii_case(candidate))
}
409
410fn validate_manifest_fallbacks(opf_bytes: &[u8], report: &mut ValidationReport) {
411 let items = match parse_opf_manifest_attrs(opf_bytes) {
412 Ok(items) => items,
413 Err(err) => {
414 let mut d = ValidationDiagnostic::warning(
415 "OPF_MANIFEST_PARSE_PARTIAL",
416 format!("Could not analyze manifest fallback attributes: {}", err),
417 );
418 d.location = Some("manifest".to_string());
419 report.push(d);
420 return;
421 }
422 };
423
424 let by_id: BTreeMap<&str, &OpfManifestAttrs> =
425 items.iter().map(|item| (item.id.as_str(), item)).collect();
426
427 for item in &items {
428 if !is_epub_core_media_type(&item.media_type) && item.fallback.is_none() {
429 let mut d = ValidationDiagnostic::warning(
430 "MANIFEST_FOREIGN_NO_FALLBACK",
431 format!(
432 "Manifest item '{}' has non-core media-type '{}' without fallback.",
433 item.id, item.media_type
434 ),
435 );
436 d.location = Some("manifest".to_string());
437 d.path = Some(item.href.clone());
438 d.hint = Some(
439 "Add `fallback=\"...\"` to a supported content-document representation."
440 .to_string(),
441 );
442 report.push(d);
443 }
444
445 if let Some(fallback_id) = item.fallback.as_deref() {
446 if fallback_id == item.id {
447 let mut d = ValidationDiagnostic::error(
448 "MANIFEST_FALLBACK_SELF_REFERENCE",
449 format!("Manifest item '{}' fallback points to itself.", item.id),
450 );
451 d.location = Some("manifest".to_string());
452 d.path = Some(item.href.clone());
453 report.push(d);
454 continue;
455 }
456
457 if !by_id.contains_key(fallback_id) {
458 let mut d = ValidationDiagnostic::error(
459 "MANIFEST_FALLBACK_TARGET_MISSING",
460 format!(
461 "Manifest item '{}' fallback references missing id '{}'.",
462 item.id, fallback_id
463 ),
464 );
465 d.location = Some("manifest".to_string());
466 d.path = Some(item.href.clone());
467 report.push(d);
468 continue;
469 }
470
471 let mut seen = BTreeSet::new();
472 let mut cursor = fallback_id;
473 while let Some(next) = by_id
474 .get(cursor)
475 .and_then(|entry| entry.fallback.as_deref())
476 {
477 if !seen.insert(cursor) {
478 let mut d = ValidationDiagnostic::error(
479 "MANIFEST_FALLBACK_CYCLE",
480 format!(
481 "Fallback chain from '{}' contains a cycle at id '{}'.",
482 item.id, cursor
483 ),
484 );
485 d.location = Some("manifest".to_string());
486 d.path = Some(item.href.clone());
487 report.push(d);
488 break;
489 }
490 cursor = next;
491 }
492 }
493 }
494}
495
496fn validate_container_sidecars<F: Read + Seek>(
497 zip: &mut StreamingZip<F>,
498 report: &mut ValidationReport,
499) {
500 validate_optional_xml_sidecar(
501 zip,
502 report,
503 "META-INF/encryption.xml",
504 "ENCRYPTION_XML_UNREADABLE",
505 "ENCRYPTION_XML_PARSE_ERROR",
506 );
507 validate_optional_xml_sidecar(
508 zip,
509 report,
510 "META-INF/rights.xml",
511 "RIGHTS_XML_UNREADABLE",
512 "RIGHTS_XML_PARSE_ERROR",
513 );
514 validate_encryption_references(zip, report);
515}
516
517fn validate_optional_xml_sidecar<F: Read + Seek>(
518 zip: &mut StreamingZip<F>,
519 report: &mut ValidationReport,
520 path: &str,
521 unreadable_code: &'static str,
522 parse_code: &'static str,
523) {
524 let Some(entry) = zip.get_entry(path).cloned() else {
525 return;
526 };
527 let bytes = match read_entry(zip, entry.local_header_offset) {
528 Ok(bytes) => bytes,
529 Err(err) => {
530 let mut d = ValidationDiagnostic::error(
531 unreadable_code,
532 format!("Failed to read '{}': {}", path, err),
533 );
534 d.path = Some(path.to_string());
535 d.location = Some("ocf".to_string());
536 report.push(d);
537 return;
538 }
539 };
540 let mut reader = Reader::from_reader(bytes.as_slice());
541 reader.config_mut().trim_text(true);
542 let mut buf = Vec::with_capacity(0);
543 loop {
544 match reader.read_event_into(&mut buf) {
545 Ok(Event::Eof) => break,
546 Ok(_) => {}
547 Err(err) => {
548 let mut d = ValidationDiagnostic::error(
549 parse_code,
550 format!("Failed to parse '{}': {:?}", path, err),
551 );
552 d.path = Some(path.to_string());
553 d.location = Some("ocf".to_string());
554 report.push(d);
555 return;
556 }
557 }
558 buf.clear();
559 }
560}
561
562fn validate_encryption_references<F: Read + Seek>(
563 zip: &mut StreamingZip<F>,
564 report: &mut ValidationReport,
565) {
566 let Some(entry) = zip.get_entry("META-INF/encryption.xml").cloned() else {
567 return;
568 };
569 let bytes = match read_entry(zip, entry.local_header_offset) {
570 Ok(bytes) => bytes,
571 Err(_) => return,
572 };
573
574 let mut reader = Reader::from_reader(bytes.as_slice());
575 reader.config_mut().trim_text(true);
576 let mut buf = Vec::with_capacity(0);
577 loop {
578 match reader.read_event_into(&mut buf) {
579 Ok(Event::Start(e)) | Ok(Event::Empty(e)) => {
580 let tag = match reader.decoder().decode(e.name().as_ref()) {
581 Ok(v) => v.to_string(),
582 Err(_) => {
583 buf.clear();
584 continue;
585 }
586 };
587 if !tag.ends_with("CipherReference") {
588 buf.clear();
589 continue;
590 }
591 for attr in e.attributes().flatten() {
592 let key = match reader.decoder().decode(attr.key.as_ref()) {
593 Ok(v) => v,
594 Err(_) => continue,
595 };
596 if key != "URI" {
597 continue;
598 }
599 let uri = match reader.decoder().decode(&attr.value) {
600 Ok(v) => v.to_string(),
601 Err(_) => continue,
602 };
603 if uri.contains("://") || uri.starts_with('/') || uri.trim().is_empty() {
604 continue;
605 }
606 let full_path = resolve_opf_relative("META-INF/encryption.xml", &uri);
607 if zip.get_entry(&full_path).is_none() {
608 let mut d = ValidationDiagnostic::error(
609 "ENCRYPTION_REFERENCE_MISSING",
610 format!(
611 "`encryption.xml` references missing encrypted resource '{}'.",
612 full_path
613 ),
614 );
615 d.location = Some("ocf".to_string());
616 d.path = Some("META-INF/encryption.xml".to_string());
617 report.push(d);
618 }
619 }
620 }
621 Ok(Event::Eof) => break,
622 Ok(_) => {}
623 Err(_) => break,
624 }
625 buf.clear();
626 }
627}
628
629fn read_entry<F: Read + Seek>(
630 zip: &mut StreamingZip<F>,
631 local_header_offset: u64,
632) -> Result<Vec<u8>, crate::ZipError> {
633 let entry = zip
634 .entries()
635 .find(|e| e.local_header_offset == local_header_offset)
636 .ok_or(crate::ZipError::FileNotFound)?
637 .clone();
638 let size =
639 usize::try_from(entry.uncompressed_size).map_err(|_| crate::ZipError::FileTooLarge)?;
640 let mut buf = vec![0u8; size];
641 let n = zip.read_file_at_offset(local_header_offset, &mut buf)?;
642 buf.truncate(n);
643 Ok(buf)
644}
645
646fn validate_manifest_integrity(metadata: &EpubMetadata, report: &mut ValidationReport) {
647 let mut ids = BTreeSet::new();
648 let mut hrefs = BTreeSet::new();
649 for item in &metadata.manifest {
650 if item.id.trim().is_empty() {
651 let mut d = ValidationDiagnostic::error(
652 "MANIFEST_ID_EMPTY",
653 "Manifest item has empty `id` attribute.",
654 );
655 d.location = Some("manifest".to_string());
656 d.path = Some(item.href.clone());
657 report.push(d);
658 }
659 if item.href.trim().is_empty() {
660 let mut d = ValidationDiagnostic::error(
661 "MANIFEST_HREF_EMPTY",
662 format!("Manifest item '{}' has empty `href`.", item.id),
663 );
664 d.location = Some("manifest".to_string());
665 report.push(d);
666 }
667 if item.media_type.trim().is_empty() {
668 let mut d = ValidationDiagnostic::error(
669 "MANIFEST_MEDIA_TYPE_EMPTY",
670 format!("Manifest item '{}' has empty `media-type`.", item.id),
671 );
672 d.location = Some("manifest".to_string());
673 d.path = Some(item.href.clone());
674 report.push(d);
675 }
676
677 if !ids.insert(item.id.clone()) {
678 let mut d = ValidationDiagnostic::error(
679 "MANIFEST_ID_DUPLICATE",
680 format!("Duplicate manifest id '{}'.", item.id),
681 );
682 d.location = Some("manifest".to_string());
683 report.push(d);
684 }
685
686 let href_key = item.href.to_ascii_lowercase();
687 if !href_key.is_empty() && !hrefs.insert(href_key) {
688 let mut d = ValidationDiagnostic::warning(
689 "MANIFEST_HREF_DUPLICATE",
690 format!("Multiple manifest items reference href '{}'.", item.href),
691 );
692 d.location = Some("manifest".to_string());
693 d.path = Some(item.href.clone());
694 report.push(d);
695 }
696 }
697}
698
699fn validate_manifest_resources_exist<F: Read + Seek>(
700 zip: &StreamingZip<F>,
701 metadata: &EpubMetadata,
702 opf_path: &str,
703 report: &mut ValidationReport,
704) {
705 for item in &metadata.manifest {
706 if item.href.contains("://") || item.href.trim().is_empty() {
707 continue;
708 }
709 let full_path = resolve_opf_relative(opf_path, &item.href);
710 if zip.get_entry(&full_path).is_none() {
711 let mut d = ValidationDiagnostic::error(
712 "MANIFEST_RESOURCE_MISSING",
713 format!(
714 "Manifest item '{}' points to missing resource '{}'.",
715 item.id, full_path
716 ),
717 );
718 d.location = Some("manifest".to_string());
719 d.path = Some(full_path);
720 report.push(d);
721 }
722 }
723}
724
725fn validate_spine_integrity(metadata: &EpubMetadata, spine: &Spine, report: &mut ValidationReport) {
726 if spine.is_empty() {
727 let mut d =
728 ValidationDiagnostic::warning("SPINE_EMPTY", "Spine has no reading-order entries.");
729 d.location = Some("spine".to_string());
730 report.push(d);
731 }
732
733 for (index, item) in spine.items().iter().enumerate() {
734 if let Some(manifest_item) = metadata.get_item(&item.idref) {
735 if manifest_item.media_type != "application/xhtml+xml" {
736 let mut d = ValidationDiagnostic::warning(
737 "SPINE_ITEM_NON_XHTML",
738 format!(
739 "Spine item '{}' references media-type '{}' (expected application/xhtml+xml).",
740 item.idref, manifest_item.media_type
741 ),
742 );
743 d.location = Some("spine".to_string());
744 d.path = Some(manifest_item.href.clone());
745 report.push(d);
746 }
747 } else {
748 let mut d = ValidationDiagnostic::error(
749 "SPINE_IDREF_NOT_IN_MANIFEST",
750 format!(
751 "Spine item at index {} references unknown manifest id '{}'.",
752 index, item.idref
753 ),
754 );
755 d.location = Some("spine".to_string());
756 d.spec_ref = Some("OPF spine/itemref");
757 d.hint = Some(
758 "Ensure each `<itemref idref=\"...\">` matches a manifest `<item id=\"...\">`."
759 .to_string(),
760 );
761 report.push(d);
762 }
763 }
764}
765
766fn validate_navigation_integrity<F: Read + Seek>(
767 zip: &mut StreamingZip<F>,
768 metadata: &EpubMetadata,
769 spine: &Spine,
770 opf_path: &str,
771 report: &mut ValidationReport,
772) {
773 let nav_item = metadata
774 .manifest
775 .iter()
776 .find(|item| item.properties.as_deref().unwrap_or("").contains("nav"));
777
778 if let Some(nav_item) = nav_item {
779 if nav_item.media_type != "application/xhtml+xml"
780 && nav_item.media_type != "application/x-dtbncx+xml"
781 {
782 let mut d = ValidationDiagnostic::error(
783 "NAV_DOCUMENT_MEDIA_TYPE_INVALID",
784 format!(
785 "Navigation item '{}' has unexpected media-type '{}'.",
786 nav_item.id, nav_item.media_type
787 ),
788 );
789 d.path = Some(nav_item.href.clone());
790 d.location = Some("navigation".to_string());
791 report.push(d);
792 }
793 let full_path = resolve_opf_relative(opf_path, &nav_item.href);
794 let nav_entry = match zip.get_entry(&full_path).cloned() {
795 Some(entry) => entry,
796 None => {
797 let mut d = ValidationDiagnostic::error(
798 "NAV_DOCUMENT_MISSING",
799 format!("Manifest nav item points to missing file '{}'.", full_path),
800 );
801 d.path = Some(full_path);
802 d.location = Some("navigation".to_string());
803 report.push(d);
804 return;
805 }
806 };
807
808 match read_entry(zip, nav_entry.local_header_offset) {
809 Ok(bytes) => {
810 if let Err(err) = parse_nav_xhtml(&bytes) {
811 let mut d = ValidationDiagnostic::error(
812 "NAV_DOCUMENT_PARSE_ERROR",
813 format!("Failed to parse nav document: {}", err),
814 );
815 d.path = Some(full_path);
816 d.location = Some("navigation".to_string());
817 report.push(d);
818 }
819 }
820 Err(err) => {
821 let mut d = ValidationDiagnostic::error(
822 "NAV_DOCUMENT_UNREADABLE",
823 format!("Failed to read nav document: {}", err),
824 );
825 d.path = Some(full_path);
826 d.location = Some("navigation".to_string());
827 report.push(d);
828 }
829 }
830 return;
831 }
832
833 if let Some(toc_id) = spine.toc_id() {
834 let ncx_item = metadata.get_item(toc_id);
835 match ncx_item {
836 Some(item) => {
837 let full_path = resolve_opf_relative(opf_path, &item.href);
838 match zip.get_entry(&full_path).cloned() {
839 Some(entry) => match read_entry(zip, entry.local_header_offset) {
840 Ok(bytes) => {
841 if let Err(err) = parse_ncx(&bytes) {
842 let mut d = ValidationDiagnostic::error(
843 "NCX_PARSE_ERROR",
844 format!("Failed to parse NCX document: {}", err),
845 );
846 d.path = Some(full_path);
847 d.location = Some("navigation".to_string());
848 report.push(d);
849 }
850 }
851 Err(err) => {
852 let mut d = ValidationDiagnostic::error(
853 "NCX_UNREADABLE",
854 format!("Failed to read NCX document: {}", err),
855 );
856 d.path = Some(full_path);
857 d.location = Some("navigation".to_string());
858 report.push(d);
859 }
860 },
861 None => {
862 let mut d = ValidationDiagnostic::error(
863 "NCX_MISSING",
864 format!(
865 "Spine `toc` references '{}' but resolved path '{}' is missing.",
866 toc_id, full_path
867 ),
868 );
869 d.path = Some(full_path);
870 d.location = Some("navigation".to_string());
871 report.push(d);
872 }
873 }
874 }
875 None => {
876 let mut d = ValidationDiagnostic::error(
877 "NCX_IDREF_NOT_IN_MANIFEST",
878 format!("Spine `toc` references unknown manifest id '{}'.", toc_id),
879 );
880 d.location = Some("spine".to_string());
881 report.push(d);
882 }
883 }
884 return;
885 }
886
887 let mut d = ValidationDiagnostic::warning(
888 "NAV_MISSING",
889 "No EPUB3 nav document and no EPUB2 NCX reference found.",
890 );
891 d.location = Some("navigation".to_string());
892 d.hint = Some(
893 "Add a manifest nav item (`properties=\"nav\"`) or spine `toc` NCX fallback.".to_string(),
894 );
895 report.push(d);
896}
897
/// Resolves `href` against the directory that contains `opf_path` and returns
/// the resulting archive path.
///
/// - An `href` containing `://` or starting with `/` is returned unchanged.
/// - Otherwise `href` is appended to the directory part of `opf_path` (or used
///   as-is when `opf_path` has no `/`).
/// - `.` and `..` segments in the joined path are then collapsed, so
///   `EPUB/package.opf` + `../images/a.png` gives `images/a.png`. ZIP entry
///   names never contain dot segments, so an un-normalized path would never
///   match. A `..` that would climb above the archive root is dropped.
///
/// Paths without dot segments are returned exactly as the plain join produced them.
fn resolve_opf_relative(opf_path: &str, href: &str) -> String {
    if href.contains("://") || href.starts_with('/') {
        return href.to_string();
    }

    let joined = match opf_path.rfind('/') {
        Some(idx) => format!("{}/{}", &opf_path[..idx], href),
        None => href.to_string(),
    };

    // Fast path: nothing to normalize, keep the join byte-for-byte.
    if !joined.split('/').any(|segment| segment == "." || segment == "..") {
        return joined;
    }

    let mut segments: Vec<&str> = Vec::with_capacity(joined.matches('/').count() + 1);
    for segment in joined.split('/') {
        match segment {
            "." => {}
            ".." => {
                // Popping an empty stack is a no-op, which clamps at the root.
                segments.pop();
            }
            other => segments.push(other),
        }
    }
    segments.join("/")
}
907
908#[cfg(test)]
909mod tests {
910 use super::*;
911
912 const SIG_LOCAL_FILE_HEADER: u32 = 0x04034b50;
913 const SIG_CD_ENTRY: u32 = 0x02014b50;
914 const SIG_EOCD: u32 = 0x06054b50;
915
916 fn build_zip(files: &[(&str, &[u8])]) -> Vec<u8> {
917 struct FileMeta {
918 name: String,
919 local_offset: u32,
920 crc32: u32,
921 size: u32,
922 }
923
924 let mut zip = Vec::with_capacity(0);
925 let mut metas = Vec::with_capacity(0);
926
927 for (name, content) in files {
928 let name_bytes = name.as_bytes();
929 let name_len = name_bytes.len() as u16;
930 let content_len = content.len() as u32;
931 let crc = crc32fast::hash(content);
932 let local_offset = zip.len() as u32;
933
934 zip.extend_from_slice(&SIG_LOCAL_FILE_HEADER.to_le_bytes());
935 zip.extend_from_slice(&20u16.to_le_bytes());
936 zip.extend_from_slice(&0u16.to_le_bytes());
937 zip.extend_from_slice(&0u16.to_le_bytes()); zip.extend_from_slice(&0u16.to_le_bytes());
939 zip.extend_from_slice(&0u16.to_le_bytes());
940 zip.extend_from_slice(&crc.to_le_bytes());
941 zip.extend_from_slice(&content_len.to_le_bytes());
942 zip.extend_from_slice(&content_len.to_le_bytes());
943 zip.extend_from_slice(&name_len.to_le_bytes());
944 zip.extend_from_slice(&0u16.to_le_bytes());
945 zip.extend_from_slice(name_bytes);
946 zip.extend_from_slice(content);
947
948 metas.push(FileMeta {
949 name: (*name).to_string(),
950 local_offset,
951 crc32: crc,
952 size: content_len,
953 });
954 }
955
956 let cd_offset = zip.len() as u32;
957 for meta in &metas {
958 let name_bytes = meta.name.as_bytes();
959 let name_len = name_bytes.len() as u16;
960 zip.extend_from_slice(&SIG_CD_ENTRY.to_le_bytes());
961 zip.extend_from_slice(&20u16.to_le_bytes());
962 zip.extend_from_slice(&20u16.to_le_bytes());
963 zip.extend_from_slice(&0u16.to_le_bytes());
964 zip.extend_from_slice(&0u16.to_le_bytes()); zip.extend_from_slice(&0u16.to_le_bytes());
966 zip.extend_from_slice(&0u16.to_le_bytes());
967 zip.extend_from_slice(&meta.crc32.to_le_bytes());
968 zip.extend_from_slice(&meta.size.to_le_bytes());
969 zip.extend_from_slice(&meta.size.to_le_bytes());
970 zip.extend_from_slice(&name_len.to_le_bytes());
971 zip.extend_from_slice(&0u16.to_le_bytes());
972 zip.extend_from_slice(&0u16.to_le_bytes());
973 zip.extend_from_slice(&0u16.to_le_bytes());
974 zip.extend_from_slice(&0u16.to_le_bytes());
975 zip.extend_from_slice(&0u32.to_le_bytes());
976 zip.extend_from_slice(&meta.local_offset.to_le_bytes());
977 zip.extend_from_slice(name_bytes);
978 }
979
980 let cd_size = (zip.len() as u32) - cd_offset;
981 let entries = metas.len() as u16;
982
983 zip.extend_from_slice(&SIG_EOCD.to_le_bytes());
984 zip.extend_from_slice(&0u16.to_le_bytes());
985 zip.extend_from_slice(&0u16.to_le_bytes());
986 zip.extend_from_slice(&entries.to_le_bytes());
987 zip.extend_from_slice(&entries.to_le_bytes());
988 zip.extend_from_slice(&cd_size.to_le_bytes());
989 zip.extend_from_slice(&cd_offset.to_le_bytes());
990 zip.extend_from_slice(&0u16.to_le_bytes());
991
992 zip
993 }
994
995 fn minimal_valid_epub_zip() -> Vec<u8> {
996 let container_xml = br#"<?xml version="1.0"?>
997<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
998 <rootfiles>
999 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1000 </rootfiles>
1001</container>"#;
1002
1003 let opf = br#"<?xml version="1.0" encoding="UTF-8"?>
1004<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1005 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1006 <dc:title>Test Book</dc:title>
1007 <dc:creator>Tester</dc:creator>
1008 <dc:language>en</dc:language>
1009 </metadata>
1010 <manifest>
1011 <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
1012 <item id="c1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
1013 </manifest>
1014 <spine>
1015 <itemref idref="c1"/>
1016 </spine>
1017</package>"#;
1018
1019 let nav = br#"<?xml version="1.0" encoding="utf-8"?>
1020<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
1021 <body>
1022 <nav epub:type="toc">
1023 <ol><li><a href="ch1.xhtml">Chapter 1</a></li></ol>
1024 </nav>
1025 </body>
1026</html>"#;
1027
1028 let ch1 = br#"<html xmlns="http://www.w3.org/1999/xhtml"><body><p>Hello</p></body></html>"#;
1029
1030 build_zip(&[
1031 ("mimetype", b"application/epub+zip"),
1032 ("META-INF/container.xml", container_xml),
1033 ("EPUB/package.opf", opf),
1034 ("EPUB/nav.xhtml", nav),
1035 ("EPUB/ch1.xhtml", ch1),
1036 ])
1037 }
1038
1039 #[test]
1040 fn validate_minimal_valid_epub() {
1041 let data = minimal_valid_epub_zip();
1042 let report = validate_epub_reader(std::io::Cursor::new(data));
1043 assert!(report.is_valid(), "expected valid report: {:?}", report);
1044 assert_eq!(report.error_count(), 0);
1045 }
1046
1047 #[test]
1048 fn validate_detects_missing_container() {
1049 let data = build_zip(&[("mimetype", b"application/epub+zip")]);
1050 let report = validate_epub_reader(std::io::Cursor::new(data));
1051 assert!(!report.is_valid());
1052 assert!(report
1053 .diagnostics()
1054 .iter()
1055 .any(|d| d.code == "OCF_CONTAINER_XML_MISSING"));
1056 }
1057
1058 #[test]
1059 fn validate_detects_spine_manifest_mismatch() {
1060 let container_xml = br#"<?xml version="1.0"?>
1061<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
1062 <rootfiles>
1063 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1064 </rootfiles>
1065</container>"#;
1066
1067 let opf = br#"<?xml version="1.0" encoding="UTF-8"?>
1068<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1069 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1070 <dc:title>Test</dc:title><dc:creator>A</dc:creator><dc:language>en</dc:language>
1071 </metadata>
1072 <manifest>
1073 <item id="only" href="only.xhtml" media-type="application/xhtml+xml"/>
1074 </manifest>
1075 <spine>
1076 <itemref idref="missing"/>
1077 </spine>
1078</package>"#;
1079
1080 let data = build_zip(&[
1081 ("mimetype", b"application/epub+zip"),
1082 ("META-INF/container.xml", container_xml),
1083 ("EPUB/package.opf", opf),
1084 ("EPUB/only.xhtml", b"<html/>"),
1085 ]);
1086 let report = validate_epub_reader(std::io::Cursor::new(data));
1087 assert!(!report.is_valid());
1088 assert!(report
1089 .diagnostics()
1090 .iter()
1091 .any(|d| d.code == "SPINE_IDREF_NOT_IN_MANIFEST"));
1092 }
1093
1094 #[test]
1095 fn validate_detects_missing_manifest_resource() {
1096 let container_xml = br#"<?xml version="1.0"?>
1097<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
1098 <rootfiles>
1099 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1100 </rootfiles>
1101</container>"#;
1102
1103 let opf = br#"<?xml version="1.0" encoding="UTF-8"?>
1104<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1105 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1106 <dc:title>Test</dc:title><dc:creator>A</dc:creator><dc:language>en</dc:language>
1107 </metadata>
1108 <manifest>
1109 <item id="c1" href="missing.xhtml" media-type="application/xhtml+xml"/>
1110 </manifest>
1111 <spine>
1112 <itemref idref="c1"/>
1113 </spine>
1114</package>"#;
1115
1116 let data = build_zip(&[
1117 ("mimetype", b"application/epub+zip"),
1118 ("META-INF/container.xml", container_xml),
1119 ("EPUB/package.opf", opf),
1120 ]);
1121 let report = validate_epub_reader(std::io::Cursor::new(data));
1122 assert!(report
1123 .diagnostics()
1124 .iter()
1125 .any(|d| d.code == "MANIFEST_RESOURCE_MISSING"));
1126 }
1127
1128 #[test]
1129 fn validate_warns_on_non_xhtml_spine_item() {
1130 let container_xml = br#"<?xml version="1.0"?>
1131<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
1132 <rootfiles>
1133 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1134 </rootfiles>
1135</container>"#;
1136
1137 let opf = br#"<?xml version="1.0" encoding="UTF-8"?>
1138<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1139 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1140 <dc:title>Test</dc:title><dc:creator>A</dc:creator><dc:language>en</dc:language>
1141 </metadata>
1142 <manifest>
1143 <item id="c1" href="ch1.txt" media-type="text/plain"/>
1144 </manifest>
1145 <spine>
1146 <itemref idref="c1"/>
1147 </spine>
1148</package>"#;
1149
1150 let data = build_zip(&[
1151 ("mimetype", b"application/epub+zip"),
1152 ("META-INF/container.xml", container_xml),
1153 ("EPUB/package.opf", opf),
1154 ("EPUB/ch1.txt", b"hello"),
1155 ]);
1156 let report = validate_epub_reader(std::io::Cursor::new(data));
1157 assert!(report.warning_count() > 0);
1158 assert!(report
1159 .diagnostics()
1160 .iter()
1161 .any(|d| d.code == "SPINE_ITEM_NON_XHTML"));
1162 }
1163
1164 #[test]
1165 fn validate_detects_missing_manifest_fallback_target() {
1166 let container_xml = br#"<?xml version="1.0"?>
1167<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
1168 <rootfiles>
1169 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1170 </rootfiles>
1171</container>"#;
1172
1173 let opf = br#"<?xml version="1.0" encoding="UTF-8"?>
1174<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1175 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1176 <dc:title>Test</dc:title><dc:creator>A</dc:creator><dc:language>en</dc:language>
1177 </metadata>
1178 <manifest>
1179 <item id="scripted" href="script.js" media-type="text/javascript" fallback="missing"/>
1180 <item id="c1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
1181 <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
1182 </manifest>
1183 <spine>
1184 <itemref idref="c1"/>
1185 </spine>
1186</package>"#;
1187
1188 let nav = br#"<?xml version="1.0" encoding="utf-8"?>
1189<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
1190 <body><nav epub:type="toc"><ol><li><a href="ch1.xhtml">Chapter 1</a></li></ol></nav></body>
1191</html>"#;
1192
1193 let data = build_zip(&[
1194 ("mimetype", b"application/epub+zip"),
1195 ("META-INF/container.xml", container_xml),
1196 ("EPUB/package.opf", opf),
1197 ("EPUB/ch1.xhtml", b"<html/>"),
1198 ("EPUB/nav.xhtml", nav),
1199 ("EPUB/script.js", b"alert('x');"),
1200 ]);
1201 let report = validate_epub_reader(std::io::Cursor::new(data));
1202 assert!(report
1203 .diagnostics()
1204 .iter()
1205 .any(|d| d.code == "MANIFEST_FALLBACK_TARGET_MISSING"));
1206 }
1207
1208 #[test]
1209 fn validate_warns_on_foreign_resource_without_fallback() {
1210 let container_xml = br#"<?xml version="1.0"?>
1211<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
1212 <rootfiles>
1213 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1214 </rootfiles>
1215</container>"#;
1216
1217 let opf = br#"<?xml version="1.0" encoding="UTF-8"?>
1218<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1219 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1220 <dc:title>Test</dc:title><dc:creator>A</dc:creator><dc:language>en</dc:language>
1221 </metadata>
1222 <manifest>
1223 <item id="foreign" href="script.js" media-type="text/javascript"/>
1224 <item id="c1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
1225 <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
1226 </manifest>
1227 <spine>
1228 <itemref idref="c1"/>
1229 </spine>
1230</package>"#;
1231
1232 let nav = br#"<?xml version="1.0" encoding="utf-8"?>
1233<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
1234 <body><nav epub:type="toc"><ol><li><a href="ch1.xhtml">Chapter 1</a></li></ol></nav></body>
1235</html>"#;
1236
1237 let data = build_zip(&[
1238 ("mimetype", b"application/epub+zip"),
1239 ("META-INF/container.xml", container_xml),
1240 ("EPUB/package.opf", opf),
1241 ("EPUB/ch1.xhtml", b"<html/>"),
1242 ("EPUB/nav.xhtml", nav),
1243 ("EPUB/script.js", b"alert('x');"),
1244 ]);
1245 let report = validate_epub_reader(std::io::Cursor::new(data));
1246 assert!(report
1247 .diagnostics()
1248 .iter()
1249 .any(|d| d.code == "MANIFEST_FOREIGN_NO_FALLBACK"));
1250 }
1251
1252 #[test]
1253 fn validate_detects_missing_encryption_cipher_reference_target() {
1254 let encryption_xml = br#"<?xml version="1.0" encoding="UTF-8"?>
1255<encryption xmlns="urn:oasis:names:tc:opendocument:xmlns:container"
1256 xmlns:enc="http://www.w3.org/2001/04/xmlenc#">
1257 <enc:EncryptedData>
1258 <enc:CipherData>
1259 <enc:CipherReference URI="../EPUB/missing-font.otf"/>
1260 </enc:CipherData>
1261 </enc:EncryptedData>
1262</encryption>"#;
1263 let data = build_zip(&[
1264 ("mimetype", b"application/epub+zip"),
1265 (
1266 "META-INF/container.xml",
1267 br#"<?xml version="1.0"?>
1268<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
1269 <rootfiles>
1270 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1271 </rootfiles>
1272</container>"#,
1273 ),
1274 (
1275 "EPUB/package.opf",
1276 br#"<?xml version="1.0" encoding="UTF-8"?>
1277<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1278 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1279 <dc:title>Test Book</dc:title>
1280 <dc:creator>Tester</dc:creator>
1281 <dc:language>en</dc:language>
1282 </metadata>
1283 <manifest>
1284 <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
1285 <item id="c1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
1286 </manifest>
1287 <spine><itemref idref="c1"/></spine>
1288</package>"#,
1289 ),
1290 (
1291 "EPUB/nav.xhtml",
1292 br#"<?xml version="1.0" encoding="utf-8"?>
1293<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
1294 <body><nav epub:type="toc"><ol><li><a href="ch1.xhtml">Chapter 1</a></li></ol></nav></body>
1295</html>"#,
1296 ),
1297 (
1298 "EPUB/ch1.xhtml",
1299 br#"<html xmlns="http://www.w3.org/1999/xhtml"><body><p>Hello</p></body></html>"#,
1300 ),
1301 ("META-INF/encryption.xml", encryption_xml),
1302 ]);
1303 let report = validate_epub_reader(std::io::Cursor::new(data));
1304 assert!(report
1305 .diagnostics()
1306 .iter()
1307 .any(|d| d.code == "ENCRYPTION_REFERENCE_MISSING"));
1308 }
1309
1310 #[test]
1311 fn validate_detects_invalid_rights_xml() {
1312 let data = build_zip(&[
1313 ("mimetype", b"application/epub+zip"),
1314 (
1315 "META-INF/container.xml",
1316 br#"<?xml version="1.0"?>
1317<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
1318 <rootfiles>
1319 <rootfile full-path="EPUB/package.opf" media-type="application/oebps-package+xml"/>
1320 </rootfiles>
1321</container>"#,
1322 ),
1323 (
1324 "EPUB/package.opf",
1325 br#"<?xml version="1.0" encoding="UTF-8"?>
1326<package version="3.0" xmlns="http://www.idpf.org/2007/opf">
1327 <metadata xmlns:dc="http://purl.org/dc/elements/1.1/">
1328 <dc:title>Test Book</dc:title>
1329 <dc:creator>Tester</dc:creator>
1330 <dc:language>en</dc:language>
1331 </metadata>
1332 <manifest>
1333 <item id="nav" href="nav.xhtml" media-type="application/xhtml+xml" properties="nav"/>
1334 <item id="c1" href="ch1.xhtml" media-type="application/xhtml+xml"/>
1335 </manifest>
1336 <spine><itemref idref="c1"/></spine>
1337</package>"#,
1338 ),
1339 (
1340 "EPUB/nav.xhtml",
1341 br#"<?xml version="1.0" encoding="utf-8"?>
1342<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
1343 <body><nav epub:type="toc"><ol><li><a href="ch1.xhtml">Chapter 1</a></li></ol></nav></body>
1344</html>"#,
1345 ),
1346 (
1347 "EPUB/ch1.xhtml",
1348 br#"<html xmlns="http://www.w3.org/1999/xhtml"><body><p>Hello</p></body></html>"#,
1349 ),
1350 ("META-INF/rights.xml", b"<rights><broken></rights>"),
1351 ]);
1352 let report = validate_epub_reader(std::io::Cursor::new(data));
1353 assert!(report
1354 .diagnostics()
1355 .iter()
1356 .any(|d| d.code == "RIGHTS_XML_PARSE_ERROR"));
1357 }
1358}