use std::collections::HashMap;
use std::path::PathBuf;
use super::Check;
use crate::extracted::ExtractedEpub;
use crate::validate::ValidationReport;
/// Validation check for manifest href file names.
///
/// Its `Check` implementation reports rule IDs `R13.1` through `R13.5`.
pub struct FilenameChecks;
// Filename rules (R13.x) evaluated against every href listed in the OPF manifest.
impl Check for FilenameChecks {
    /// Rule identifiers this check is able to emit.
    fn ids(&self) -> &'static [&'static str] {
        const IDS: &[&str] = &["R13.1", "R13.2", "R13.3", "R13.4", "R13.5"];
        IDS
    }

    /// Walks the manifest twice: first applying the per-href character rules
    /// (R13.1–R13.4), then looking for hrefs that differ only by case (R13.5).
    ///
    /// Every finding is emitted through `report.emit_at` with the offending
    /// href as the file location and no line information.
    fn run(&self, epub: &ExtractedEpub, report: &mut ValidationReport) {
        let manifest = &epub.opf.manifest;

        // Pass 1: character-level rules, always evaluated in rule-id order.
        for (_id, (href, _media_type)) in manifest {
            let location = PathBuf::from(href);
            let findings = ocf_illegal_char(href)
                .map(|bad| {
                    (
                        "R13.1",
                        format!("Illegal character {} in href '{}'.", describe_char(bad), href),
                    )
                })
                .into_iter()
                .chain(has_space(href).then(|| ("R13.2", format!("Space in href '{}'.", href))))
                .chain(
                    ends_with_dot(href)
                        .then(|| ("R13.3", format!("Href '{}' ends with '.'.", href))),
                )
                .chain(
                    has_non_ascii(href)
                        .then(|| ("R13.4", format!("Non-ASCII character in href '{}'.", href))),
                );
            for (rule, message) in findings {
                report.emit_at(rule, message, Some(location.clone()), None);
            }
        }

        // Pass 2: case-fold collisions. Each lowercased href maps to the first
        // spelling encountered; later spellings are compared against that one.
        let mut first_spelling: HashMap<String, String> = HashMap::new();
        let mut flagged: Vec<(String, String)> = Vec::new();
        for (_id, (href, _media_type)) in manifest {
            let key = href.to_lowercase();
            let earlier = match first_spelling.get(&key) {
                Some(earlier) => earlier,
                None => {
                    first_spelling.insert(key, href.clone());
                    continue;
                }
            };
            // The very same href listed again is not a case collision.
            if earlier == href {
                continue;
            }
            // Order the pair so that (a, b) and (b, a) are recognised as one finding.
            let (low, high) = if earlier <= href {
                (earlier, href)
            } else {
                (href, earlier)
            };
            let pair = (low.clone(), high.clone());
            if flagged.contains(&pair) {
                continue;
            }
            report.emit_at(
                "R13.5",
                format!(
                    "Manifest hrefs '{}' and '{}' are equal after case-folding.",
                    pair.0, pair.1
                ),
                Some(PathBuf::from(&pair.1)),
                None,
            );
            flagged.push(pair);
        }
    }
}
/// Returns the first character of `href` that `is_ocf_illegal` rejects,
/// or `None` when every character is acceptable.
fn ocf_illegal_char(href: &str) -> Option<char> {
    for candidate in href.chars() {
        if is_ocf_illegal(candidate) {
            return Some(candidate);
        }
    }
    None
}
/// Returns `true` when `c` is a character that must not appear in an OCF file name.
///
/// Rejected characters:
/// * the punctuation `<`, `>`, `:`, `"`, `|`, `?`, `*` and `\`;
/// * every control character: C0 (U+0000–U+001F), DEL (U+007F) and
///   C1 (U+0080–U+009F).
///
/// `/` is deliberately not rejected here: hrefs are paths, so it acts as the
/// separator between file names rather than being part of one.
fn is_ocf_illegal(c: char) -> bool {
    // `char::is_control` is exactly the Unicode `Cc` category, i.e. C0 + DEL + C1.
    matches!(c, '<' | '>' | ':' | '"' | '|' | '?' | '*' | '\\') || c.is_control()
}
/// Reports whether `href` contains at least one U+0020 SPACE.
///
/// Other whitespace (tab, no-break space, ...) does not count.
fn has_space(href: &str) -> bool {
    href.chars().any(|ch| ch == ' ')
}
/// Reports whether the final character of `href` is a full stop.
///
/// Only the end of the whole href is inspected; dots elsewhere, including at
/// the end of an intermediate path segment, are ignored.
fn ends_with_dot(href: &str) -> bool {
    href.chars().next_back() == Some('.')
}
/// Reports whether `href` contains any character above `~` (U+007E).
///
/// Because the cut-off is U+007E, DEL (U+007F) is flagged along with every
/// character outside the ASCII range.
fn has_non_ascii(href: &str) -> bool {
    const LAST_ACCEPTED: char = '~';
    href.chars().any(|ch| ch > LAST_ACCEPTED)
}
/// Renders `c` for inclusion in a diagnostic message.
///
/// Control characters (C0, DEL and C1) are invisible when printed, so they
/// are shown as their code point in `U+XXXX` form; any other character is
/// shown literally between single quotes.
fn describe_char(c: char) -> String {
    if c.is_control() {
        // Zero-padded to at least four hex digits, e.g. U+0007 or U+007F.
        format!("U+{:04X}", c as u32)
    } else {
        format!("'{}'", c)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ---- R13.1: illegal characters ----

    #[test]
    fn r13_1_less_than_fires() {
        let found = ocf_illegal_char("foo<bar.html");
        assert_eq!(found, Some('<'));
    }

    #[test]
    fn r13_1_asterisk_fires() {
        let found = ocf_illegal_char("foo*bar.html");
        assert_eq!(found, Some('*'));
    }

    #[test]
    fn r13_1_colon_fires() {
        let found = ocf_illegal_char("OEBPS/chapter:1.html");
        assert_eq!(found, Some(':'));
    }

    #[test]
    fn r13_1_control_char_fires() {
        // U+0007 (BEL) embedded in the middle of the name.
        let found = ocf_illegal_char("foo\u{0007}bar.html");
        assert_eq!(found, Some('\u{0007}'));
    }

    #[test]
    fn r13_1_plain_ascii_clean() {
        assert_eq!(ocf_illegal_char("OEBPS/content.html"), None);
    }

    #[test]
    fn r13_1_slash_clean() {
        // Path separators must not be reported.
        assert_eq!(ocf_illegal_char("OEBPS/sub/content.html"), None);
    }

    // ---- R13.2: spaces ----

    #[test]
    fn r13_2_space_fires() {
        assert!(has_space("chapter 1.html"), "embedded space must be detected");
    }

    #[test]
    fn r13_2_no_space_clean() {
        assert!(!has_space("chapter_1.html"), "underscore is not a space");
    }

    // ---- R13.3: trailing full stop ----

    #[test]
    fn r13_3_trailing_dot_fires() {
        assert!(ends_with_dot("chapter."), "trailing '.' must be detected");
    }

    #[test]
    fn r13_3_no_trailing_dot_clean() {
        assert!(!ends_with_dot("chapter.html"), "extension dot is not trailing");
    }

    #[test]
    fn r13_3_dot_in_middle_clean() {
        assert!(!ends_with_dot("chapter.1.html"), "interior dots are ignored");
    }

    // ---- R13.4: non-ASCII characters ----

    #[test]
    fn r13_4_greek_letter_fires() {
        assert!(has_non_ascii("κεφάλαιο.html"), "Greek letters are non-ASCII");
    }

    #[test]
    fn r13_4_em_dash_fires() {
        assert!(has_non_ascii("chapter\u{2014}one.html"), "em dash is non-ASCII");
    }

    #[test]
    fn r13_4_pure_ascii_clean() {
        assert!(!has_non_ascii("chapter_one.html"), "plain ASCII must pass");
    }

    #[test]
    fn r13_4_tilde_boundary_clean() {
        // '~' (U+007E) is the highest character that is still accepted.
        assert!(!has_non_ascii("chapter~one.html"), "tilde must pass");
    }

    // ---- R13.5: case-fold collisions ----
    // These pin the `to_lowercase` folding that `run` uses to build its keys.

    #[test]
    fn r13_5_case_fold_collides() {
        assert_eq!("Foo.html".to_lowercase(), "foo.html".to_lowercase());
    }

    #[test]
    fn r13_5_distinct_names_clean() {
        assert_ne!("foo.html".to_lowercase(), "bar.html".to_lowercase());
    }

    // ---- describe_char ----

    #[test]
    fn describe_char_control_uses_codepoint() {
        let rendered = describe_char('\u{0007}');
        assert_eq!(rendered, "U+0007");
    }

    #[test]
    fn describe_char_printable_quoted() {
        let rendered = describe_char('<');
        assert_eq!(rendered, "'<'");
    }
}