use std::borrow::ToOwned;
use once_cell::sync::Lazy;
use regex::Regex;
/// Matches one path step: a `/` followed by one or more ASCII digits, e.g. `/14`.
static RE_STEP_REFERENCE: Lazy<Regex> = Lazy::new(|| {
    // The pattern is a constant, so a compile failure here is a programming error.
    Regex::new(r"/[0-9]+").unwrap()
});
/// Matches a bracketed assertion such as `[chap01]`, including assertions whose text contains
/// circumflex-escaped characters (e.g. `[^^^]]`, whose text is `^]`).
///
/// The escape alternative is listed first so that an escaped `]` does not terminate the
/// assertion early. A `^` that cannot be read as the start of an escape is still accepted as an
/// ordinary character, which keeps the pattern lenient towards sloppy input.
static RE_ASSERTIONS: Lazy<Regex> = Lazy::new(|| {
    Regex::new(
        r"(?x)
        # Matches opening square bracket e.g. `[`
        \[
        (?:
            # Matches a circumflex-escaped character e.g. `^]`
            \^.
            |
            # Matches anything but square brackets e.g. `c`
            [^\[\]]
        )*
        # Matches closing square bracket e.g. `]`
        \]
        ",
    )
    .unwrap()
});
/// Matches a character offset at the very end of the input: `:` followed by ASCII digits,
/// e.g. the `:5` in `/4/10/3:5`.
static RE_CHARACTER_OFFSET: Lazy<Regex> = Lazy::new(|| {
    // The pattern is a constant, so a compile failure here is a programming error.
    Regex::new(r":[0-9]+$").unwrap()
});
/// Matches a temporal offset: `~`, digits, a `.`, and more digits, e.g. `~42.43`.
///
/// An offset written without a fractional part (e.g. `~42`) is not matched by this pattern.
static RE_TEMPORAL_OFFSET: Lazy<Regex> = Lazy::new(|| {
    // The pattern is a constant, so a compile failure here is a programming error.
    Regex::new(r"~[0-9]+\.[0-9]+").unwrap()
});
/// Matches a spatial offset: `@`, a run of digits/dots, `:`, and another run of digits/dots,
/// e.g. `@100:101`.
///
/// NOTE(review): the identifier spells "spatial" as "SPACIAL"; it is kept as-is because other
/// code refers to it by this name.
static RE_SPACIAL_OFFSET: Lazy<Regex> = Lazy::new(|| {
    // The pattern is a constant, so a compile failure here is a programming error.
    Regex::new(r"@[0-9.]+:[0-9.]+").unwrap()
});
/// Converts an EPUB CFI string into a simplified location string.
///
/// The result is the step numbers joined with `.`, followed by the trailing character offset
/// (`:N`) when one is present; for example
/// `epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:5)` becomes `6.4.4.10.3:5`.
/// Bracketed assertions, temporal offsets (`~N.N`) and spatial offsets (`@N:N`) are discarded.
/// For a range written as `parent,start,end`, only `parent` and `start` contribute.
///
/// Returns an empty string when `raw` is not wrapped in `epubcfi(` ... `)` or when no step is
/// found inside it. This function never panics on malformed input.
///
/// Note that the returned `String`s compare lexicographically, not numerically.
#[must_use]
pub fn parse(raw: &str) -> String {
    // Require BOTH the prefix and the closing parenthesis. Using `strip_*` instead of slicing at
    // fixed byte offsets also avoids panics on short or non-ASCII input.
    let inner = match raw
        .strip_prefix("epubcfi(")
        .and_then(|rest| rest.strip_suffix(')'))
    {
        Some(inner) => inner,
        None => return String::new(),
    };

    // Strip everything that is neither a step nor a character offset. Assertions go first
    // because their free text may itself contain `,`, `~`, `@` or `/`.
    let location = RE_ASSERTIONS.replace_all(inner, "");
    let location = RE_TEMPORAL_OFFSET.replace_all(&location, "");
    let location = RE_SPACIAL_OFFSET.replace_all(&location, "");

    // A range has exactly three comma-separated parts; keep the parent path and the range start
    // and drop the range end. Any other shape is simply concatenated.
    let parts: Vec<&str> = location.split(',').collect();
    let location = match parts.as_slice() {
        [parent_path, range_start, _range_end] => format!("{parent_path}{range_start}"),
        _ => parts.concat(),
    };

    // Every match is `/` followed by digits; drop the slash and join the numbers with dots.
    let steps = RE_STEP_REFERENCE
        .find_iter(&location)
        .map(|m| m.as_str().trim_start_matches('/'))
        .collect::<Vec<_>>()
        .join(".");
    if steps.is_empty() {
        // Nothing to locate: treat it like any other malformed input.
        return String::new();
    }

    let character_offset = RE_CHARACTER_OFFSET
        .find(&location)
        .map_or_else(String::new, |m| m.as_str().to_owned());

    format!("{steps}{character_offset}")
}
// Unit tests for `parse`: exact-output cases (`test_parse!`) and comparisons between the parsed
// results of two inputs (`test_compare!`).
#[cfg(test)]
mod test_epubcfi_parser {
use super::*;
// Generates one `#[test]` function per `name: (raw, expected),` entry. Each generated test
// asserts that `parse(raw)` equals `expected`.
macro_rules! test_parse {
($($name:ident: $value:expr,)*) => {
$(
#[test]
fn $name() {
let (raw, expected) = $value;
let parsed = parse(raw);
assert_eq!(parsed, expected);
}
)*
}
}
// Generates one `#[test]` function per `name: (lhs cmp rhs),` entry. Both sides are run through
// `parse` and the resulting `String`s are compared with the given operator token.
macro_rules! test_compare {
($($name:ident: ($lhs:tt $cmp:tt $rhs:tt),)*) => {
$(
#[test]
fn $name() {
let lhs_parsed = parse($lhs);
let rhs_parsed = parse($rhs);
assert!(lhs_parsed $cmp rhs_parsed);
}
)*
}
}
test_parse! {
// Plain step paths.
test_parse_00: (
"epubcfi(/1/2)",
"1.2",
),
test_parse_01: (
"epubcfi(/1/0)",
"1.0",
),
// A bracketed assertion following a character offset is dropped; the offset is kept.
test_parse_02: (
"epubcfi(/1/2:3[pre,post])",
"1.2:3",
),
test_parse_03: (
"epubcfi(/1/2:3[,post])",
"1.2:3",
),
test_parse_04: (
"epubcfi(/1/2:3[pre,])",
"1.2:3",
),
// Assertion text containing circumflex-escaped characters.
test_parse_05: (
"epubcfi(/1[^^^]])",
"1",
),
// Assertions whose text contains `!`, `/`, spaces, `;` and `=`.
test_parse_06: (
"epubcfi(/6/14[cha!/p05ref]!/4[bo!/dy01]/10/2/1[foo]:5[don't!/ panic;s=b])",
"6.14.4.10.2.1:5",
),
// Paths with an indirection marker (`!`).
test_parse_07: (
"epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:5)",
"6.4.4.10.3:5",
),
test_parse_08: (
"epubcfi(/6/4[chap01ref]!/4/10/0)",
"6.4.4.10.0",
),
test_parse_09: (
"epubcfi(/6/4[chap01ref]!/4/10/999)",
"6.4.4.10.999",
),
// Range (`parent,start,end`): the result is built from the parent and the start only.
test_parse_10: (
"epubcfi(/6/4[chap01ref]!/4[body01],/10[para05]/3:5,/10[para05]/3:8)",
"6.4.4.10.3:5",
),
test_parse_11: (
"epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/3:3[34,67])",
"6.4.4.10.3:3",
),
// Temporal (`~`) and spatial (`@`) offsets are dropped.
test_parse_12: (
"epubcfi(/6/14[cha!/p05ref]!/4[bo!/dy01]/10/2/1[foo]~42.43@100:101)",
"6.14.4.10.2.1",
),
// Only the character offset at the very end of the input is kept.
test_parse_13: (
"epubcfi(/2~42.43@100:101/4!/6/8:100/6:200)",
"2.4.6.8.6:200",
),
// Non-numeric text directly after a step number is ignored.
test_parse_14: (
"epubcfi(/2/4vnd.foo/6foo.bar:20)",
"2.4.6:20",
),
// More ranges, including one whose start and end are bare character offsets.
test_parse_15: (
"epubcfi(/6/4[chap01ref]!/4[body01]/10[para05],/2/1:1,/3:4)",
"6.4.4.10.2.1:1",
),
test_parse_16: (
"epubcfi(/6/4[chap01ref]!/4[body01]/10[para05]/1:3[xx,y])",
"6.4.4.10.1:3",
),
test_parse_17: (
"epubcfi(/6/28[chap06]!/4/24[para06]/1,:4,:44)",
"6.28.4.24.1:4",
),
test_parse_18: (
"epubcfi(/2/4[node-id]!/6/7:5[pre,post;s=b])",
"2.4.6.7:5",
),
// Spatial and temporal offsets on their own and combined.
test_parse_19: (
"epubcfi(/2/4@4:2)",
"2.4",
),
test_parse_20: (
"epubcfi(/2/4~3.14)",
"2.4",
),
test_parse_21: (
"epubcfi(/2/4~3.14@4:2)",
"2.4",
),
}
test_compare! {
// Ordering by step numbers.
test_compare_00: (
"epubcfi(/2)" < "epubcfi(/6)"
),
test_compare_01: (
"epubcfi(/2/4!/6)" < "epubcfi(/2/4!/7)"
),
test_compare_02: (
"epubcfi(/2/4!/8)" > "epubcfi(/2/4!/7)"
),
// Assertion text does not take part in the comparison.
test_compare_03: (
"epubcfi(/2/4!/6[foo]/42!/12:100[lol])" < "epubcfi(/2/4!/6[bar]/44!/12:100[cat])"
),
test_compare_04: (
"epubcfi(/2/4!/6[foo]/44!/12:100[lol])" == "epubcfi(/2/4!/6[bar]/44!/12:100[cat])"
),
test_compare_05: (
"epubcfi(/2/4!/6[bar]/44!/12:100[cat])" == "epubcfi(/2/4!/6[bar]/44!/12:100[cat])"
),
// Temporal and spatial offsets do not take part in the comparison.
test_compare_06: (
"epubcfi(/2/4!/6[bar]/44!/3~1.11@1:1)" == "epubcfi(/2/4!/6[bar]/44!/3~2.22@2:2)"
),
// Ranges compare by parent + start; the range end is ignored.
test_compare_07: (
"epubcfi(/2/4,/6/8,/10/12)" == "epubcfi(/2/4,/6/8,/10/12)"
),
test_compare_08: (
"epubcfi(/2/4,/6/7,/10/11)" < "epubcfi(/2/4,/6/8,/10/12)"
),
test_compare_09: (
"epubcfi(/2/2,/6/8,/10/12)" < "epubcfi(/2/4,/6/8,/10/12)"
),
// A range compared against a plain path.
test_compare_10: (
"epubcfi(/2/4,/6/8,/10/13)" > "epubcfi(/2/4/6/7)"
),
test_compare_11: (
"epubcfi(/2/4,/6/8,/10/13)" == "epubcfi(/2/4/6/8)"
),
// Differing character offsets.
test_compare_12: (
"epubcfi(/2/4!/6[bar]/44!/12:100[hah])" < "epubcfi(/2/4!/6[bar]/44!/12:200[cat])"
),
}
}