use std::sync::OnceLock;
use regex::Regex;
/// Result of splitting a subject line into its base text and part numbers.
///
/// `part_index` and `part_total` are either both `Some` (a part marker such as
/// `(3/17)` was recognised) or both `None` (no marker was found). The numbers
/// are stored as written in the subject; no range or ordering check is applied,
/// so an index of `0` or an index larger than the total is possible.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct SubjectParts {
    /// Subject text that remains after reply/forward prefixes and the part
    /// marker have been removed.
    pub base_subject: String,
    /// First number of the part marker, or `None` when no marker was found.
    pub part_index: Option<u32>,
    /// Second number of the part marker, or `None` when no marker was found.
    pub part_total: Option<u32>,
}
/// One compiled part-marker regex tried by `parse_subject`.
///
/// `parse_subject` reads capture group 1 as the part index and capture
/// group 2 as the part total, so every regex stored here must define both.
struct Pattern {
/// Compiled regex for a single marker style.
re: Regex,
}
/// Returns the lazily compiled table of part-marker patterns, in the order
/// they are tried.
///
/// Every pattern captures the part index as group 1 and the part total as
/// group 2, each being one to six ASCII digits. Spaces and tabs are the only
/// whitespace tolerated inside a marker.
///
/// # Panics
///
/// Panics on first use if any pattern source fails to compile.
fn patterns() -> &'static [Pattern; 5] {
    const SOURCES: [&str; 5] = [
        // "(n/m)", optional spaces/tabs around the numbers and the slash.
        r"\([ \t]*([0-9]{1,6})[ \t]*/[ \t]*([0-9]{1,6})[ \t]*\)",
        // "[n/m]", same layout with square brackets.
        r"\[[ \t]*([0-9]{1,6})[ \t]*/[ \t]*([0-9]{1,6})[ \t]*\]",
        // "part n/m", case-insensitive, at least one space/tab after "part".
        r"(?i)\bpart[ \t]+([0-9]{1,6})[ \t]*/[ \t]*([0-9]{1,6})\b",
        // "part n of m", case-insensitive, all separators optional ("part3of17").
        r"(?i)\bpart[ \t]*([0-9]{1,6})[ \t]*of[ \t]*([0-9]{1,6})\b",
        // " - n/m", a dash surrounded by spaces/tabs followed by a bare fraction.
        r"[ \t]+-[ \t]+([0-9]{1,6})[ \t]*/[ \t]*([0-9]{1,6})\b",
    ];
    static PATTERNS: OnceLock<[Pattern; 5]> = OnceLock::new();

    PATTERNS.get_or_init(|| {
        SOURCES.map(|source| Pattern {
            re: Regex::new(source).unwrap(),
        })
    })
}
/// Returns the lazily compiled regex that detects the standalone word `yenc`
/// in any letter case.
///
/// # Panics
///
/// Panics on first use if the pattern fails to compile.
fn yenc_re() -> &'static Regex {
    const YENC_WORD: &str = r"(?i)\byenc\b";
    static YENC: OnceLock<Regex> = OnceLock::new();

    YENC.get_or_init(|| Regex::new(YENC_WORD).unwrap())
}
/// Removes every leading `re:`, `fw:` or `fwd:` prefix (case-insensitive) from `s`.
///
/// Spaces and tabs are allowed before and after the colon. Prefixes are
/// removed repeatedly, so `"Re: Fwd: x"` becomes `"x"`. The match is anchored
/// at the very start of the remaining text; nothing is removed when `s` does
/// not begin with one of the prefixes. The returned slice borrows from `s`.
fn strip_prefixes(s: &str) -> &str {
    static PREFIX: OnceLock<Regex> = OnceLock::new();
    let prefix = PREFIX.get_or_init(|| Regex::new(r"(?i)^(re|fwd?)[ \t]*:[ \t]*").unwrap());

    let mut rest = s;
    // A match always covers at least the prefix word and the colon, so each
    // iteration shortens `rest` and the loop terminates.
    while let Some(hit) = prefix.find(rest) {
        rest = &rest[hit.end()..];
    }
    rest
}
/// Splits a subject line into its base subject and, when present, its part
/// marker (`(3/17)`, `[3/17]`, `Part 3/17`, `Part 3 of 17`, ` - 3/17`).
///
/// Processing steps:
///
/// 1. An empty subject, or one containing the standalone word `yenc` in any
///    letter case, yields `None`.
/// 2. Surrounding whitespace and leading `Re:` / `Fw:` / `Fwd:` prefixes are
///    removed. If nothing remains, the result is `None`.
/// 3. The marker patterns are tried in order. For the first one that matches,
///    the marker is cut out, the text before and after it is joined with a
///    single space, and trailing dashes and whitespace are dropped. If no base
///    text is left (the subject was only a marker), the result is `None`.
/// 4. If no pattern matches, the cleaned subject is returned with both part
///    fields set to `None`.
///
/// The part numbers are returned as written; they are not range-checked.
pub fn parse_subject(subject: &str) -> Option<SubjectParts> {
    if subject.is_empty() || yenc_re().is_match(subject) {
        return None;
    }

    // The prefix regex is anchored at the start of the text, so leading
    // whitespace must be removed first or " Re: x" would keep its prefix.
    let stripped = strip_prefixes(subject.trim_start()).trim();
    if stripped.is_empty() {
        return None;
    }

    for pat in patterns() {
        let Some(caps) = pat.re.captures(stripped) else {
            continue;
        };
        // The patterns cap each number at six digits, so these parses are not
        // expected to fail; fall through to the next pattern if one ever does.
        let (Ok(part_index), Ok(part_total)) = (caps[1].parse::<u32>(), caps[2].parse::<u32>())
        else {
            continue;
        };

        let marker = caps
            .get(0)
            .expect("capture group 0 is the whole match and is always present");
        let before = stripped[..marker.start()].trim_end();
        let after = stripped[marker.end()..].trim_start();

        let raw = match (before.is_empty(), after.is_empty()) {
            (true, _) => after.to_string(),
            (_, true) => before.to_string(),
            _ => format!("{before} {after}"),
        };
        // Drop a dangling separator such as the "-" in "name - (1/5)".
        let base_subject = raw
            .trim_end_matches(|c: char| c == '-' || c.is_whitespace())
            .trim()
            .to_string();

        // A subject consisting only of a marker carries no usable name.
        if base_subject.is_empty() {
            return None;
        }
        return Some(SubjectParts {
            base_subject,
            part_index: Some(part_index),
            part_total: Some(part_total),
        });
    }

    Some(SubjectParts {
        base_subject: stripped.to_string(),
        part_index: None,
        part_total: None,
    })
}
/// Unit tests for `parse_subject`, grouped by marker style, rejection cases,
/// prefix stripping and base-subject clean-up.
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `subject` and panics if the parser rejects it.
    fn parts(subject: &str) -> SubjectParts {
        parse_subject(subject).unwrap()
    }

    // --- "(n/m)" markers ---

    #[test]
    fn paren_fraction_basic() {
        let p = parts("bigfile.rar (1/5)");
        assert_eq!(p.part_index, Some(1));
        assert_eq!(p.part_total, Some(5));
        assert_eq!(p.base_subject, "bigfile.rar");
    }

    #[test]
    fn paren_fraction_leading_zero() {
        let p = parts("filename.tar.gz (03/17)");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(17));
        assert_eq!(p.base_subject, "filename.tar.gz");
    }

    #[test]
    fn paren_fraction_spaces_inside() {
        let p = parts("file.zip ( 2 / 7 )");
        assert_eq!(p.part_index, Some(2));
        assert_eq!(p.part_total, Some(7));
    }

    // --- "[n/m]" markers ---

    #[test]
    fn bracket_fraction_basic() {
        let p = parts("image.jpg [2/4]");
        assert_eq!(p.part_index, Some(2));
        assert_eq!(p.part_total, Some(4));
        assert_eq!(p.base_subject, "image.jpg");
    }

    #[test]
    fn bracket_fraction_not_binary_tag() {
        let p = parts("[BINARY] filename - Part 3 of 12");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(12));
        // The non-numeric bracket tag must not be consumed as a part marker.
        assert_eq!(p.base_subject, "[BINARY] filename");
    }

    // --- "part n/m" and "part n of m" markers ---

    #[test]
    fn part_slash_basic() {
        let p = parts("file.zip Part 3/17");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(17));
    }

    #[test]
    fn part_slash_lowercase() {
        let p = parts("file.tar.gz part 2/5");
        assert_eq!(p.part_index, Some(2));
        assert_eq!(p.part_total, Some(5));
    }

    #[test]
    fn part_of_with_spaces() {
        let p = parts("file.zip Part 03 of 17");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(17));
    }

    #[test]
    fn part_of_no_spaces() {
        let p = parts("archive.tar.gz part3of17");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(17));
    }

    #[test]
    fn binary_tag_part_of() {
        let p = parts("[BINARY] filename - Part 3 of 12");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(12));
    }

    // --- " - n/m" markers ---

    #[test]
    fn dash_fraction() {
        let p = parts("filename.tar.gz - 03/17");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(17));
        assert_eq!(p.base_subject, "filename.tar.gz");
    }

    #[test]
    fn part_zero_toc() {
        let p = parts("filename.tar.gz (00/17)");
        assert_eq!(p.part_index, Some(0));
        assert_eq!(p.part_total, Some(17));
    }

    // --- Rejected subjects ---

    #[test]
    fn yenc_returns_none() {
        assert!(parse_subject("\"file.nfo\" yEnc (1/3)").is_none());
    }

    #[test]
    fn yenc_uppercase_returns_none() {
        assert!(parse_subject("\"file.nfo\" YENC (1/3)").is_none());
    }

    #[test]
    fn no_marker_returns_some_none_fields() {
        let p = parts("plain subject");
        assert_eq!(p.base_subject, "plain subject");
        assert_eq!(p.part_index, None);
        assert_eq!(p.part_total, None);
    }

    #[test]
    fn empty_returns_none() {
        assert!(parse_subject("").is_none());
    }

    #[test]
    fn bare_paren_marker_returns_none() {
        assert!(parse_subject("(1/3)").is_none());
    }

    #[test]
    fn bare_paren_marker_with_spaces_returns_none() {
        assert!(parse_subject(" (1/3) ").is_none());
    }

    #[test]
    fn bare_bracket_marker_returns_none() {
        assert!(parse_subject("[2/4]").is_none());
    }

    #[test]
    fn bare_part_marker_only_returns_none() {
        assert!(parse_subject("Part 1 of 3").is_none());
        assert!(parse_subject("Part 1/3").is_none());
    }

    // --- Reply/forward prefix stripping ---

    #[test]
    fn re_prefix_stripped() {
        let p = parts("Re: filename.tar.gz (03/17)");
        assert_eq!(p.part_index, Some(3));
        assert_eq!(p.part_total, Some(17));
    }

    #[test]
    fn fwd_prefix_stripped() {
        let p = parts("Fwd: filename.tar.gz (03/17)");
        assert_eq!(p.part_index, Some(3));
    }

    #[test]
    fn fw_prefix_stripped() {
        let p = parts("Fw: filename.tar.gz (03/17)");
        assert_eq!(p.part_index, Some(3));
    }

    #[test]
    fn nested_re_prefix_stripped() {
        let p = parts("Re: Re: filename.tar.gz (03/17)");
        assert_eq!(p.part_index, Some(3));
    }

    // --- Base-subject clean-up ---

    #[test]
    fn unicode_stem_no_panic() {
        let p = parts("日本語ファイル (1/3)");
        assert_eq!(p.part_index, Some(1));
        assert_eq!(p.part_total, Some(3));
        assert!(p.base_subject.contains('日'));
    }

    #[test]
    fn base_subject_trailing_dash_stripped() {
        // The input must contain a separator dash before the marker, otherwise
        // the stripping logic is never exercised.
        let p = parts("myfile.bin - (2/5)");
        assert!(!p.base_subject.ends_with('-'));
        assert!(!p.base_subject.ends_with(' '));
        assert_eq!(p.base_subject, "myfile.bin");
    }

    #[test]
    fn parse_subject_returns_none_for_all_prefix_input() {
        assert!(parse_subject("Re: ").is_none());
        assert!(parse_subject("Fwd: Re: ").is_none());
        assert!(parse_subject(" ").is_none());
    }
}