use std::sync::LazyLock;
use super::context::{
SEPS, TRIM_CHARS, UnclaimedGap, find_invariant_text, find_unclaimed_gaps, strip_extension_pos,
};
use crate::matcher::span::{MatchSpan, Property};
/// Result of comparing one file's name against the names of its sibling files.
///
/// `analyze_invariance` returns the `Default` (all-empty) value when it is
/// given no siblings to compare against.
#[derive(Debug, Clone, Default)]
pub(crate) struct InvarianceReport {
/// Text shared by the target and its siblings, if any was found; it may have
/// been widened to take in adjacent invariant years.
pub title: Option<String>,
/// Byte offset in the target input where `title` begins.
pub title_start: Option<usize>,
/// Year-like numbers found in the target, from both unclaimed text and
/// claimed `Year` matches.
pub year_signals: Vec<YearSignal>,
/// Numbers whose value differs between the target and its siblings.
pub episode_signals: Vec<EpisodeSignal>,
}
/// A year-like number in the target input, plus whether the siblings agree on it.
#[derive(Debug, Clone)]
pub(crate) struct YearSignal {
/// Byte offset in the target input where the number begins.
pub start: usize,
/// Byte offset in the target input just past the number.
pub end: usize,
/// The parsed numeric value.
pub value: u32,
/// `true` when every sibling carries the same value at the compared position.
pub is_invariant: bool,
}
/// A number in the target input whose value is not the same across all siblings.
#[derive(Debug, Clone)]
pub(crate) struct EpisodeSignal {
/// Byte offset in the target input where the number begins.
pub start: usize,
/// Byte offset in the target input just past the number.
pub end: usize,
/// The target's value; for decomposed season/episode matches this is
/// `season * 100 + episode`.
pub value: u32,
/// Whether the distinct values of the target and its siblings form one
/// unbroken run of consecutive integers.
pub is_sequential: bool,
/// Number of digits in the source text (fixed at 3 for signals built from
/// decomposed season/episode matches).
pub digit_count: usize,
}
/// A digit run located inside an unclaimed gap of a file name.
#[derive(Debug, Clone)]
struct NumberInGap {
/// Byte offset in the input where the digit run begins.
start: usize,
/// Byte offset in the input just past the digit run.
end: usize,
/// The digits parsed as a `u32`.
value: u32,
/// Length of the matched digit text.
digit_count: usize,
/// Index of the containing gap within the gap list that was scanned.
gap_idx: usize,
/// Position of this number among the numbers kept from the same gap.
idx_within_gap: usize,
}
/// Borrowed view of one file: its input string and the match spans that go with it.
pub(crate) struct FileAnalysis<'a> {
/// The string being analysed; all span offsets index into it.
pub input: &'a str,
/// Match spans for `input`; handed to `find_unclaimed_gaps` together with it.
pub matches: &'a [MatchSpan],
}
/// Matches each maximal run of digits; compiled once, on first use.
static GAP_NUMBER: LazyLock<regex::Regex> =
LazyLock::new(|| regex::Regex::new(r"\d+").expect("GAP_NUMBER regex is valid"));
/// Compares `target` with `siblings` and reports what stays fixed and what varies.
///
/// The report holds the text shared by all files ahead of their first
/// season/episode/year match, year-like numbers flagged as invariant or not,
/// and numbers whose value changes from file to file. With no siblings there
/// is nothing to compare, so an empty report is returned.
pub(crate) fn analyze_invariance(
    target: &FileAnalysis<'_>,
    siblings: &[FileAnalysis<'_>],
) -> InvarianceReport {
    if siblings.is_empty() {
        return InvarianceReport::default();
    }

    // Unclaimed text of every file, target first.
    let target_gaps = find_unclaimed_gaps(target.input, target.matches);
    let sibling_gaps: Vec<Vec<UnclaimedGap>> = siblings
        .iter()
        .map(|sib| find_unclaimed_gaps(sib.input, sib.matches))
        .collect();

    // Title: text common to the title-candidate gaps of all files.
    let target_title_gaps = title_candidate_gaps(&target_gaps, target.input, target.matches);
    let sibling_title_gaps: Vec<Vec<UnclaimedGap>> = siblings
        .iter()
        .zip(&sibling_gaps)
        .map(|(sib, gaps)| title_candidate_gaps(gaps, sib.input, sib.matches))
        .collect();
    let mut gaps_per_file: Vec<&[UnclaimedGap]> =
        Vec::with_capacity(sibling_title_gaps.len() + 1);
    gaps_per_file.push(target_title_gaps.as_slice());
    gaps_per_file.extend(sibling_title_gaps.iter().map(|gaps| gaps.as_slice()));
    let (title_start, title) = find_invariant_text(&gaps_per_file).unzip();

    // Numbers sitting in unclaimed text, aligned across files by gap and position.
    let target_numbers = extract_numbers_from_gaps(target.input, &target_gaps);
    let sibling_numbers: Vec<Vec<NumberInGap>> = siblings
        .iter()
        .zip(&sibling_gaps)
        .map(|(sib, gaps)| extract_numbers_from_gaps(sib.input, gaps))
        .collect();

    let mut year_signals = classify_year_signals(&target_numbers, &sibling_numbers);
    year_signals.extend(classify_claimed_year_signals(target, siblings));

    let mut episode_signals = classify_episode_signals(&target_numbers, &sibling_numbers);
    episode_signals.extend(classify_claimed_decomposed_episodes(target, siblings));

    // An invariant year touching the title is folded into it.
    let (title, title_start) =
        expand_title_with_invariant_years(target.input, title, title_start, &year_signals);

    InvarianceReport {
        title,
        title_start,
        year_signals,
        episode_signals,
    }
}
/// Returns the gaps that can contribute to the title.
///
/// The anchor is the smallest start offset among `Season`, `Episode` and
/// `Year` matches that begin at or after `crate::filename_start(input)`.
/// Only gaps ending at or before the anchor are kept; when no such match
/// exists, every gap is returned.
fn title_candidate_gaps(
    gaps: &[UnclaimedGap],
    input: &str,
    matches: &[MatchSpan],
) -> Vec<UnclaimedGap> {
    let name_start = crate::filename_start(input);
    let is_anchor = |m: &&MatchSpan| {
        m.start >= name_start
            && matches!(
                m.property,
                Property::Season | Property::Episode | Property::Year
            )
    };

    let Some(anchor) = matches.iter().filter(is_anchor).map(|m| m.start).min() else {
        // Nothing bounds the title, so every gap stays a candidate.
        return gaps.to_vec();
    };

    gaps.iter()
        .filter(|gap| find_gap_end_in_input(input, gap) <= anchor)
        .cloned()
        .collect()
}
/// Widens the title to take in invariant years that sit directly next to it.
///
/// A year qualifies when it is flagged `is_invariant` and only separator
/// characters lie between it and the current title span. Years are visited in
/// ascending start order. If nothing is absorbed, the inputs come back
/// unchanged; otherwise the widened span of `input` is returned with
/// separators turned into spaces and the ends trimmed, together with the new
/// start offset.
fn expand_title_with_invariant_years(
    input: &str,
    title: Option<String>,
    title_start: Option<usize>,
    year_signals: &[YearSignal],
) -> (Option<String>, Option<usize>) {
    // Without both a title and its position there is nothing to widen.
    let (Some(title_text), Some(origin)) = (title.as_deref(), title_start) else {
        return (title, title_start);
    };
    let original_end = find_title_end_in_input(input, origin, title_text);

    let separators_only = |text: &str| text.chars().all(|c| SEPS.contains(&c));

    let mut invariant_years: Vec<&YearSignal> = year_signals
        .iter()
        .filter(|year| year.is_invariant)
        .collect();
    invariant_years.sort_by_key(|year| year.start);

    let (mut span_start, mut span_end) = (origin, original_end);
    for year in invariant_years {
        // Year immediately ahead of the span.
        if year.end <= span_start && separators_only(&input[year.end..span_start]) {
            span_start = year.start;
        }
        // Year immediately behind the span.
        if year.start >= span_end && separators_only(&input[span_end..year.start]) {
            span_end = year.end;
        }
    }

    if (span_start, span_end) == (origin, original_end) {
        // Untouched: hand the caller's values straight back.
        return (title, title_start);
    }

    let widened: String = input[span_start..span_end]
        .chars()
        .map(|c| if SEPS.contains(&c) { ' ' } else { c })
        .collect();
    (Some(widened.trim().to_string()), Some(span_start))
}
/// Returns the byte offset in `input` where the title starting at `title_start` ends.
///
/// Walks forward from `title_start`, ticking off one pending title character
/// for each input character that is neither in `SEPS` nor in `TRIM_CHARS`.
/// The number of pending characters is the count of non-whitespace characters
/// in `title_text`. The walk stops once none are pending or the input runs out.
fn find_title_end_in_input(input: &str, title_start: usize, title_text: &str) -> usize {
    let mut pending = title_text.chars().filter(|c| !c.is_whitespace()).count();
    let mut end = title_start;
    for ch in input[title_start..].chars() {
        if pending == 0 {
            break;
        }
        end += ch.len_utf8();
        let is_filler = SEPS.contains(&ch) || TRIM_CHARS.contains(&ch);
        if !is_filler {
            pending -= 1;
        }
    }
    end
}
/// Collects every usable digit run found inside the given unclaimed gaps.
///
/// Each gap is scanned from `gap.start` to the end reported by
/// `find_gap_end_in_input`. A digit run is skipped when it does not parse as a
/// `u32` or when its value is listed in `crate::CODEC_NUMBERS`. Skipped runs
/// do not take up an `idx_within_gap` slot, so that index counts kept numbers
/// only.
///
/// Returned offsets are absolute byte positions in `input`.
fn extract_numbers_from_gaps(input: &str, gaps: &[UnclaimedGap]) -> Vec<NumberInGap> {
    let mut numbers = Vec::new();
    for (gap_idx, gap) in gaps.iter().enumerate() {
        let gap_end = find_gap_end_in_input(input, gap);
        let gap_slice = &input[gap.start..gap_end];
        let mut idx_within_gap = 0;
        for m in GAP_NUMBER.find_iter(gap_slice) {
            let digit_str = m.as_str();
            // Parse once; a run that does not fit in `u32` is simply ignored.
            let Ok(value) = digit_str.parse::<u32>() else {
                continue;
            };
            if crate::CODEC_NUMBERS.contains(&value) {
                continue;
            }
            numbers.push(NumberInGap {
                // Regex offsets are relative to the gap slice; shift to input offsets.
                start: gap.start + m.start(),
                end: gap.start + m.end(),
                value,
                digit_count: digit_str.len(),
                gap_idx,
                idx_within_gap,
            });
            idx_within_gap += 1;
        }
    }
    numbers
}
/// Returns the byte offset in `input` where `gap` ends.
///
/// Walks forward from `gap.start`, never past `strip_extension_pos(input)`,
/// until it has seen as many characters outside `SEPS` and `TRIM_CHARS` as
/// `gap.text` has non-whitespace characters. The character that completes the
/// count is included. When any text is available the walk always consumes at
/// least one character, even if `gap.text` has no content.
fn find_gap_end_in_input(input: &str, gap: &UnclaimedGap) -> usize {
    let limit = strip_extension_pos(input);
    let wanted = gap.text.chars().filter(|c| !c.is_whitespace()).count();

    let mut seen = 0;
    let mut end = gap.start;
    for (offset, ch) in input[gap.start..limit].char_indices() {
        end = gap.start + offset + ch.len_utf8();
        let is_filler = SEPS.contains(&ch) || TRIM_CHARS.contains(&ch);
        if !is_filler {
            seen += 1;
        }
        if seen >= wanted {
            break;
        }
    }
    end
}
/// Builds year signals from numbers found in unclaimed text.
///
/// Only four-digit numbers in `1920..=2039` are considered. Each one is paired
/// with the number at the same `(gap_idx, idx_within_gap)` in every sibling.
/// If any sibling has no number there, no signal is produced. Otherwise the
/// signal is invariant exactly when all paired values equal the target's.
fn classify_year_signals(
    target_numbers: &[NumberInGap],
    sibling_numbers: &[Vec<NumberInGap>],
) -> Vec<YearSignal> {
    target_numbers
        .iter()
        .filter(|tn| tn.digit_count == 4 && (1920..=2039).contains(&tn.value))
        .filter_map(|tn| {
            let mut is_invariant = true;
            for peers in sibling_numbers {
                // A sibling without a number at this position rules the candidate out.
                let peer = peers.iter().find(|sn| {
                    sn.gap_idx == tn.gap_idx && sn.idx_within_gap == tn.idx_within_gap
                })?;
                is_invariant &= peer.value == tn.value;
            }
            Some(YearSignal {
                start: tn.start,
                end: tn.end,
                value: tn.value,
                is_invariant,
            })
        })
        .collect()
}
/// Builds year signals from `Year` matches already claimed in the target.
///
/// Each target `Year` match whose value parses as `u32` is compared with the
/// first `Year` match of every sibling. If some sibling has no `Year` match,
/// or its value does not parse, no signal is produced for that target match.
/// Otherwise the signal is invariant exactly when every sibling year equals
/// the target's.
fn classify_claimed_year_signals(
    target: &FileAnalysis<'_>,
    siblings: &[FileAnalysis<'_>],
) -> Vec<YearSignal> {
    use crate::matcher::span::Property;

    let mut signals = Vec::new();
    let target_years = target
        .matches
        .iter()
        .filter(|m| m.property == Property::Year);
    for year_match in target_years {
        let Ok(year) = year_match.value.parse::<u32>() else {
            continue;
        };

        // First claimed year of each sibling; `None` as soon as one is missing
        // or unparsable.
        let sibling_years: Option<Vec<u32>> = siblings
            .iter()
            .map(|sib| {
                sib.matches
                    .iter()
                    .find(|m| m.property == Property::Year)
                    .and_then(|m| m.value.parse::<u32>().ok())
            })
            .collect();
        let Some(sibling_years) = sibling_years else {
            continue;
        };

        signals.push(YearSignal {
            start: year_match.start,
            end: year_match.end,
            value: year,
            is_invariant: sibling_years.iter().all(|&other| other == year),
        });
    }
    signals
}
/// Builds episode signals from season/episode match pairs that share one span.
///
/// A target candidate is a `Season` match with `priority <= 0` that has an
/// `Episode` match with `priority <= 0` over exactly the same span. The pair
/// is recombined as `season * 100 + episode`. In each sibling, the first
/// `Season` match with `priority <= 0` and its same-span `Episode` match are
/// recombined the same way. If any piece is missing or does not parse as
/// `u32`, the candidate is dropped.
///
/// A signal is emitted only when the recombined values are not all equal and
/// form a consecutive run; `digit_count` is fixed at 3.
fn classify_claimed_decomposed_episodes(
    target: &FileAnalysis<'_>,
    siblings: &[FileAnalysis<'_>],
) -> Vec<EpisodeSignal> {
    use crate::matcher::span::Property;

    target
        .matches
        .iter()
        .filter(|tm| tm.property == Property::Season && tm.priority <= 0)
        .filter_map(|tm| {
            let target_episode = target.matches.iter().find(|m| {
                m.property == Property::Episode
                    && m.start == tm.start
                    && m.end == tm.end
                    && m.priority <= 0
            })?;
            let season = tm.value.parse::<u32>().ok()?;
            let episode = target_episode.value.parse::<u32>().ok()?;
            let raw_value = season * 100 + episode;

            let mut values: Vec<u32> = Vec::with_capacity(siblings.len() + 1);
            values.push(raw_value);
            for sib in siblings {
                let sib_season = sib
                    .matches
                    .iter()
                    .find(|m| m.property == Property::Season && m.priority <= 0)?;
                let sib_episode = sib.matches.iter().find(|m| {
                    m.property == Property::Episode
                        && m.start == sib_season.start
                        && m.end == sib_season.end
                        && m.priority <= 0
                })?;
                let sib_season_no = sib_season.value.parse::<u32>().ok()?;
                let sib_episode_no = sib_episode.value.parse::<u32>().ok()?;
                values.push(sib_season_no * 100 + sib_episode_no);
            }

            // Identical everywhere means the number carries no episode information.
            if values.iter().all(|&v| v == raw_value) {
                return None;
            }

            is_sequential_set(&values).then_some(EpisodeSignal {
                start: tm.start,
                end: tm.end,
                value: raw_value,
                is_sequential: true,
                digit_count: 3,
            })
        })
        .collect()
}
/// Builds episode signals from numbers found in unclaimed text.
///
/// Four-digit numbers in `1920..=2039` are left out. Every other number is
/// paired with the number at the same `(gap_idx, idx_within_gap)` in each
/// sibling. If a sibling has no number there, or all values are equal, no
/// signal is produced. Otherwise a signal is emitted, with `is_sequential`
/// telling whether the values form a consecutive run.
fn classify_episode_signals(
    target_numbers: &[NumberInGap],
    sibling_numbers: &[Vec<NumberInGap>],
) -> Vec<EpisodeSignal> {
    target_numbers
        .iter()
        .filter(|tn| !(tn.digit_count == 4 && (1920..=2039).contains(&tn.value)))
        .filter_map(|tn| {
            let mut values: Vec<u32> = Vec::with_capacity(sibling_numbers.len() + 1);
            values.push(tn.value);
            for peers in sibling_numbers {
                let peer = peers.iter().find(|sn| {
                    sn.gap_idx == tn.gap_idx && sn.idx_within_gap == tn.idx_within_gap
                })?;
                values.push(peer.value);
            }

            // A number that never changes is not an episode marker.
            if values.iter().all(|&v| v == tn.value) {
                return None;
            }

            Some(EpisodeSignal {
                start: tn.start,
                end: tn.end,
                value: tn.value,
                is_sequential: is_sequential_set(&values),
                digit_count: tn.digit_count,
            })
        })
        .collect()
}
/// Reports whether the distinct values form one unbroken run of consecutive integers.
///
/// Order and duplicates are ignored. Fewer than two distinct values is never
/// considered sequential.
fn is_sequential_set(values: &[u32]) -> bool {
    if values.len() < 2 {
        return false;
    }
    let mut sorted: Vec<u32> = values.to_vec();
    sorted.sort_unstable();
    sorted.dedup();
    if sorted.len() < 2 {
        return false;
    }
    let min = sorted[0];
    let max = sorted[sorted.len() - 1];
    // Compare in u64: `max - min + 1` overflows u32 when the values span the
    // whole range (e.g. 0 and u32::MAX).
    u64::from(max - min) + 1 == sorted.len() as u64
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matcher::span::Property;

    /// Builds a match span covering `start..end` (byte offsets) of a test input.
    fn make_match_at(start: usize, end: usize, property: Property, value: &str) -> MatchSpan {
        MatchSpan::new(start, end, property, value)
    }

    #[test]
    fn is_sequential_basic() {
        assert!(is_sequential_set(&[3, 4, 5]));
        assert!(is_sequential_set(&[1, 2]));
        assert!(is_sequential_set(&[501, 502, 503]));
    }

    #[test]
    fn is_sequential_out_of_order() {
        assert!(is_sequential_set(&[5, 3, 4]));
    }

    #[test]
    fn is_sequential_gaps_not_sequential() {
        assert!(!is_sequential_set(&[1, 3, 5]));
        assert!(!is_sequential_set(&[1, 10]));
    }

    #[test]
    fn is_sequential_single_value() {
        assert!(!is_sequential_set(&[5]));
    }

    #[test]
    fn is_sequential_all_same() {
        assert!(!is_sequential_set(&[5, 5, 5]));
    }

    #[test]
    fn year_invariant_detected() {
        let target_input = "2001.A.Space.Odyssey.1080p.mkv";
        let sib_input = "2001.A.Space.Odyssey.720p.mkv";
        // The screen size starts at byte 21 in both names.
        let target_matches = vec![make_match_at(21, 26, Property::ScreenSize, "1080p")];
        let sib_matches = vec![make_match_at(21, 25, Property::ScreenSize, "720p")];
        let report = analyze_invariance(
            &FileAnalysis {
                input: target_input,
                matches: &target_matches,
            },
            &[FileAnalysis {
                input: sib_input,
                matches: &sib_matches,
            }],
        );
        let year_2001: Vec<_> = report
            .year_signals
            .iter()
            .filter(|y| y.value == 2001)
            .collect();
        assert!(!year_2001.is_empty(), "should detect 2001 as year signal");
        assert!(
            year_2001[0].is_invariant,
            "2001 should be invariant (title content)"
        );
    }

    #[test]
    fn year_variant_detected() {
        let target_input = "Movie.2023.1080p.mkv";
        let sib_input = "Movie.2024.1080p.mkv";
        let target_matches = vec![make_match_at(11, 16, Property::ScreenSize, "1080p")];
        let sib_matches = vec![make_match_at(11, 16, Property::ScreenSize, "1080p")];
        let report = analyze_invariance(
            &FileAnalysis {
                input: target_input,
                matches: &target_matches,
            },
            &[FileAnalysis {
                input: sib_input,
                matches: &sib_matches,
            }],
        );
        let year_signals: Vec<_> = report
            .year_signals
            .iter()
            .filter(|y| (2023..=2024).contains(&y.value))
            .collect();
        assert!(!year_signals.is_empty(), "should detect year signal");
        assert!(
            !year_signals[0].is_invariant,
            "year should be variant (metadata)"
        );
    }

    #[test]
    fn episode_sequential_detected() {
        let target = "Show.03.720p.mkv";
        let sib = "Show.04.720p.mkv";
        // "720p" occupies bytes 8..12 in both names.
        let target_matches = vec![make_match_at(8, 12, Property::ScreenSize, "720p")];
        let sib_matches = vec![make_match_at(8, 12, Property::ScreenSize, "720p")];
        let report = analyze_invariance(
            &FileAnalysis {
                input: target,
                matches: &target_matches,
            },
            &[FileAnalysis {
                input: sib,
                matches: &sib_matches,
            }],
        );
        assert!(
            !report.episode_signals.is_empty(),
            "should detect episode signal"
        );
        let ep = &report.episode_signals[0];
        assert_eq!(ep.value, 3);
        assert!(ep.is_sequential, "episodes should be sequential");
        assert_eq!(ep.digit_count, 2);
    }

    #[test]
    fn episode_three_digit_sequential() {
        let target = "Show.501.720p.mkv";
        let sib1 = "Show.502.720p.mkv";
        let sib2 = "Show.503.720p.mkv";
        let target_matches = vec![make_match_at(9, 13, Property::ScreenSize, "720p")];
        let sib1_matches = vec![make_match_at(9, 13, Property::ScreenSize, "720p")];
        let sib2_matches = vec![make_match_at(9, 13, Property::ScreenSize, "720p")];
        let report = analyze_invariance(
            &FileAnalysis {
                input: target,
                matches: &target_matches,
            },
            &[
                FileAnalysis {
                    input: sib1,
                    matches: &sib1_matches,
                },
                FileAnalysis {
                    input: sib2,
                    matches: &sib2_matches,
                },
            ],
        );
        assert!(
            !report.episode_signals.is_empty(),
            "should detect 3-digit episode"
        );
        let ep = &report.episode_signals[0];
        assert_eq!(ep.value, 501);
        assert!(ep.is_sequential);
        assert_eq!(ep.digit_count, 3);
    }

    #[test]
    fn no_siblings_empty_report() {
        let target = "Movie.2024.1080p.mkv";
        let matches = vec![make_match_at(11, 16, Property::ScreenSize, "1080p")];
        let report = analyze_invariance(
            &FileAnalysis {
                input: target,
                matches: &matches,
            },
            &[],
        );
        assert!(report.title.is_none());
        assert!(report.year_signals.is_empty());
        assert!(report.episode_signals.is_empty());
    }

    #[test]
    fn invariant_number_not_episode() {
        let target = "Show.42.720p.mkv";
        let sib = "Show.42.1080p.mkv";
        let target_matches = vec![make_match_at(8, 12, Property::ScreenSize, "720p")];
        let sib_matches = vec![make_match_at(8, 13, Property::ScreenSize, "1080p")];
        let report = analyze_invariance(
            &FileAnalysis {
                input: target,
                matches: &target_matches,
            },
            &[FileAnalysis {
                input: sib,
                matches: &sib_matches,
            }],
        );
        assert!(
            report.episode_signals.is_empty(),
            "invariant number should not produce episode signal, got: {:?}",
            report.episode_signals
        );
    }
}