use crate::matcher::span::MatchSpan;
/// Characters treated as separators between words in a file name.
///
/// `normalize_gap` maps each of these to a single space, and
/// `trim_title_suffix` strips them from the end of a candidate title.
pub(crate) const SEPS: &[char] = &[
'.', ' ', '_', '-', '+', '/', '\\', ];
/// Characters stripped from both ends of a gap by `normalize_gap`, and from
/// the start of the remainder after `trim_orphaned_brackets` drops a closer.
///
/// Contains the bracket characters, ASCII separators and U+3000
/// (ideographic space).
pub(crate) const TRIM_CHARS: &[char] = &[
'(', ')', '[', ']', '{', '}', '.', ' ', '_', '-', '+', '\u{3000}',
];
/// A stretch of the input that no match span covers, in normalized form.
#[derive(Debug, Clone)]
pub(crate) struct UnclaimedGap {
/// Byte offset in the input where the raw (pre-normalization) stretch begins.
pub start: usize,
/// The stretch after `normalize_gap`; `find_unclaimed_gaps` never emits an
/// empty one.
pub text: String,
}
/// Collects the stretches of `input` that are not covered by any span in
/// `matches`.
///
/// Scanning begins at `crate::filename_start(input)` and stops at
/// `strip_extension_pos(input)`. Every uncovered stretch is run through
/// `normalize_gap`; stretches that normalize to an empty string are dropped.
/// The returned gaps are ordered by `start`, which is the byte offset of the
/// raw stretch in `input`.
pub(crate) fn find_unclaimed_gaps(input: &str, matches: &[MatchSpan]) -> Vec<UnclaimedGap> {
    let mut spans: Vec<(usize, usize)> = matches.iter().map(|m| (m.start, m.end)).collect();
    spans.sort_by_key(|span| span.0);

    let mut pos = crate::filename_start(input);
    let scan_end = strip_extension_pos(input);
    let mut gaps: Vec<UnclaimedGap> = Vec::new();

    // Normalizes `input[from..to]` and records it unless nothing is left.
    let mut record = |from: usize, to: usize| {
        let text = normalize_gap(&input[from..to]);
        if !text.is_empty() {
            gaps.push(UnclaimedGap { start: from, text });
        }
    };

    for &(span_start, span_end) in &spans {
        // A span that starts at or before `pos` overlaps already-covered
        // text, so it only moves the cursor forward.
        if span_start > pos {
            record(pos, span_start);
        }
        // Span ends are clamped so the cursor never moves past the scan end.
        pos = pos.max(span_end.min(scan_end));
    }

    // Whatever remains between the last span and the scan end.
    if pos < scan_end {
        record(pos, scan_end);
    }

    gaps
}
/// Finds text that the first file's gaps share with every sibling file.
///
/// `all_gaps[0]` is the target file and the remaining entries are its
/// siblings. Each target gap is compared with the sibling gap at the same
/// index; when a sibling has fewer gaps, the sibling gap with the longest
/// common prefix is used instead. The running common prefix must stay at
/// least two characters long across all siblings, and must still be at least
/// two characters after `trim_title_suffix`.
///
/// Returns `(start, text)` for the qualifying gap with the smallest `start`
/// (ties go to the longer text), or `None` when fewer than two files are
/// given or no gap qualifies.
pub(crate) fn find_invariant_text(all_gaps: &[&[UnclaimedGap]]) -> Option<(usize, String)> {
    let (target_gaps, siblings) = match all_gaps {
        [target, rest @ ..] if !rest.is_empty() => (target, rest),
        _ => return None,
    };

    let mut best: Option<(usize, String)> = None;

    for (idx, target) in target_gaps.iter().enumerate() {
        let mut shared = target.text.clone();

        for sibling in siblings {
            // Positional counterpart first; otherwise the closest-looking gap.
            let counterpart = sibling.get(idx).or_else(|| {
                sibling
                    .iter()
                    .max_by_key(|g| common_prefix_len(&shared, &g.text))
            });
            shared = match counterpart {
                Some(g) => common_prefix_chars(&shared, &g.text),
                None => String::new(),
            };
            // Fewer than two shared characters is treated as no overlap.
            if shared.chars().count() < 2 {
                shared.clear();
                break;
            }
        }

        let candidate = trim_title_suffix(&shared);
        if candidate.chars().count() < 2 {
            continue;
        }

        let replaces_best = match &best {
            None => true,
            Some((best_start, best_text)) => {
                target.start < *best_start
                    || (target.start == *best_start && candidate.len() > best_text.len())
            }
        };
        if replaces_best {
            best = Some((target.start, candidate));
        }
    }

    best
}
/// Returns the longest prefix, compared character by character, that `a` and
/// `b` have in common.
fn common_prefix_chars(a: &str, b: &str) -> String {
    let mut shared = String::new();
    for (left, right) in a.chars().zip(b.chars()) {
        if left != right {
            break;
        }
        shared.push(left);
    }
    shared
}
/// Counts how many leading characters (not bytes) `a` and `b` have in common.
fn common_prefix_len(a: &str, b: &str) -> usize {
    let mut rhs = b.chars();
    let mut matched = 0;
    for ch in a.chars() {
        match rhs.next() {
            Some(other) if other == ch => matched += 1,
            _ => break,
        }
    }
    matched
}
/// CJK characters that `trim_title_suffix` strips from the end of a
/// candidate title.
// NOTE(review): judging by the name these are ordinal/counter markers
// (as in "第13話") — confirm the list against real file names.
const CJK_ORDINAL_CHARS: &[char] = &[
'第', '巻', '集', '話', '回', '編', '章', '期', '部', ];
/// Strips trailing separators, CJK ordinal characters and ASCII digits from
/// `text` until nothing more can be removed, then returns what is left.
///
/// Surrounding whitespace is trimmed as part of every pass.
fn trim_title_suffix(text: &str) -> String {
    /// One pass: CJK ordinal characters, then digits, then separators and
    /// whitespace — in that order.
    fn strip_once(s: &str) -> &str {
        s.trim_end_matches(CJK_ORDINAL_CHARS)
            .trim_end_matches(|c: char| c.is_ascii_digit())
            .trim_end_matches(SEPS)
            .trim()
    }

    let mut current = text.trim_end_matches(SEPS).trim();
    loop {
        let next = strip_once(current);
        // A pass that removes nothing means the suffix is fully trimmed.
        if next.len() == current.len() {
            return current.to_string();
        }
        current = next;
    }
}
/// Turns a raw gap into its normalized form.
///
/// Steps, in order: drop bracketed regions, replace every separator in
/// `SEPS` with a space, trim `TRIM_CHARS` from both ends, then cut away
/// anything up to an orphaned closing bracket.
fn normalize_gap(text: &str) -> String {
    // A char-slice pattern matches each separator individually, so this is a
    // one-for-one substitution.
    let spaced = strip_bracket_regions(text).replace(SEPS, " ");
    let core = spaced.trim_matches(TRIM_CHARS);
    trim_orphaned_brackets(core).to_string()
}
/// Returns `text` without anything enclosed in `()`, `[]` or `{}`.
///
/// Nesting depth is tracked separately per bracket kind. Opening brackets are
/// never copied. A closing bracket with no open counterpart of its own kind
/// is treated as an ordinary character. Ordinary characters are copied only
/// while all three depths are zero, so an opener that is never closed
/// suppresses the rest of the text.
fn strip_bracket_regions(text: &str) -> String {
    let (mut round, mut square, mut curly) = (0u32, 0u32, 0u32);
    let mut kept = String::with_capacity(text.len());

    for ch in text.chars() {
        match ch {
            '(' => round += 1,
            '[' => square += 1,
            '{' => curly += 1,
            ')' if round > 0 => round -= 1,
            ']' if square > 0 => square -= 1,
            '}' if curly > 0 => curly -= 1,
            _ if round == 0 && square == 0 && curly == 0 => kept.push(ch),
            _ => {}
        }
    }

    kept
}
/// Cuts away everything up to and including a closing bracket that has no
/// matching opener before it, then trims `TRIM_CHARS` from the new start.
///
/// The bracket kinds are handled once each, in the order `)`, `]`, `}`, and
/// only the first occurrence of each closer is considered.
fn trim_orphaned_brackets(text: &str) -> &str {
    const PAIRS: [(char, char); 3] = [('(', ')'), ('[', ']'), ('{', '}')];

    let mut rest = text;
    for (open, close) in PAIRS {
        if let Some((head, tail)) = rest.split_once(close) {
            // An opener ahead of the closer means the pair is balanced.
            if !head.contains(open) {
                rest = tail.trim_start_matches(TRIM_CHARS);
            }
        }
    }
    rest
}
/// Returns the byte offset at which the file extension (including its dot)
/// begins, or `input.len()` when no extension is recognised.
///
/// The text after the last `.` counts as an extension when it is at most ten
/// bytes long and consists solely of ASCII letters and digits. An empty
/// suffix (a trailing dot) also satisfies that rule.
pub(crate) fn strip_extension_pos(input: &str) -> usize {
    match input.rsplit_once('.') {
        Some((stem, ext))
            if ext.len() <= 10 && ext.chars().all(|c| c.is_ascii_alphanumeric()) =>
        {
            // `stem` ends right before the last dot, so its length is the
            // dot's byte offset.
            stem.len()
        }
        _ => input.len(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::matcher::span::Property;

    /// Builds a span covering `start..end`; the other arguments are fixed,
    /// arbitrary values.
    fn make_match(start: usize, end: usize) -> MatchSpan {
        MatchSpan::new(start, end, Property::VideoCodec, "test")
    }

    /// Shorthand for an `UnclaimedGap` fixture.
    fn gap(start: usize, text: &str) -> UnclaimedGap {
        UnclaimedGap {
            start,
            text: text.to_string(),
        }
    }

    #[test]
    fn unclaimed_gaps_basic() {
        // Bytes 12..16 are "x264".
        let gaps = find_unclaimed_gaps("Hello.World.x264.mkv", &[make_match(12, 16)]);
        assert_eq!(gaps.len(), 1);
        assert_eq!(gaps[0].text, "Hello World");
    }

    #[test]
    fn unclaimed_gaps_no_matches() {
        let gaps = find_unclaimed_gaps("Just.A.Title.mkv", &[]);
        assert_eq!(gaps.len(), 1);
        assert_eq!(gaps[0].text, "Just A Title");
    }

    #[test]
    fn invariant_text_basic() {
        let first = vec![gap(0, "Breaking Bad"), gap(20, "S05E16")];
        let second = vec![gap(0, "Breaking Bad"), gap(20, "S05E14")];
        let found = find_invariant_text(&[&first, &second]);
        assert_eq!(found, Some((0, "Breaking Bad".to_string())));
    }

    #[test]
    fn invariant_text_cjk() {
        let first = vec![gap(4, "十二国記"), gap(20, "第13話")];
        let second = vec![gap(4, "十二国記"), gap(20, "第01話")];
        let found = find_invariant_text(&[&first, &second]);
        assert_eq!(found, Some((4, "十二国記".to_string())));
    }

    #[test]
    fn invariant_text_single_set_returns_none() {
        let only = vec![gap(0, "Title")];
        assert_eq!(find_invariant_text(&[&only]), None);
    }

    #[test]
    fn invariant_text_no_common_returns_none() {
        let first = vec![gap(0, "Alpha")];
        let second = vec![gap(0, "Bravo")];
        assert_eq!(find_invariant_text(&[&first, &second]), None);
    }

    #[test]
    fn invariant_prefers_earliest_gap() {
        // Both gaps are invariant; the one with the smaller offset must win.
        let first = vec![gap(0, "Show"), gap(30, "GROUP")];
        let second = vec![gap(0, "Show"), gap(30, "GROUP")];
        let found = find_invariant_text(&[&first, &second]);
        assert_eq!(found, Some((0, "Show".to_string())));
    }
}