mod clean;
mod secondary;
mod strategies;
pub use secondary::{
extract_alternative_titles, extract_episode_title, extract_film_title, infer_media_type,
};
pub use strategies::{TitleConfidence, TitleExtraction};
use crate::FILENAME_SEPS as SEPS;
use crate::matcher::span::{MatchSpan, Property};
use crate::tokenizer::TokenStream;
use crate::zone_map::ZoneMap;
use clean::{clean_title, is_abbreviated, is_likely_extension, pick_better_casing};
use strategies::{StrategyContext, TitleStrategy};
// Opening and closing round, square and curly bracket characters.
// NOTE(review): nothing in the visible part of this file references this
// constant — confirm it is still needed.
const BRACKETS: &[char] = &['(', ')', '[', ']', '{', '}'];
/// Returns `true` when `p` is one of the properties this module treats as
/// "technical" release metadata (codecs, source, resolution, edition, ...).
///
/// A technical property as the first match in a filename yields only a weak
/// title boundary, and a run of them at the start of a filename is skipped
/// when looking for a title.
fn is_tech_property(p: Property) -> bool {
    // Every property that counts as technical; anything not listed does not.
    const TECH_PROPERTIES: &[Property] = &[
        Property::VideoCodec,
        Property::AudioCodec,
        Property::Source,
        Property::ScreenSize,
        Property::AudioChannels,
        Property::AudioProfile,
        Property::VideoProfile,
        Property::FrameRate,
        Property::ColorDepth,
        Property::StreamingService,
        Property::Edition,
        Property::Other,
    ];
    TECH_PROPERTIES.contains(&p)
}
/// Extracts the title from `input`, a file name optionally preceded by a
/// directory path (either `/` or `\` separators).
///
/// The title candidate is the leading part of the filename component and
/// ends at the first of:
///
/// * the earliest non-extension match starting inside the filename
///   (reclaimable matches only count when they sit at the very start of the
///   filename), or
/// * when no such match exists, the last `.` of the filename if the text
///   after it looks like a file extension, otherwise the end of the filename.
///
/// The candidate is then cut at the first structural separator (see
/// [`find_first_structural_separator`]) and cleaned with `clean_title`.
///
/// # Parameters
/// * `input` - full path or bare filename being parsed.
/// * `matches` - property matches already found in `input`; offsets are byte
///   offsets into `input`.
/// * `zone_map` - only forwarded to the empty-title fallback.
/// * `_token_stream` - currently unused.
///
/// # Returns
/// `Some` with the title span and its confidence, or `None` when neither the
/// filename nor any fallback strategy yields a non-empty title.
/// Confidence is `Strong` when a structural separator was found or the
/// bounding match is not a technical property, `Weak` otherwise.
pub fn extract_title(
    input: &str,
    matches: &[MatchSpan],
    zone_map: &ZoneMap,
    _token_stream: &TokenStream,
) -> Option<TitleExtraction> {
    let filename_start = input.rfind(['/', '\\']).map_or(0, |i| i + 1);
    let filename = &input[filename_start..];
    let filename_end = filename_start + filename.len();

    // Earliest match that can bound the title. Reclaimable matches are
    // ignored unless they open the filename.
    let first_match_in_filename = matches
        .iter()
        .filter(|m| {
            m.start >= filename_start
                && !m.is_extension
                && (!m.reclaimable || m.start == filename_start)
        })
        .min_by_key(|m| m.start);

    let title_end_abs = match first_match_in_filename {
        Some(m) => m.start,
        // No bounding match: stop before a trailing extension if there is one.
        None => match filename.rfind('.') {
            Some(dot) if is_likely_extension(&filename[dot + 1..].to_lowercase()) => {
                filename_start + dot
            }
            _ => filename_end,
        },
    };

    // Nothing precedes the first match: switch to the dedicated fallbacks.
    if title_end_abs <= filename_start {
        return handle_empty_title(
            input,
            filename_start,
            filename,
            matches,
            zone_map,
            first_match_in_filename,
        );
    }

    // A structural separator inside the candidate ends the title early and is
    // treated as strong evidence of the boundary.
    let structural_sep_offset =
        find_first_structural_separator(&input[filename_start..title_end_abs]);
    let title_end_abs =
        structural_sep_offset.map_or(title_end_abs, |offset| filename_start + offset);

    let confidence = if structural_sep_offset.is_some() {
        TitleConfidence::Strong
    } else {
        match first_match_in_filename {
            Some(m) if !is_tech_property(m.property) => TitleConfidence::Strong,
            _ => TitleConfidence::Weak,
        }
    };

    let cleaned = clean_title(&input[filename_start..title_end_abs]);

    // One context serves every strategy call below.
    let ctx = StrategyContext {
        input,
        matches,
        filename_start,
    };

    if cleaned.is_empty() {
        return strategies::run_fallback_ladder(&ctx);
    }

    // Run the parent-directory strategy once; both checks below reuse it.
    let parent_title = if has_parent_dir(input) {
        strategies::ParentDir.try_extract(&ctx)
    } else {
        None
    };

    // Same title as the parent directory up to letter case: keep the
    // filename span but use whichever casing `pick_better_casing` prefers.
    if let Some(parent_match) = &parent_title
        && parent_match.value.to_lowercase() == cleaned.to_lowercase()
        && parent_match.value != cleaned
    {
        let best = pick_better_casing(&cleaned, &parent_match.value);
        if best != cleaned {
            return Some(TitleExtraction::new(
                MatchSpan::new(filename_start, title_end_abs, Property::Title, best),
                confidence,
            ));
        }
    }

    // An abbreviated filename title gives way to the parent directory's title.
    if is_abbreviated(&cleaned)
        && let Some(parent_title) = parent_title
    {
        return Some(TitleExtraction::new(
            parent_title,
            strategies::ParentDir.confidence(),
        ));
    }

    Some(TitleExtraction::new(
        MatchSpan::new(filename_start, title_end_abs, Property::Title, cleaned),
        confidence,
    ))
}
/// Removes from `matches` every reclaimable match that lies entirely within
/// the span of `title`.
///
/// Non-reclaimable matches are always kept, as are reclaimable matches that
/// start before the title or end after it.
pub fn absorb_reclaimable(title: &MatchSpan, matches: &mut Vec<MatchSpan>) {
    matches.retain(|candidate| {
        let inside_title = title.start <= candidate.start && candidate.end <= title.end;
        !(candidate.reclaimable && inside_title)
    });
}
fn handle_empty_title(
input: &str,
filename_start: usize,
filename: &str,
matches: &[MatchSpan],
zone_map: &ZoneMap,
first_match_in_filename: Option<&MatchSpan>,
) -> Option<TitleExtraction> {
if let Some(ref yi) = zone_map.year
&& let Some(ty) = yi.title_years.iter().find(|ty| ty.start == filename_start)
&& let Some(title) =
extract_title_after_position(input, ty.end, filename_start, filename, matches)
{
return Some(TitleExtraction::new(title, TitleConfidence::Strong));
}
if let Some(first_m) = first_match_in_filename
&& first_m.property == Property::Year
&& first_m.start == filename_start
&& let Some(title) =
extract_title_after_position(input, first_m.end, filename_start, filename, matches)
{
return Some(TitleExtraction::new(title, TitleConfidence::Strong));
}
if let Some(first_m) = first_match_in_filename
&& first_m.start == filename_start
&& is_tech_property(first_m.property)
{
let mut skip_end = first_m.end;
loop {
let next = matches.iter().find(|m| {
m.start >= skip_end
&& m.start <= skip_end + 3 && m.start < filename_start + filename.len()
&& !m.is_extension
&& is_tech_property(m.property)
});
match next {
Some(m) => skip_end = m.end,
None => break,
}
}
if let Some(title) =
extract_title_after_position(input, skip_end, filename_start, filename, matches)
{
return Some(TitleExtraction::new(title, TitleConfidence::Weak));
}
}
if !input.contains(['/', '\\']) && !input.contains('.') && input.len() <= 10 {
let cleaned = clean_title(input);
if !cleaned.is_empty() {
return Some(TitleExtraction::new(
MatchSpan::new(0, input.len(), Property::Title, cleaned),
TitleConfidence::Weak,
));
}
}
let ctx = StrategyContext {
input,
matches,
filename_start,
};
if let Some(title) = strategies::UnclaimedBracket.try_extract(&ctx) {
return Some(TitleExtraction::new(
title,
strategies::UnclaimedBracket.confidence(),
));
}
strategies::ParentDir
.try_extract(&ctx)
.map(|t| TitleExtraction::new(t, strategies::ParentDir.confidence()))
}
/// Builds a title span from the text that follows byte offset `start`.
///
/// The span runs from `start` to the start of the nearest non-extension match
/// beginning strictly after `start`, or to the end of the filename when there
/// is no such match.
///
/// Returns `None` when that range is empty or cleans down to an empty string.
fn extract_title_after_position(
    input: &str,
    start: usize,
    filename_start: usize,
    filename: &str,
    matches: &[MatchSpan],
) -> Option<MatchSpan> {
    // NOTE(review): extension matches are filtered out here, so when nothing
    // else follows, the range reaches the end of the filename, extension
    // included. Presumably `clean_title` copes with that; confirm.
    let boundary = matches
        .iter()
        .filter(|m| !m.is_extension && m.start > start)
        .map(|m| m.start)
        .min()
        .unwrap_or(filename_start + filename.len());

    if boundary <= start {
        return None;
    }

    let cleaned = clean_title(&input[start..boundary]);
    if cleaned.is_empty() {
        None
    } else {
        Some(MatchSpan::new(start, boundary, Property::Title, cleaned))
    }
}
/// Returns `true` when `input` contains a `/` or `\` path separator, i.e. it
/// has at least one directory component in front of the filename.
fn has_parent_dir(input: &str) -> bool {
    input.contains(['/', '\\'])
}
/// Returns the byte offset of the earliest structural separator in `raw`.
///
/// The separators are ` (`, `_(`, `.(`, ` - `, `_-_`, `.-.` and `--`. Only the
/// first occurrence of each one is examined, and it is ignored when it starts
/// before byte `MIN_TITLE_LEN` (3); later occurrences of that same separator
/// are not looked at. Among the surviving positions the smallest is returned,
/// or `None` when none survives.
pub(super) fn find_first_structural_separator(raw: &str) -> Option<usize> {
    const MIN_TITLE_LEN: usize = 3;
    const SEPARATORS: &[&str] = &[" (", "_(", ".(", " - ", "_-_", ".-.", "--"];

    let mut earliest: Option<usize> = None;
    for sep in SEPARATORS {
        match raw.find(sep) {
            Some(pos) if pos >= MIN_TITLE_LEN => {
                earliest = Some(earliest.map_or(pos, |best| best.min(pos)));
            }
            _ => {}
        }
    }
    earliest
}
// Unit tests for structural-separator detection, title extraction,
// reclaimable-match absorption and media-type inference.
#[cfg(test)]
mod tests {
use super::*;
use crate::tokenizer;
use crate::zone_map;
// Builds the zone map for `input` from a fresh tokenization.
fn test_zone_map(input: &str) -> ZoneMap {
let ts = tokenizer::tokenize(input);
zone_map::build_zone_map(input, &ts)
}
// Tokenizes `input`; `extract_title` takes the stream but does not read it.
fn test_ts(input: &str) -> tokenizer::TokenStream {
tokenizer::tokenize(input)
}
// " - " at offset 4 comes before " (" at offset 15, so the smaller offset wins.
#[test]
fn first_separator_wins_picks_earliest_offset() {
assert_eq!(
find_first_structural_separator("Show - Subtitle (2020)"),
Some(4)
);
}
// A separator starting before byte 3 is ignored; one at exactly byte 3 counts.
#[test]
fn first_separator_skips_too_short_prefix() {
assert_eq!(find_first_structural_separator("a - b"), None);
assert_eq!(find_first_structural_separator("abc - d"), Some(3));
}
// Input without any of the separator strings yields `None`.
#[test]
fn first_separator_returns_none_on_separatorless_input() {
assert_eq!(
find_first_structural_separator("PlainTitleNoSeparator"),
None
);
}
// Pins the known caveat: the first " - " is reported even when the title
// continues after it; callers are expected to work around that.
#[test]
fn first_separator_caveat_anime_multi_segment() {
let raw = "Enen no Shouboutai - San no Shou Part 2";
assert_eq!(
find_first_structural_separator(raw),
Some(18),
"function returns the first \" - \"; AfterBracketGroup must \
bypass it on the anime-episode branch (#124 / #127)"
);
}
// The title is the text before the first match (the year at bytes 11..15).
#[test]
fn test_title_before_year() {
let input = "The.Matrix.1999.1080p.mkv";
let matches = vec![MatchSpan::new(11, 15, Property::Year, "1999")];
let zm = test_zone_map(input);
let ts = test_ts(input);
let title = extract_title(input, &matches, &zm, &ts).unwrap();
assert_eq!(title.span.value, "The Matrix");
}
// With no matches at all, the title is the filename without its extension.
#[test]
fn test_title_no_matches() {
let input = "JustATitle.mkv";
let zm = test_zone_map(input);
let ts = test_ts(input);
let title = extract_title(input, &[], &zm, &ts).unwrap();
assert_eq!(title.span.value, "JustATitle");
}
// Directory components are skipped; only the filename contributes the title.
#[test]
fn test_title_with_path() {
let input = "/movies/dir/The.Movie.2020.mkv";
let matches = vec![MatchSpan::new(22, 26, Property::Year, "2020")];
let zm = test_zone_map(input);
let ts = test_ts(input);
let title = extract_title(input, &matches, &zm, &ts).unwrap();
assert_eq!(title.span.value, "The Movie");
}
// The filename "dmd-aw" is expected to give way to the title taken from the
// parent directory. The only match lies in the directory part of the path.
#[test]
fn test_abbreviated_fallback() {
let input = "Movies/Alice in Wonderland DVDRip.XviD-DiAMOND/dmd-aw.avi";
let matches = vec![MatchSpan::new(27, 34, Property::Source, "DVD")];
let zm = test_zone_map(input);
let ts = test_ts(input);
let title = extract_title(input, &matches, &zm, &ts);
assert!(title.is_some());
assert_eq!(title.unwrap().span.value, "Alice in Wonderland");
}
// Season and episode matches together are expected to infer "episode".
#[test]
fn test_infer_episode() {
let matches = vec![
MatchSpan::new(0, 5, Property::Season, "1"),
MatchSpan::new(5, 10, Property::Episode, "3"),
];
assert_eq!(infer_media_type("Show.S01E03.mkv", &matches), "episode");
}
// A reclaimable match in the middle of the filename does not end the title,
// and `absorb_reclaimable` then drops it because it lies inside the title span.
#[test]
fn test_reclaimable_absorbed_into_title() {
let input = "Harold.And.Kumar.3D.Christmas.mkv";
let reclaimable_3d = MatchSpan::new(17, 19, Property::Other, "3D").with_reclaimable();
let mut matches = vec![reclaimable_3d];
let zm = test_zone_map(input);
let ts = test_ts(input);
let title = extract_title(input, &matches, &zm, &ts).unwrap();
assert_eq!(title.span.value, "Harold And Kumar 3D Christmas");
absorb_reclaimable(&title.span, &mut matches);
assert!(matches.is_empty(), "reclaimable 3D should be absorbed");
}
// The same "3D" token without the reclaimable flag ends the title before it.
#[test]
fn test_confident_3d_stops_title() {
let input = "Pacific.Rim.3D.2013.BluRay.mkv";
let confident_3d = MatchSpan::new(12, 14, Property::Other, "3D");
let year = MatchSpan::new(15, 19, Property::Year, "2013");
let matches = vec![confident_3d, year];
let zm = test_zone_map(input);
let ts = test_ts(input);
let title = extract_title(input, &matches, &zm, &ts).unwrap();
assert_eq!(title.span.value, "Pacific Rim");
}
// A lone year match is expected to infer "movie".
#[test]
fn test_infer_movie() {
let matches = vec![MatchSpan::new(0, 4, Property::Year, "2024")];
assert_eq!(infer_media_type("Movie.2024.mkv", &matches), "movie");
}
// An episode match with HEURISTIC priority under a path starting with
// "movie/" is expected to still infer "movie".
#[test]
fn test_movie_dir_suppresses_heuristic_episode() {
let matches = vec![
MatchSpan::new(52, 56, Property::Episode, "10")
.with_priority(crate::priority::HEURISTIC),
];
assert_eq!(
infer_media_type(
"movie/Japanese/Detective Conan/Detective.Conan.Movie.10.mkv",
&matches
),
"movie"
);
}
// A season match plus an episode match with STRUCTURAL priority is expected
// to infer "episode" even under a "movie/" directory.
#[test]
fn test_movie_dir_keeps_strong_episode() {
let matches = vec![
MatchSpan::new(0, 6, Property::Season, "1"),
MatchSpan::new(0, 6, Property::Episode, "3").with_priority(crate::priority::STRUCTURAL),
];
assert_eq!(
infer_media_type("movie/Show.S01E03.mkv", &matches),
"episode"
);
}
}