use super::clean::{clean_episode_title, clean_title};
use super::find_first_structural_separator;
use crate::matcher::span::{MatchSpan, Property};
use crate::tokenizer::TokenStream;
use std::sync::LazyLock;
/// Matches a trailing "Part <n>" suffix at the very end of a string.
///
/// The match is case-insensitive, requires at least one whitespace character
/// before `Part`, and accepts either a short roman-numeral form or a run of
/// decimal digits as the part number, optionally followed by whitespace.
///
/// NOTE(review): the `X{0,3}` alternative can match the empty string, so a
/// bare trailing " Part" with no number also matches — confirm this is intended.
static RE_TRAILING_PART: LazyLock<regex::Regex> = LazyLock::new(|| {
regex::Regex::new(r"(?i)\s+Part\s*(?:I{1,4}|IV|VI{0,3}|IX|X{0,3}|[0-9]+)\s*$")
// The pattern is a compile-time constant, so a failure here is a programming error.
.expect("RE_TRAILING_PART regex is valid")
});
/// Finds the episode title of `input`, if any.
///
/// A token-stream segment is a candidate when it fully contains at least one
/// `Episode`, `Season` or `Date` match. Candidates are tried from the one that
/// starts latest to the one that starts earliest, and the first segment that
/// yields a title wins.
///
/// Returns `None` when no candidate segment produces a title.
pub fn extract_episode_title(
    input: &str,
    matches: &[MatchSpan],
    token_stream: &TokenStream,
) -> Option<MatchSpan> {
    // True when `m` is an episode/season/date marker lying entirely in [lo, hi].
    let is_anchor = |m: &MatchSpan, lo: usize, hi: usize| {
        m.start >= lo
            && m.end <= hi
            && matches!(
                m.property,
                Property::Episode | Property::Season | Property::Date
            )
    };

    let mut candidates: Vec<(usize, usize)> = token_stream
        .segments
        .iter()
        .map(|seg| (seg.start, seg.end))
        .filter(|&(lo, hi)| matches.iter().any(|m| is_anchor(m, lo, hi)))
        .collect();

    // Stable sort, latest start first: segments with equal starts keep their
    // original relative order.
    candidates.sort_by(|a, b| b.0.cmp(&a.0));

    candidates
        .into_iter()
        .find_map(|(lo, hi)| extract_episode_title_in_segment(input, matches, lo, hi))
}
/// Tries to extract an episode title from the byte range `seg_start..seg_end`
/// of `input`.
///
/// The title region starts right after the last episode/season (or, failing
/// that, date) match inside the segment and ends at the first of:
/// * the next technical match (codec, source, screen size, ... or a
///   non-suspicious `Other` match) that starts inside the segment,
/// * the last `.` of the segment when a `Container` match exists at or after
///   `seg_start`,
/// * the end of the segment.
///
/// The region is then cut at the first `[`, or at the first `(` that is not
/// immediately followed by an ASCII digit.
///
/// Returns `None` when there is no anchor match, the region is empty or starts
/// with a bracket, the cleaned text is at most one byte long, the text starts
/// with a season word (`season`, `saison`, `tem`, `stagione`), or another
/// episode/season match lies inside the region.
fn extract_episode_title_in_segment(
    input: &str,
    matches: &[MatchSpan],
    seg_start: usize,
    seg_end: usize,
) -> Option<MatchSpan> {
    // Anchor: the right-most episode/season style match in the segment, with a
    // date match as fallback.
    let last_ep_season = matches
        .iter()
        .filter(|m| {
            m.start >= seg_start
                && m.end <= seg_end
                && matches!(
                    m.property,
                    Property::Episode
                        | Property::Season
                        | Property::EpisodeCount
                        | Property::SeasonCount
                )
        })
        .max_by_key(|m| m.end);
    let last_date = matches
        .iter()
        .filter(|m| m.start >= seg_start && m.end <= seg_end && m.property == Property::Date)
        .max_by_key(|m| m.end);
    let last_ep_match = last_ep_season.or(last_date)?;
    let ep_title_start = last_ep_match.end;

    // Properties that terminate an episode title.
    let technical_props = [
        Property::VideoCodec,
        Property::AudioCodec,
        Property::Source,
        Property::ScreenSize,
        Property::Edition,
        Property::Language,
        Property::SubtitleLanguage,
        Property::AudioChannels,
        Property::Container,
        Property::StreamingService,
        Property::Year,
        Property::Date,
        Property::FrameRate,
        Property::ColorDepth,
        Property::VideoProfile,
    ];
    let next_tech = matches
        .iter()
        .filter(|m| {
            m.start >= ep_title_start
                && m.start < seg_end
                && (technical_props.contains(&m.property)
                    || (m.property == Property::Other && !is_suspicious_other(m, input, matches)))
        })
        .min_by_key(|m| m.start);

    let ep_title_end = match next_tech {
        Some(m) => m.start,
        None => {
            let has_container = matches
                .iter()
                .any(|m| m.property == Property::Container && m.start >= seg_start);
            if has_container {
                // Stop before the extension dot when the name carries a container.
                input[seg_start..seg_end]
                    .rfind('.')
                    .map_or(seg_end, |pos| seg_start + pos)
            } else {
                seg_end
            }
        }
    };
    if ep_title_end <= ep_title_start {
        return None;
    }

    // Cut at the first bracket group; a '(' directly followed by a digit is
    // left in place.
    let region = &input[ep_title_start..ep_title_end];
    let bracket_pos = region.find('[').or_else(|| {
        region.find('(').filter(|&pos| {
            let after = &region[pos + 1..];
            !after.starts_with(|c: char| c.is_ascii_digit())
        })
    });
    // After this match the end is strictly greater than the start: `Some(0)`
    // bails out, `Some(pos)` has `pos > 0`, and `None` keeps the checked end.
    let ep_title_end = match bracket_pos {
        Some(0) => return None,
        Some(pos) => ep_title_start + pos,
        None => ep_title_end,
    };

    let raw = split_ep_title_at_show_repeat(&input[ep_title_start..ep_title_end], matches);
    let cleaned = clean_episode_title(raw);
    if cleaned.is_empty() {
        return None;
    }

    // Drop a trailing "Part N" and reject results of at most one byte.
    let cleaned = RE_TRAILING_PART.replace(&cleaned, "").trim().to_string();
    if cleaned.len() <= 1 {
        return None;
    }

    // A "title" that starts with a season word is a season label, not a title.
    let lower = cleaned.to_lowercase();
    if lower.starts_with("season")
        || lower.starts_with("saison")
        || lower.starts_with("tem")
        || lower.starts_with("stagione")
    {
        return None;
    }

    // Another episode/season marker inside the region means this is not a
    // plain title.
    let has_ep_in_gap = matches.iter().any(|m| {
        m.start >= ep_title_start
            && m.end <= ep_title_end
            && (m.property == Property::Episode || m.property == Property::Season)
    });
    if has_ep_in_gap {
        return None;
    }

    Some(MatchSpan::new(
        ep_title_start,
        ep_title_end,
        Property::EpisodeTitle,
        cleaned,
    ))
}
/// Splits a "film series" style name into a film title and a regular title.
///
/// Requires both a `Film` match and an existing `Title` match. The film title
/// is the cleaned text between the filename start (from
/// `crate::filename_start`) and the `Film` match. The title is the cleaned
/// text between the end of the `Film` match and the next non-extension match
/// that is not `Title`, `ReleaseGroup` or `Bonus`; when there is no such match
/// it runs to the last `.` of the filename, or to the end of `input`. The
/// title is then truncated at its first structural separator, if any.
///
/// Returns `(film_title_span, title_span)`, or `None` when either piece is
/// missing or empty.
pub fn extract_film_title(
    input: &str,
    matches: &[MatchSpan],
    _token_stream: &TokenStream,
) -> Option<(MatchSpan, MatchSpan)> {
    let film = matches.iter().find(|m| m.property == Property::Film)?;
    // Only the presence of a Title match matters here; its content is unused.
    if !matches.iter().any(|m| m.property == Property::Title) {
        return None;
    }

    let name_start = crate::filename_start(input);
    if film.start <= name_start {
        return None;
    }
    let film_title = clean_title(&input[name_start..film.start]);
    if film_title.is_empty() {
        return None;
    }

    let film_end = film.end;
    let title_stop = matches
        .iter()
        .filter(|m| {
            let ignorable = matches!(
                m.property,
                Property::Title | Property::ReleaseGroup | Property::Bonus
            );
            m.start > film_end && m.start >= name_start && !m.is_extension && !ignorable
        })
        .map(|m| m.start)
        .min()
        .unwrap_or_else(|| {
            // No bounding match: stop at the extension dot when there is one.
            input[name_start..]
                .rfind('.')
                .map_or(input.len(), |dot| name_start + dot)
        });
    if title_stop <= film_end {
        return None;
    }

    let title_text = clean_title(&input[film_end..title_stop]);
    if title_text.is_empty() {
        return None;
    }
    let title_value = match find_first_structural_separator(&title_text) {
        Some(cut) => title_text[..cut].trim().to_string(),
        None => title_text,
    };

    // The title span end is derived from the cleaned value's byte length.
    let title_span_end = title_value.len() + film_end;
    Some((
        MatchSpan::new(name_start, film.start, Property::FilmTitle, film_title),
        MatchSpan::new(film_end, title_span_end, Property::Title, title_value),
    ))
}
/// Extracts alternative titles that follow the main title in the filename.
///
/// The raw title region runs from the filename start (from
/// `crate::filename_start`) to the first non-extension match that is not a
/// title-like property, or to the last `.` / end of `input` when there is no
/// such match. Everything after the first structural separator in that region
/// is split on ` - `, `_-_` and `.-.`; each piece that is non-empty after
/// cleaning becomes an `AlternativeTitle` span covering the piece's bytes in
/// `input`.
///
/// Returns an empty vector when there is no structural separator or nothing
/// follows it.
pub fn extract_alternative_titles(
    input: &str,
    matches: &[MatchSpan],
    _token_stream: &TokenStream,
) -> Vec<MatchSpan> {
    let filename_start = crate::filename_start(input);

    let first_match_start = matches
        .iter()
        .filter(|m| {
            m.start >= filename_start
                && !m.is_extension
                && !matches!(
                    m.property,
                    Property::Title
                        | Property::FilmTitle
                        | Property::AlternativeTitle
                        | Property::EpisodeTitle
                )
        })
        .map(|m| m.start)
        .min();
    let title_end_abs = first_match_start.unwrap_or_else(|| {
        input[filename_start..]
            .rfind('.')
            .map_or(input.len(), |p| filename_start + p)
    });
    if title_end_abs <= filename_start {
        return Vec::new();
    }

    let raw_title = &input[filename_start..title_end_abs];
    let Some(boundary_offset) = find_first_structural_separator(raw_title) else {
        return Vec::new();
    };

    // Byte width of the separator found at `boundary_offset`.
    let after = &raw_title[boundary_offset..];
    let sep_len =
        if after.starts_with(" - ") || after.starts_with("_-_") || after.starts_with(".-.") {
            3
        } else if after.starts_with("--")
            || after.starts_with(" (")
            || after.starts_with("_(")
            || after.starts_with(".(")
        {
            2
        } else {
            // Single-character separator: use its UTF-8 width so the slice
            // below never starts inside a multi-byte character.
            after.chars().next().map_or(1, char::len_utf8)
        };
    let sep_end = boundary_offset + sep_len;
    if sep_end >= raw_title.len() {
        return Vec::new();
    }

    let alt_raw = &raw_title[sep_end..];
    let alt_base = filename_start + sep_end;
    let separators = [" - ", "_-_", ".-."];

    split_on_separators(alt_raw, &separators)
        .into_iter()
        .filter_map(|segment| {
            let cleaned = clean_title(segment);
            if cleaned.is_empty() {
                return None;
            }
            // Every segment is a sub-slice of `alt_raw`, so the distance
            // between the two data pointers is the segment's byte offset.
            // Unlike a running counter this stays correct when empty pieces
            // (leading or consecutive separators) were dropped by the splitter.
            let rel = segment.as_ptr() as usize - alt_raw.as_ptr() as usize;
            let start = alt_base + rel;
            Some(MatchSpan::new(
                start,
                start + segment.len(),
                Property::AlternativeTitle,
                cleaned,
            ))
        })
        .collect()
}
/// Splits `s` on whichever of `separators` occurs first, repeatedly.
///
/// Empty pieces (produced by a leading, trailing or repeated separator) are
/// dropped. When two separators match at the same position the one listed
/// first in `separators` is used. Every returned piece is a sub-slice of `s`.
fn split_on_separators<'a>(s: &'a str, separators: &[&str]) -> Vec<&'a str> {
    let mut pieces = Vec::new();
    let mut rest = s;

    loop {
        // Earliest separator in `rest` as (position, separator length).
        let mut next_cut: Option<(usize, usize)> = None;
        for sep in separators {
            if let Some(at) = rest.find(*sep) {
                // Strict `<` keeps the first-listed separator on ties.
                if next_cut.map_or(true, |(best, _)| at < best) {
                    next_cut = Some((at, sep.len()));
                }
            }
        }

        let Some((at, sep_len)) = next_cut else {
            break;
        };
        if at != 0 {
            pieces.push(&rest[..at]);
        }
        rest = &rest[at + sep_len..];
    }

    if !rest.is_empty() {
        pieces.push(rest);
    }
    pieces
}
pub fn infer_media_type(input: &str, matches: &[MatchSpan]) -> &'static str {
let has_season = matches.iter().any(|m| m.property == Property::Season);
let has_date = matches.iter().any(|m| m.property == Property::Date);
let has_episode_details = matches
.iter()
.any(|m| m.property == Property::EpisodeDetails);
let has_edition = matches.iter().any(|m| m.property == Property::Edition);
let has_strong_movie_signal = path_hints_movie(input) || has_movie_signal(input);
let has_bonus_no_film = matches.iter().any(|m| m.property == Property::Bonus)
&& !matches.iter().any(|m| m.property == Property::Film)
&& !matches.iter().any(|m| m.property == Property::Year);
let strong_episode = matches
.iter()
.any(|m| m.property == Property::Episode && m.priority > crate::priority::HEURISTIC);
let weak_episode = !strong_episode && matches.iter().any(|m| m.property == Property::Episode);
let episode_details_signal = has_episode_details && !has_edition;
if episode_details_signal && !strong_episode && !weak_episode && !has_season && !has_date {
return "extra";
}
if strong_episode || has_season || has_date || episode_details_signal || has_bonus_no_film {
return "episode";
}
if has_strong_movie_signal {
return "movie";
}
if path_hints_episode(input) {
return "episode";
}
if weak_episode {
return "episode";
}
"movie"
}
/// Returns `true` when any directory component of `input` is exactly
/// `movie`, `movies`, `film` or `films` (compared after lowercasing).
///
/// Both `/` and `\` count as path separators. The final component (the file
/// name) is never inspected, and an input without a separator yields `false`.
fn path_hints_movie(input: &str) -> bool {
    let Some((directories, _file_name)) = input.rsplit_once(['/', '\\']) else {
        return false;
    };
    directories
        .to_lowercase()
        .split(['/', '\\'])
        .any(|part| part == "movie" || part == "movies" || part == "film" || part == "films")
}
/// Returns `true` when the file name of `input`, or its immediate parent
/// directory name, contains a movie marker.
///
/// Both `/` and `\` count as path separators. Without any separator the whole
/// input is the file name and the parent is empty.
fn has_movie_signal(input: &str) -> bool {
    let (directories, file_name) = match input.rsplit_once(['/', '\\']) {
        Some((dirs, file)) => (dirs, file),
        None => ("", input),
    };
    // The parent is the last component of the directory part.
    let parent_dir = directories
        .rsplit_once(['/', '\\'])
        .map_or(directories, |(_, last)| last);

    if is_movie_signal_component(file_name) {
        return true;
    }
    is_movie_signal_component(parent_dir)
}
/// Returns `true` when a single path component looks like a movie name.
///
/// The component is normalised first: `.`, `_` and `-` become spaces, runs of
/// whitespace collapse to one space, and the result is lowercased. It is a
/// movie signal when the normalised text
/// * contains `劇場版`, `剧场版` or `映画`,
/// * contains ` movie` or ` film` (a leading space is required), or
/// * starts with `movie ` (a trailing space is required).
///
/// An empty (or separator-only) component is never a signal.
fn is_movie_signal_component(component: &str) -> bool {
    // Substring markers. ` movie` also covers phrases such as " the movie" and
    // a trailing " movie", so those need no separate checks.
    const MOVIE_MARKERS: [&str; 5] = ["劇場版", "剧场版", "映画", " movie", " film"];

    let normalized = component
        .replace(['.', '_', '-'], " ")
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_lowercase();
    if normalized.is_empty() {
        return false;
    }

    normalized.starts_with("movie ")
        || MOVIE_MARKERS
            .iter()
            .any(|marker| normalized.contains(*marker))
}
/// Returns `true` when any directory component of `input` (lowercased) is
/// recognised by `is_episode_directory`.
///
/// Both `/` and `\` count as path separators. The final component (the file
/// name) is never inspected, and an input without a separator yields `false`.
fn path_hints_episode(input: &str) -> bool {
    input.rfind(['/', '\\']).is_some_and(|cut| {
        input[..cut]
            .to_lowercase()
            .split(['/', '\\'])
            .any(is_episode_directory)
    })
}
/// Returns `true` when a directory name points at episodic content.
///
/// The comparison is case-sensitive; the caller in this file passes lowercased
/// components. Recognised forms:
/// * one of the exact names `tv`, `tv shows`, `television`, `series`,
///   `anime`, `donghua`;
/// * a name starting with `season `, `saison `, `temporada ` or `stagione `;
/// * `s` followed by one to three ASCII digits (for example `s01`).
fn is_episode_directory(component: &str) -> bool {
    const EXACT_NAMES: [&str; 6] = ["tv", "tv shows", "television", "series", "anime", "donghua"];
    const SEASON_PREFIXES: [&str; 4] = ["season ", "saison ", "temporada ", "stagione "];

    if EXACT_NAMES.iter().any(|name| *name == component) {
        return true;
    }
    if SEASON_PREFIXES
        .iter()
        .any(|prefix| component.starts_with(*prefix))
    {
        return true;
    }
    match component.strip_prefix('s') {
        Some(digits) => {
            (1..=3).contains(&digits.len()) && digits.bytes().all(|b| b.is_ascii_digit())
        }
        None => false,
    }
}
/// Decides whether an `Other` match is more likely a title word than a
/// technical tag.
///
/// Only matches whose value is `Proper`, `Fix`, `3D` or `HD` (ASCII
/// case-insensitive) can be suspicious. Such a match is cleared when its
/// source text is one of `repack`, `readnfo`, `real`, `rerip`, `internal`, when
/// nothing follows it, or when the next alphanumeric word is a tier-2 token
/// (per `crate::zone_map::is_tier2_token`) or a known tech word. Otherwise it
/// is reported as suspicious.
fn is_suspicious_other(other_match: &MatchSpan, input: &str, _matches: &[MatchSpan]) -> bool {
    const TITLE_AMBIGUOUS_OTHER: &[&str] = &["Proper", "Fix", "3D", "HD"];
    const UNAMBIGUOUS_SOURCE_TEXT: [&str; 5] = ["repack", "readnfo", "real", "rerip", "internal"];

    let value_is_ambiguous = TITLE_AMBIGUOUS_OTHER
        .iter()
        .any(|v| v.eq_ignore_ascii_case(&other_match.value));
    if !value_is_ambiguous {
        return false;
    }

    let (start, end) = (other_match.start, other_match.end);

    // The normalised value may hide a source word that is never a title word.
    if start < end && end <= input.len() {
        let matched_text = input[start..end].to_lowercase();
        if UNAMBIGUOUS_SOURCE_TEXT.contains(&matched_text.as_str()) {
            return false;
        }
    }

    if end >= input.len() {
        return false;
    }

    // First alphanumeric word after the match, skipping separator characters.
    let following: String = input[end..]
        .trim_start_matches(['.', '-', '_', ' '])
        .chars()
        .take_while(|c| c.is_alphanumeric())
        .collect();
    if following.is_empty() {
        return false;
    }

    let looks_technical = crate::zone_map::is_tier2_token(&following) || is_tech_word(&following);
    !looks_technical
}
/// Returns `true` when `word` is one of a small fixed set of resolution or
/// dynamic-range tags (`720p`, `1080p`, `2160p`, `480p`, `hdr`, `hdr10`,
/// `sdr`), ignoring letter case.
fn is_tech_word(word: &str) -> bool {
    const TECH_WORDS: [&str; 7] = ["720p", "1080p", "2160p", "480p", "hdr", "hdr10", "sdr"];
    // None of these tags contains a letter that a non-ASCII character
    // lowercases to, so an ASCII case-insensitive comparison is sufficient.
    TECH_WORDS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(word))
}
/// Strips a repeated show title from the front of a raw episode-title slice.
///
/// For each of the separators ` - `, `_-_`, `.-.` (in that order) every
/// non-overlapping occurrence in `raw` is examined from left to right. The
/// text before the occurrence is normalised (`.` and `_` to spaces, trimmed,
/// leading `-`/spaces removed, lowercased). If it is non-empty and occurs
/// inside the lowercased value of the first `Title` match, the text after the
/// separator is returned — or, when the same separator occurs again in that
/// text, the text after that second occurrence.
///
/// Returns `raw` unchanged when there is no `Title` match or nothing qualifies.
fn split_ep_title_at_show_repeat<'a>(raw: &'a str, matches: &[MatchSpan]) -> &'a str {
    let Some(show_title) = matches
        .iter()
        .find(|m| m.property == Property::Title)
        .map(|m| m.value.to_lowercase())
    else {
        return raw;
    };

    for sep in [" - ", "_-_", ".-."] {
        for (sep_at, _) in raw.match_indices(sep) {
            let lead = raw[..sep_at]
                .replace(['.', '_'], " ")
                .trim()
                .trim_start_matches(['-', ' '])
                .trim()
                .to_lowercase();
            // Substring containment also covers an exact match.
            if lead.is_empty() || !show_title.contains(&lead) {
                continue;
            }

            let tail = &raw[sep_at + sep.len()..];
            return match tail.find(sep) {
                Some(next) => &tail[next + sep.len()..],
                None => tail,
            };
        }
    }
    raw
}