hunch 2.0.2

A media filename parser for movies, TV, and anime — built in Rust, inspired by guessit
Documentation
//! Regex pattern definitions for season/episode detection.

use crate::matcher::regex_utils::BoundedRegex;
use std::sync::LazyLock;

/// Module-local alias: inside this module `Regex` names [`BoundedRegex`], so
/// every static declared as `LazyLock<Regex>` is built through that type.
pub(super) type Regex = BoundedRegex;

// ── SxxExx patterns ──

/// Season + episode in `SxxExx` form (case-insensitive).
///
/// Accepted shapes include `S01E02`, `S01E02E03`, `S01E02-E05`, `S01E02-05`,
/// `S01E02+E03` and `S01.E02.E03`. A single `.` or space may sit between the
/// season and the first `E`, and `EP` is accepted in place of that first `E`.
///
/// Captures: `season` (1–3 digits), `ep_start` (1–4 digits) and, when further
/// episodes follow, `ep_rest` holding the raw tail (e.g. `-E05`, `E03`).
/// The match must not be directly preceded or followed by a character from
/// `[a-z0-9]`.
pub(super) static SXXEXX: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z0-9])S(?P<season>\d{1,3})[. ]?E(?:P)?(?P<ep_start>\d{1,4})(?P<ep_rest>(?:(?:[-+]E?|[. ]E|E)\d{1,4})+)?(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// `Sxx` and `Exx` split by separators, e.g. `S03-E01` (case-insensitive).
///
/// One or more characters from `-`, `.` and space must appear between the
/// season digits and the `E`. Captures `season` (1–3 digits) and `episode`
/// (1–4 digits); neither side of the match may touch `[a-z0-9]`.
pub(super) static SXX_DASH_EXX: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?<![a-z0-9])S(?P<season>\d{1,3})[-. ]+E(?P<episode>\d{1,4})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Full `SxxExx-SxxExx` range such as `S01E01-S01E21` (case-insensitive).
///
/// The two halves are joined by exactly one `-`. Captures `s1`/`e1` for the
/// start and `s2`/`e2` for the end (seasons 1–3 digits, episodes 1–4 digits).
/// The match must not be adjacent to a character from `[a-z0-9]`.
pub(super) static SXXEXX_TO_SXXEXX: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z0-9])S(?P<s1>\d{1,3})E(?P<e1>\d{1,4})[-]S(?P<s2>\d{1,3})E(?P<e2>\d{1,4})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// `SxxxExx` with an `x` between season and episode, e.g. `S06xE01`
/// (case-insensitive).
///
/// Captures `season` (1–3 digits) and `episode` (1–4 digits); the match must
/// not be adjacent to a character from `[a-z0-9]`.
pub(super) static SXX_X_EXX: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?<![a-z0-9])S(?P<season>\d{1,3})[xX]E(?P<episode>\d{1,4})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Bonus/extras form `S03-X01`: a season, separators, then `X` + number
/// (case-insensitive).
///
/// One or more of `-`, `.` or space separate the season digits from the `X`.
/// Captures `season` (1–3 digits) and `episode` (1–4 digits); the match must
/// not be adjacent to a character from `[a-z0-9]`.
pub(super) static SXX_DASH_XXX: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?<![a-z0-9])S(?P<season>\d{1,3})[-. ]+[xX](?P<episode>\d{1,4})(?![a-z0-9])";
    Regex::new(PATTERN)
});

// ── NxN patterns ──

/// `NxN` season/episode notation: `1x03`, `5x9`, `5x44x45`, `4x05-06`
/// (case-insensitive).
///
/// Captures `season` (1–2 digits) and `ep_start` (1–4 digits). Any number of
/// further episodes may follow, each introduced by `-` or `x`; they share the
/// single repeated group `ep2`, so that group alone does not expose every
/// extra episode (presumably the caller re-scans the matched text — confirm).
/// The match must not be adjacent to a character from `[a-z0-9]`.
pub(super) static NXN: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z0-9])(?P<season>\d{1,2})[xX](?P<ep_start>\d{1,4})(?:[-xX](?P<ep2>\d{1,4}))*(?![a-z0-9])";
    Regex::new(PATTERN)
});

// ── Standalone episode patterns ──

/// Stand-alone episode marker without a season: `E01`, `Ep01`, `Ep.01`,
/// `E02-03`, `E02-E03`, `E02+E03` (case-insensitive).
///
/// The prefix is `E` or `Ep` (optionally followed by `.`), then optional
/// whitespace and `ep_start` (1–4 digits). When more episodes follow, the raw
/// tail is captured as `ep_rest`. The match must not be adjacent to a
/// character from `[a-z0-9]`.
///
/// NOTE(review): a space- or dot-separated tail element must itself start
/// with `E` (`[. ]E`), so a bare list like `E01 02 03` is not covered by this
/// pattern alone — presumably handled by the caller; confirm.
pub(super) static EP_ONLY: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z0-9])(?:E|Ep\.?)\s*(?P<ep_start>\d{1,4})(?P<ep_rest>(?:(?:[-+]E?|[. ]E|E)\d{1,4})+)?(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Spelled-out episode keyword: `Episode 1`, `Episode.01`, `Episodes 1-3`
/// (case-insensitive).
///
/// `Episode`/`Episodes` may be followed by optional whitespace and one
/// optional `.` before the number. Captures `episode` (1–4 digits) and, when
/// a `-` or `~` range follows, `ep_end` (1–4 digits). The keyword must not be
/// preceded by a letter, and the match must not be followed by `[a-z0-9]`.
pub(super) static EPISODE_WORD: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])Episodes?\s*\.?\s*(?P<episode>\d{1,4})(?:\s*[-~]\s*(?P<ep_end>\d{1,4}))?(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Episode number carrying a release-version suffix: `07v4`, `312v1`.
///
/// Captures `episode` (1–4 digits); the `v` and its 1–2 version digits are
/// matched but not captured.
///
/// Unlike most patterns in this file there is no `(?i)` flag here, so the `v`
/// must be lowercase and the boundary checks reject only lowercase letters
/// and digits next to the match.
/// NOTE(review): confirm the case-sensitivity is intentional.
pub(super) static VERSIONED_EPISODE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?<![a-z0-9])(?P<episode>\d{1,4})v\d{1,2}(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Episode number at the very start of the input: `01 - Ep Name`,
/// `003. Show Name`.
///
/// Anchored with `^`. Captures `episode` (a zero-led number of 2–4 digits, or
/// any 1–3 digit number). The number must be followed by optional whitespace,
/// a `-` or `.`, at least one whitespace character and then an ASCII letter;
/// that trailing context is part of the overall match.
pub(super) static LEADING_EPISODE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"^(?P<episode>0\d{1,3}|\d{1,3})(?:\s*[-.]\s+[A-Za-z])";
    Regex::new(PATTERN)
});

/// Anime-style episode after a dash: `- 01`, `- 001`.
///
/// Requires a `-`, at least one whitespace character, then `episode`
/// (1–4 digits) followed by whitespace, a `.`, or end of input (the
/// terminator is consumed by the match).
///
/// There is no `(?i)` flag, so the look-behind only rejects a lowercase
/// letter or digit immediately before the dash.
/// NOTE(review): confirm the case-sensitivity is intentional.
pub(super) static ANIME_EPISODE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?<![a-z0-9])[-]\s+(?P<episode>\d{1,4})(?:\s|[.]|$)";
    Regex::new(PATTERN)
});

/// Bare two-digit episode wedged between dots: `Show.05.Title`.
///
/// Captures `episode` (exactly two digits). Both surrounding dots are part of
/// the match, and the closing dot must not be followed by another digit.
pub(super) static BARE_EPISODE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"\.(?P<episode>0\d|\d{2})\.(?![0-9])";
    Regex::new(PATTERN)
});

// ── Season patterns ──

/// Season keyword followed by a number, e.g. `Season 2`, `Saison.3`,
/// `Temporada 1`, `Stagione 4`, `Temp.5` (case-insensitive).
///
/// Keywords: `Season`, `Saison`, `Temporada`, `Stagione`, plus the
/// abbreviations `Tem`/`Temp` with an optional trailing `.`. Optional
/// whitespace and one optional `.` may precede the number. Captures `season`
/// (1–2 digits). The keyword must not be preceded by a letter, and the match
/// must not be followed by `[a-z0-9]`.
pub(super) static SEASON_ONLY: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])(?:Season|Saison|Temporada|Stagione|Temp?\.?)\s*\.?\s*(?P<season>\d{1,2})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Season keyword followed by a Roman numeral, e.g. `Season II`, `Saison IV`
/// (case-insensitive).
///
/// Keywords: `Season`, `Saison`, `Temporada`, `Stagione`. Captures `season`
/// as the raw numeral text, built from up to three `X` followed by `IX`,
/// `IV`, or an optional `V` with up to three `I`.
///
/// Every part of the numeral is optional, so `season` can be the empty
/// string (e.g. when the keyword is followed by a digit); consumers need to
/// treat an empty capture as "no numeral". The keyword must not be preceded
/// by a letter and the match must not be followed by one.
pub(super) static SEASON_ROMAN: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])(?:Season|Saison|Temporada|Stagione)\s*\.?\s*(?P<season>(?:X{0,3})(?:IX|IV|V?I{0,3}))(?![a-z])";
    Regex::new(PATTERN)
});

/// Season keyword + number used as a directory component, e.g. `Season 2/`
/// or `Saison 3\` (case-insensitive).
///
/// Keywords: `Season`, `Saison`, `Temporada`, `Stagione`. Captures `season`
/// (1–2 digits), which must be immediately followed by `/` or `\`; that path
/// separator is part of the match. There is no leading boundary check.
pub(super) static SEASON_DIR: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?:Season|Saison|Temporada|Stagione)\s*\.?\s*(?P<season>\d{1,2})(?:[/\\])";
    Regex::new(PATTERN)
});

/// Season marker on its own (`S01`) with no episode part (case-insensitive).
///
/// Captures `season` (1–3 digits). The `S` must not be preceded by a
/// character from `[a-z0-9]`, but nothing is checked after the digits, so the
/// pattern also matches the leading `Sxx` of longer tokens such as `S01E02`.
pub(super) static S_ONLY: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z0-9])S(?P<season>\d{1,3})";
    Regex::new(PATTERN)
});

/// Season range written as `Sxx-Sxx`, e.g. `S01-S05` (case-insensitive).
///
/// Captures `s1` and `s2` (1–3 digits each), joined by exactly one `-`.
/// The match must not be adjacent to a character from `[a-z0-9]`.
pub(super) static S_RANGE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?<![a-z0-9])S(?P<s1>\d{1,3})[-]S(?P<s2>\d{1,3})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Season keyword followed by several numbers, e.g. `Season 1-3`,
/// `Season 1 & 2`, `Season 1,2,3` (case-insensitive).
///
/// Keywords: `Season`, `Saison`, `Temporada`, `Stagione`. Captures `seasons`
/// as the raw list text: a 1–2 digit number followed by one or more further
/// numbers, each introduced by `-`, `&`, `.` or `,` with optional whitespace
/// around the separator. The keyword must not be preceded by a letter and the
/// match must not be followed by `[a-z0-9]`.
pub(super) static SEASON_MULTI: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])(?:Season|Saison|Temporada|Stagione)\s*\.?\s*(?P<seasons>\d{1,2}(?:\s*[-&.,]\s*\d{1,2})+)(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Season keyword, a list of numbers, then a `~`/`to` range end, e.g.
/// `Season 1 to 3` or `Season 1.3~5` (case-insensitive).
///
/// Keywords: `Season`, `Saison`, `Temporada`, `Stagione`. Captures `prefix`
/// (one or more 1–2 digit numbers separated by single `.` or space
/// characters) and `end` (1–2 digits) after the `~` or `to` connector.
/// The keyword must not be preceded by a letter and the match must not be
/// followed by `[a-z0-9]`.
pub(super) static SEASON_MULTI_RANGE: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])(?:Season|Saison|Temporada|Stagione)\s*\.?\s*(?P<prefix>\d{1,2}(?:[. ]\d{1,2})*)\s*[. ]?\s*(?:~|to)\s*\.?\s*(?P<end>\d{1,2})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Season keyword with a two-number range joined by a connector, e.g.
/// `Season 1 to 3`, `Season 1~3`, `Temporada 1 a 3`, `Season 1..3`
/// (case-insensitive).
///
/// Keywords: `Season`, `Saison`, `Temporada`, `Stagione`. Connectors: `to`,
/// `~`, `a`, `..`. Captures `s1` and `s2` (1–2 digits each). The keyword must
/// not be preceded by a letter and the match must not be followed by
/// `[a-z0-9]`.
pub(super) static SEASON_RANGE_WORD: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])(?:Season|Saison|Temporada|Stagione)\s*\.?\s*(?P<s1>\d{1,2})\s*\.?\s*(?:to|~|a|\.\.)\s*\.?\s*(?P<s2>\d{1,2})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Back-to-back season markers such as `S01S02S03` (case-insensitive).
///
/// Captures `first` (1–3 digits) for the leading marker; every following
/// `Sxx` shares the single repeated group `rest`, so that group alone does
/// not expose all later seasons (presumably the caller re-scans the matched
/// text — confirm). The match must not be adjacent to `[a-z0-9]`.
pub(super) static S_CONCAT: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?<![a-z0-9])S(?P<first>\d{1,3})(?:S(?P<rest>\d{1,3}))+(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// One `S` followed by several season numbers, e.g. `S01-02-03` or `S01.02`
/// (case-insensitive).
///
/// Captures `seasons` as the raw list text: two or more numbers of 2–3 digits
/// each, separated by a single `-`, `.` or space. The match must not be
/// adjacent to a character from `[a-z0-9]`.
pub(super) static S_MULTI_NUM: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?<![a-z0-9])S(?P<seasons>\d{2,3}(?:[-. ]\d{2,3})+)(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Season range joined by the word `to`, e.g. `S01toS03` or `S01.to.S03`
/// (case-insensitive).
///
/// Captures `s1` and `s2` (1–3 digits each). Optional dots may surround the
/// `to`. The match must not be adjacent to a character from `[a-z0-9]`.
pub(super) static S_TO_S: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str =
        r"(?i)(?<![a-z0-9])S(?P<s1>\d{1,3})\.?(?:to|\.to\.)\.?S(?P<s2>\d{1,3})(?![a-z0-9])";
    Regex::new(PATTERN)
});

/// Season keyword with a list closed by `and`/`&`, e.g. `Season 1 and 2`,
/// `Season 1 2 & 3` (case-insensitive).
///
/// Keywords: `Season`, `Saison`, `Temporada`, `Stagione`. Captures `nums`
/// (one or more 1–2 digit numbers separated by single `.` or space
/// characters) and `last` (1–2 digits) after the connector. A `.` or space is
/// required between `nums` and the connector. The keyword must not be
/// preceded by a letter and the match must not be followed by `[a-z0-9]`.
pub(super) static SEASON_LIST_AND: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])(?:Season|Saison|Temporada|Stagione)\s*\.?\s*(?P<nums>\d{1,2}(?:[. ]\d{1,2})*)[. ](?:and|&)\s*(?P<last>\d{1,2})(?![a-z0-9])";
    Regex::new(PATTERN)
});

// ── Spanish Cap patterns ──

/// Spanish `Cap` marker followed by a 3–4 digit number, optionally a second
/// one after `_`, e.g. `Cap.102` or `Cap.1503_1506` (case-insensitive).
///
/// Captures `num1` and, when present, `num2` (3–4 digits each). The numbers
/// must be followed by `.` + ASCII letter, by `[` or `]`, or by end of input;
/// that terminator is consumed as part of the match. `Cap` must not be
/// preceded by a letter.
pub(super) static CAP_PATTERN: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])Cap\.?\s*(?P<num1>\d{3,4})(?:[_](?P<num2>\d{3,4}))?(?:\.[A-Za-z]|[\]\[]|$)";
    Regex::new(PATTERN)
});

// ── Digit decomposition ──

/// A 3–4 digit number directly after a separator (`.`, `-`, `_` or space).
///
/// Captures the digits as `num`; the separator is part of the match and
/// nothing is checked after the digits. Built with `regex::Regex` directly
/// rather than the module's `Regex` alias.
///
/// # Panics
/// First use panics with "episode pattern regex is valid" if the pattern
/// fails to compile.
pub(super) static THREE_DIGIT: LazyLock<regex::Regex> = LazyLock::new(|| {
    let compiled = regex::Regex::new(r"[.\-_ ](?P<num>\d{3,4})");
    compiled.expect("episode pattern regex is valid")
});

// ── CJK episode markers ──

/// CJK ordinal episode markers: 第N話, 第N集, 第N话, 第N回.
///
/// Matches both ASCII digits (`0-9`) and full-width digits
/// (U+FF10–U+FF19). The full-width range is written with explicit
/// code-point escapes so it cannot be silently collapsed into a second ASCII
/// `0-9` range by Unicode normalization of the source text.
///
/// Captures `episode` as the raw digit run between 第 and the counter
/// character. Examples: `第13話`, `第1集`, `第03話`.
///
/// NOTE(review): the capture may contain full-width digits, which
/// `str::parse` does not accept — make sure the consumer normalizes them
/// before numeric parsing.
///
/// # Panics
/// First use panics with "CJK_EPISODE_MARKER regex is valid" if the pattern
/// fails to compile.
pub(super) static CJK_EPISODE_MARKER: LazyLock<regex::Regex> = LazyLock::new(|| {
    regex::Regex::new(r"第(?P<episode>[0-9\u{FF10}-\u{FF19}]+)[話集话回]")
        .expect("CJK_EPISODE_MARKER regex is valid")
});

// ── CJK fansub bracket episode ──

/// CJK fansub bracketed episode: `][01][`, `][13][`, `][13(SP)][`.
///
/// Matches a bare 1–3 digit number in its own bracket group that directly
/// follows another group (`][`). The number may be followed by one
/// parenthesised marker such as `(SP)` or `(NC.Ver)`, and the closing `]`
/// must be followed by `[`, `.`, or end of input. Captures the digits as
/// `episode`; the surrounding brackets and terminator are part of the match.
///
/// # Panics
/// First use panics with "CJK_BRACKET_EPISODE regex is valid" if the pattern
/// fails to compile.
pub(super) static CJK_BRACKET_EPISODE: LazyLock<regex::Regex> = LazyLock::new(|| {
    let compiled = regex::Regex::new(r"\]\[(?P<episode>\d{1,3})(?:\([^)]*\))?\](?:\[|$|\.)");
    compiled.expect("CJK_BRACKET_EPISODE regex is valid")
});

// ── CJK fansub Latin-ordinal season+episode ──

/// CJK fansub Latin-ordinal season + episode inside one bracket group:
/// `[4th - 01]`, `[2nd - 12]`, `[4th - 01v2]`. Common in Chinese fansub
/// releases that bracket the English ordinal season label alongside the
/// episode number.
///
/// Examples:
/// - `[4th - 01]` → season=4, episode=1
/// - `[2nd - 12v2]` → season=2, episode=12 (the `v2` revision suffix is
///   consumed by the match but not captured here).
///
/// Details:
/// - `season` is exactly one digit followed by `st`, `nd`, `rd` or `th`; the
///   suffix is not checked against the digit, and two-digit ordinals such as
///   `10th` do not match. Keeping it to one digit limits false positives on
///   group names or scene tags that happen to end in those suffixes.
/// - The separator is a hyphen, en dash (U+2013) or em dash (U+2014), with
///   optional whitespace on either side.
/// - `episode` is 1–4 digits, optionally followed by `v`/`V` and digits.
/// - The ordinal suffix is case-sensitive (no `(?i)` flag).
///
/// # Panics
/// First use panics with "NTH_DASH_EPISODE regex is valid" if the pattern
/// fails to compile.
pub(super) static NTH_DASH_EPISODE: LazyLock<regex::Regex> = LazyLock::new(|| {
    let compiled = regex::Regex::new(
        r"\[\s*(?P<season>\d)(?:st|nd|rd|th)\s*[-\u2013\u2014]\s*(?P<episode>\d{1,4})(?:[vV]\d+)?\s*\]",
    );
    compiled.expect("NTH_DASH_EPISODE regex is valid")
});

// ── CJK cumulative episode ──

/// CJK cumulative-episode tag: `[总第NN]` (Chinese for "cumulative episode
/// N"). Chinese fansub releases commonly use it to give the absolute episode
/// number of a multi-season series next to the per-season episode number.
///
/// Examples:
/// - `[总第67]` → absolute_episode=67
/// - `[总第 100]` → absolute_episode=100
///
/// Captures `absolute_episode` (1–4 digits). Whitespace is tolerated after
/// `[`, between 总第 and the number, and before `]`.
///
/// Intended to feed the existing [`Property::AbsoluteEpisode`] rather than a
/// new variant, since the meaning matches the absolute-episode concept
/// (e.g. anime numbered cumulatively across seasons).
///
/// # Panics
/// First use panics with "CJK_CUMULATIVE_EPISODE regex is valid" if the
/// pattern fails to compile.
pub(super) static CJK_CUMULATIVE_EPISODE: LazyLock<regex::Regex> = LazyLock::new(|| {
    let compiled = regex::Regex::new(r"\[\s*总第\s*(?P<absolute_episode>\d{1,4})\s*\]");
    compiled.expect("CJK_CUMULATIVE_EPISODE regex is valid")
});

// ── Week pattern ──

/// Week number: `Week 45`, `Week.12`, `Week7` (case-insensitive).
///
/// At most one `.` or space may sit between `Week` and the number. Captures
/// `week` (1–2 digits). `Week` must not be preceded by a letter and the match
/// must not be followed by a character from `[a-z0-9]`.
pub(super) static WEEK: LazyLock<Regex> = LazyLock::new(|| {
    const PATTERN: &str = r"(?i)(?<![a-z])Week[. ]?(?P<week>\d{1,2})(?![a-z0-9])";
    Regex::new(PATTERN)
});