// zantetsu-core 0.2.0
//
// Heuristic anime metadata extraction and normalization engine.
// Documentation
//! # Unified Parser Interface
//!
//! Provides a unified API for parsing anime filenames using the heuristic engine.

use crate::error::Result;
use crate::parser::heuristic::HeuristicParser;
use crate::types::{ParseMode, ParseResult};

/// Configuration for the parser.
///
/// Holds the requested [`ParseMode`]. The parsing entry points in this module
/// currently hand every input to the heuristic engine regardless of the mode
/// stored here.
#[derive(Debug, Clone)]
pub struct ParserConfig {
    /// Which parsing mode to use (Full and Auto both delegate to heuristic).
    pub mode: ParseMode,
}

impl Default for ParserConfig {
    /// Returns a configuration whose mode is [`ParseMode::Auto`].
    fn default() -> Self {
        let mode = ParseMode::Auto;
        Self { mode }
    }
}

impl ParserConfig {
    /// Build a configuration populated with the default settings.
    ///
    /// Equivalent to calling `ParserConfig::default()`.
    pub fn new() -> Self {
        <Self as Default>::default()
    }

    /// Builder-style setter: consumes the configuration and returns it with
    /// `mode` installed as the parse mode.
    pub fn with_mode(self, mode: ParseMode) -> Self {
        let mut updated = self;
        updated.mode = mode;
        updated
    }
}

/// Unified parser backed by the heuristic regex engine.
///
/// Construct it with `Parser::new` or `Parser::default`; both are fallible
/// because building the heuristic engine can fail.
pub struct Parser {
    /// Settings supplied at construction; exposed read-only through `config()`.
    config: ParserConfig,
    /// Engine every `parse` call is delegated to.
    heuristic: HeuristicParser,
}

/// Collapse every run of whitespace in `value` into a single ASCII space and
/// drop leading/trailing whitespace.
fn normalize_whitespace(value: &str) -> String {
    let mut collapsed = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        // Words yielded by `split_whitespace` are never empty, so an empty
        // buffer means this is the first word and needs no separator.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}

/// Turn a raw candidate string into a tidy, space-separated form.
///
/// Dots, underscores and bracket characters are treated as separators and
/// replaced with spaces, runs of whitespace are collapsed to single spaces,
/// and spaces or dashes dangling at either end are removed. Dashes inside the
/// text (e.g. `Erai-raws`) are preserved.
///
/// Returns `None` when nothing is left after cleaning.
fn normalize_candidate_text(value: &str) -> Option<String> {
    // One pass over the input instead of two allocating `replace` calls.
    let spaced: String = value
        .chars()
        .map(|c| match c {
            '.' | '_' | '[' | ']' | '(' | ')' | '{' | '}' => ' ',
            other => other,
        })
        .collect();

    // Collapse whitespace *before* trimming: otherwise a tab or an ideographic
    // space at the edge shields an adjacent dangling dash from the trim below.
    let collapsed = normalize_whitespace(&spaced);

    // After collapsing, ASCII space is the only whitespace that can remain.
    let cleaned = collapsed.trim_matches(|c: char| matches!(c, ' ' | '-'));

    (!cleaned.is_empty()).then(|| cleaned.to_string())
}

/// Reduce a token to its comparable core: strip every leading and trailing
/// character that is not an ASCII letter or digit, then ASCII-lowercase what
/// remains. Characters in the interior of the token are left untouched.
fn normalized_metadata_token(token: &str) -> String {
    let is_edge_noise = |c: char| !c.is_ascii_alphanumeric();
    let core = token
        .trim_start_matches(is_edge_noise)
        .trim_end_matches(is_edge_noise);
    core.to_ascii_lowercase()
}

/// Decide whether a single token is release metadata rather than a name.
///
/// After normalization (edge punctuation stripped, ASCII-lowercased) a token
/// counts as metadata when it is one of:
/// * a known resolution / source / codec / container / batch keyword,
///   including the hyphenated spellings `web-dl` and `blu-ray`;
/// * a revision marker: `v` followed by **at least one** ASCII digit;
/// * exactly eight ASCII hex digits (the shape of a CRC32 tag).
fn looks_like_metadata_token(token: &str) -> bool {
    let normalized = normalized_metadata_token(token);

    let is_known_keyword = matches!(
        normalized.as_str(),
        "480p"
            | "480i"
            | "720p"
            | "720i"
            | "1080p"
            | "1080i"
            | "2160p"
            | "2160i"
            | "4k"
            | "bluray"
            | "blu-ray"
            | "bd"
            | "webdl"
            | "web-dl"
            | "webrip"
            | "dvd"
            | "hdtv"
            | "remux"
            | "hevc"
            | "x264"
            | "x265"
            | "h264"
            | "h265"
            | "av1"
            | "vp9"
            | "aac"
            | "flac"
            | "opus"
            | "ac3"
            | "dts"
            | "mp3"
            | "mkv"
            | "mp4"
            | "avi"
            | "batch"
            | "complete"
    );
    if is_known_keyword {
        return true;
    }

    // `all` is vacuously true on an empty iterator, so a lone "v" must be
    // rejected explicitly — it carries no revision number.
    let is_revision = match normalized.strip_prefix('v') {
        Some(digits) => !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit()),
        None => false,
    };

    // Eight hex digits are all ASCII, so the byte length equals the char count.
    let is_checksum =
        normalized.len() == 8 && normalized.chars().all(|c| c.is_ascii_hexdigit());

    is_revision || is_checksum
}

fn looks_like_metadata_noise(value: &str) -> bool {
    let parts: Vec<&str> = value.split_whitespace().collect();
    !parts.is_empty() && parts.iter().all(|part| looks_like_metadata_token(part))
}

/// Normalize a release-group candidate and decide whether to keep it.
///
/// Returns `None` when the text normalizes to nothing, when the normalized
/// text is shorter than two bytes, or when it consists solely of metadata
/// tokens; otherwise returns the normalized text.
fn clean_group_candidate(value: &str) -> Option<String> {
    normalize_candidate_text(value)
        .filter(|candidate| candidate.len() >= 2 && !looks_like_metadata_noise(candidate))
}

fn sanitize_result(mut result: ParseResult) -> ParseResult {
    result.group = result.group.as_deref().and_then(clean_group_candidate);
    result
}

impl Parser {
    /// Build a parser from an explicit configuration.
    ///
    /// # Errors
    /// Propagates any error raised while constructing the underlying
    /// heuristic engine.
    pub fn new(config: ParserConfig) -> Result<Self> {
        Ok(Self {
            heuristic: HeuristicParser::new()?,
            config,
        })
    }

    /// Build a parser that uses the default configuration.
    ///
    /// # Errors
    /// Fails under the same conditions as [`Parser::new`].
    // This returns `Result<Self>`, which the `Default` trait's signature
    // cannot express, so the clippy lint about the method name is silenced.
    #[allow(clippy::should_implement_trait)]
    pub fn default() -> Result<Self> {
        Self::new(ParserConfig::new())
    }

    /// Parse a filename or torrent name into structured metadata.
    ///
    /// The input is handed to the heuristic engine and the outcome is then
    /// sanitized (the release-group field is cleaned or cleared). The stored
    /// configuration is not consulted by this method.
    ///
    /// # Arguments
    /// * `input` - The filename or torrent name to parse
    ///
    /// # Returns
    /// A `ParseResult` containing extracted metadata
    ///
    /// # Errors
    /// Propagates any error reported by the heuristic engine.
    ///
    /// # Examples
    /// ```
    /// use zantetsu_core::parser::Parser;
    ///
    /// let parser = Parser::default().unwrap();
    /// let result = parser.parse("[SubsPlease] Jujutsu Kaisen - 24 (1080p) [A1B2C3D4].mkv").unwrap();
    ///
    /// assert_eq!(result.title.as_deref(), Some("Jujutsu Kaisen"));
    /// assert_eq!(result.group.as_deref(), Some("SubsPlease"));
    /// ```
    pub fn parse(&self, input: &str) -> Result<ParseResult> {
        let raw = self.heuristic.parse(input)?;
        let sanitized = sanitize_result(raw);
        Ok(sanitized)
    }

    /// Borrow the configuration this parser was constructed with.
    pub fn config(&self) -> &ParserConfig {
        let Self { config, .. } = self;
        config
    }
}

/// One-shot helper: parse `input` with a parser built from default settings.
///
/// A fresh [`Parser`] is constructed on every call; callers parsing many
/// names may prefer to build one `Parser` and reuse it.
///
/// # Errors
/// Propagates failures from constructing the parser and from parsing.
pub fn parse(input: &str) -> Result<ParseResult> {
    Parser::default()?.parse(input)
}

/// Parse `input` with a parser built from default settings.
///
/// The `_mode` argument exists for API compatibility only and is ignored:
/// every mode is served by the heuristic engine.
///
/// # Errors
/// Propagates failures from constructing the parser and from parsing.
pub fn parse_with_mode(input: &str, _mode: ParseMode) -> Result<ParseResult> {
    Parser::default()?.parse(input)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// A parser with default settings can be constructed.
    #[test]
    fn test_parser_creation() {
        assert!(Parser::default().is_ok());
    }

    /// `with_mode` stores the requested mode in the configuration.
    #[test]
    fn test_parser_config() {
        let ParserConfig { mode } = ParserConfig::new().with_mode(ParseMode::Light);
        assert_eq!(mode, ParseMode::Light);
    }

    /// A parser configured for light mode extracts group and title and
    /// reports `ParseMode::Light` on the result.
    #[test]
    fn test_parse_light_mode() {
        let parser = Parser::new(ParserConfig::new().with_mode(ParseMode::Light)).unwrap();

        let parsed = parser
            .parse("[SubsPlease] Jujutsu Kaisen - 24 (1080p) [A1B2C3D4].mkv")
            .unwrap();

        assert_eq!(parsed.group.as_deref(), Some("SubsPlease"));
        assert_eq!(parsed.title.as_deref(), Some("Jujutsu Kaisen"));
        assert_eq!(parsed.parse_mode, ParseMode::Light);
    }

    /// Empty input is rejected with an error.
    #[test]
    fn test_parse_empty() {
        let outcome = Parser::default().unwrap().parse("");
        assert!(outcome.is_err());
    }

    /// The free `parse` function yields group and extension for a typical name.
    #[test]
    fn test_parse_convenience_function() {
        let outcome = parse("[Erai-raws] Test Anime - 01 (720p).mp4");
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        assert_eq!(parsed.group.as_deref(), Some("Erai-raws"));
        assert_eq!(parsed.extension.as_deref(), Some("mp4"));
    }

    /// `parse_with_mode` succeeds on a simple release name.
    #[test]
    fn test_parse_with_mode() {
        assert!(parse_with_mode("[Test] Anime - 01.mkv", ParseMode::Light).is_ok());
    }
}