Skip to main content

zantetsu_core/parser/
unified.rs

1//! # Unified Parser Interface
2//!
3//! Provides a unified API for parsing anime filenames using the heuristic engine.
4
5use crate::error::Result;
6use crate::parser::heuristic::HeuristicParser;
7use crate::types::{ParseMode, ParseResult};
8
9/// Configuration for the parser.
10#[derive(Debug, Clone)]
11pub struct ParserConfig {
12    /// Which parsing mode to use (Full and Auto both delegate to heuristic).
13    pub mode: ParseMode,
14}
15
16impl Default for ParserConfig {
17    fn default() -> Self {
18        Self {
19            mode: ParseMode::Auto,
20        }
21    }
22}
23
24impl ParserConfig {
25    /// Create a new parser configuration with default settings.
26    pub fn new() -> Self {
27        Self::default()
28    }
29
30    /// Set the parse mode.
31    pub fn with_mode(mut self, mode: ParseMode) -> Self {
32        self.mode = mode;
33        self
34    }
35}
36
37/// Unified parser backed by the heuristic regex engine.
38pub struct Parser {
39    config: ParserConfig,
40    heuristic: HeuristicParser,
41}
42
/// Collapse every run of whitespace in `value` into a single ASCII space.
///
/// Leading and trailing whitespace is removed; an all-whitespace or empty input yields an
/// empty string.
fn normalize_whitespace(value: &str) -> String {
    let mut collapsed = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        // `split_whitespace` never yields empty words, so a non-empty buffer means a
        // previous word was already written and a separator is needed.
        if !collapsed.is_empty() {
            collapsed.push(' ');
        }
        collapsed.push_str(word);
    }
    collapsed
}
46
47fn normalize_candidate_text(value: &str) -> Option<String> {
48    let cleaned = value
49        .replace(['.', '_'], " ")
50        .replace(['[', ']', '(', ')', '{', '}'], " ")
51        .trim_matches(|c: char| {
52            matches!(c, '[' | ']' | '(' | ')' | '{' | '}' | ' ' | '.' | '_' | '-')
53        })
54        .to_string();
55
56    let cleaned = normalize_whitespace(&cleaned);
57    (!cleaned.is_empty()).then_some(cleaned)
58}
59
/// Reduce a token to a comparable form for metadata keyword matching.
///
/// Every character that is not ASCII alphanumeric is stripped from both ends (interior
/// characters are left alone) and the remainder is ASCII-lowercased.
fn normalized_metadata_token(token: &str) -> String {
    let is_edge_noise = |ch: char| !ch.is_ascii_alphanumeric();
    let core = token.trim_matches(is_edge_noise);
    core.to_ascii_lowercase()
}
65
66fn looks_like_metadata_token(token: &str) -> bool {
67    let normalized = normalized_metadata_token(token);
68    match normalized.as_str() {
69        "480p" | "480i" | "720p" | "720i" | "1080p" | "1080i" | "2160p" | "2160i" | "4k"
70        | "bluray" | "bd" | "webdl" | "webrip" | "dvd" | "hdtv" | "remux" | "hevc" | "x264"
71        | "x265" | "h264" | "h265" | "av1" | "vp9" | "aac" | "flac" | "opus" | "ac3" | "dts"
72        | "mp3" | "mkv" | "mp4" | "avi" | "batch" | "complete" => true,
73        _ if normalized.starts_with('v') && normalized[1..].chars().all(|c| c.is_ascii_digit()) => {
74            true
75        }
76        _ if normalized.len() == 8 && normalized.chars().all(|c| c.is_ascii_hexdigit()) => true,
77        _ => false,
78    }
79}
80
81fn looks_like_metadata_noise(value: &str) -> bool {
82    let parts: Vec<&str> = value.split_whitespace().collect();
83    !parts.is_empty() && parts.iter().all(|part| looks_like_metadata_token(part))
84}
85
86fn clean_group_candidate(value: &str) -> Option<String> {
87    let cleaned = normalize_candidate_text(value)?;
88    if cleaned.len() < 2 || looks_like_metadata_noise(&cleaned) {
89        return None;
90    }
91    Some(cleaned)
92}
93
94fn sanitize_result(mut result: ParseResult) -> ParseResult {
95    result.group = result.group.as_deref().and_then(clean_group_candidate);
96    result
97}
98
99impl Parser {
100    /// Create a new parser with the given configuration.
101    pub fn new(config: ParserConfig) -> Result<Self> {
102        let heuristic = HeuristicParser::new()?;
103        Ok(Self { config, heuristic })
104    }
105
106    /// Create a new parser with default configuration.
107    #[allow(clippy::should_implement_trait)]
108    pub fn default() -> Result<Self> {
109        Self::new(ParserConfig::default())
110    }
111
112    /// Parse a filename using the configured mode.
113    ///
114    /// # Arguments
115    /// * `input` - The filename or torrent name to parse
116    ///
117    /// # Returns
118    /// A `ParseResult` containing extracted metadata
119    ///
120    /// # Examples
121    /// ```
122    /// use zantetsu_core::parser::Parser;
123    ///
124    /// let parser = Parser::default().unwrap();
125    /// let result = parser.parse("[SubsPlease] Jujutsu Kaisen - 24 (1080p) [A1B2C3D4].mkv").unwrap();
126    ///
127    /// assert_eq!(result.title.as_deref(), Some("Jujutsu Kaisen"));
128    /// assert_eq!(result.group.as_deref(), Some("SubsPlease"));
129    /// ```
130    pub fn parse(&self, input: &str) -> Result<ParseResult> {
131        let result = self.heuristic.parse(input)?;
132        Ok(sanitize_result(result))
133    }
134
135    /// Get the parser configuration.
136    pub fn config(&self) -> &ParserConfig {
137        &self.config
138    }
139}
140
141/// Convenience function to parse a filename with default settings.
142pub fn parse(input: &str) -> Result<ParseResult> {
143    let parser = Parser::default()?;
144    parser.parse(input)
145}
146
147/// Parse with a specific mode (mode is accepted for API compatibility but ignored; all modes use heuristic).
148pub fn parse_with_mode(input: &str, _mode: ParseMode) -> Result<ParseResult> {
149    let parser = Parser::default()?;
150    parser.parse(input)
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// A parser can be built from the default configuration.
    #[test]
    fn test_parser_creation() {
        assert!(Parser::default().is_ok());
    }

    /// `with_mode` stores the requested mode in the configuration.
    #[test]
    fn test_parser_config() {
        let light = ParserConfig::new().with_mode(ParseMode::Light);
        assert_eq!(light.mode, ParseMode::Light);
    }

    /// Parsing a typical release name extracts group and title and reports light mode.
    #[test]
    fn test_parse_light_mode() {
        let parser = Parser::new(ParserConfig::new().with_mode(ParseMode::Light)).unwrap();

        let parsed = parser
            .parse("[SubsPlease] Jujutsu Kaisen - 24 (1080p) [A1B2C3D4].mkv")
            .unwrap();

        assert_eq!(parsed.group.as_deref(), Some("SubsPlease"));
        assert_eq!(parsed.title.as_deref(), Some("Jujutsu Kaisen"));
        assert_eq!(parsed.parse_mode, ParseMode::Light);
    }

    /// Empty input is rejected with an error.
    #[test]
    fn test_parse_empty() {
        let outcome = Parser::default().unwrap().parse("");
        assert!(outcome.is_err());
    }

    /// The module-level `parse` helper extracts group and extension.
    #[test]
    fn test_parse_convenience_function() {
        let outcome = parse("[Erai-raws] Test Anime - 01 (720p).mp4");
        assert!(outcome.is_ok());

        let parsed = outcome.unwrap();
        assert_eq!(parsed.group.as_deref(), Some("Erai-raws"));
        assert_eq!(parsed.extension.as_deref(), Some("mp4"));
    }

    /// `parse_with_mode` succeeds on a simple release name.
    #[test]
    fn test_parse_with_mode() {
        assert!(parse_with_mode("[Test] Anime - 01.mkv", ParseMode::Light).is_ok());
    }
}