// zantetsu_core/parser/unified.rs
use crate::error::Result;
use crate::parser::heuristic::HeuristicParser;
use crate::types::{ParseMode, ParseResult};
8
9#[derive(Debug, Clone)]
11pub struct ParserConfig {
12 pub mode: ParseMode,
14}
15
16impl Default for ParserConfig {
17 fn default() -> Self {
18 Self {
19 mode: ParseMode::Auto,
20 }
21 }
22}
23
24impl ParserConfig {
25 pub fn new() -> Self {
27 Self::default()
28 }
29
30 pub fn with_mode(mut self, mode: ParseMode) -> Self {
32 self.mode = mode;
33 self
34 }
35}
36
37pub struct Parser {
39 config: ParserConfig,
40 heuristic: HeuristicParser,
41}
42
/// Collapses every run of whitespace in `value` into a single ASCII space and
/// drops leading and trailing whitespace.
///
/// Returns an empty string when `value` is empty or whitespace-only.
fn normalize_whitespace(value: &str) -> String {
    value.split_whitespace().collect::<Vec<_>>().join(" ")
}
46
47fn normalize_candidate_text(value: &str) -> Option<String> {
48 let cleaned = value
49 .replace(['.', '_'], " ")
50 .replace(['[', ']', '(', ')', '{', '}'], " ")
51 .trim_matches(|c: char| {
52 matches!(c, '[' | ']' | '(' | ')' | '{' | '}' | ' ' | '.' | '_' | '-')
53 })
54 .to_string();
55
56 let cleaned = normalize_whitespace(&cleaned);
57 (!cleaned.is_empty()).then_some(cleaned)
58}
59
/// Strips non-alphanumeric ASCII decoration (brackets, punctuation, ...) from
/// both ends of `token` and lowercases the ASCII letters of what remains.
fn normalized_metadata_token(token: &str) -> String {
    token
        .trim_matches(|c: char| !c.is_ascii_alphanumeric())
        .to_ascii_lowercase()
}

/// Returns `true` when `token` looks like release metadata rather than a name.
///
/// After normalization a token counts as metadata when it is:
/// - one of the known resolution / source / codec / audio / container / batch keywords,
/// - a version tag: `v` followed by at least one ASCII digit (`v2`, `v10`), or
/// - exactly eight ASCII hex digits (the shape of a CRC32 checksum tag).
fn looks_like_metadata_token(token: &str) -> bool {
    let normalized = normalized_metadata_token(token);

    let is_known_keyword = matches!(
        normalized.as_str(),
        "480p"
            | "480i"
            | "720p"
            | "720i"
            | "1080p"
            | "1080i"
            | "2160p"
            | "2160i"
            | "4k"
            | "bluray"
            | "bd"
            | "webdl"
            | "webrip"
            | "dvd"
            | "hdtv"
            | "remux"
            | "hevc"
            | "x264"
            | "x265"
            | "h264"
            | "h265"
            | "av1"
            | "vp9"
            | "aac"
            | "flac"
            | "opus"
            | "ac3"
            | "dts"
            | "mp3"
            | "mkv"
            | "mp4"
            | "avi"
            | "batch"
            | "complete"
    );
    if is_known_keyword {
        return true;
    }

    // A bare "v" is not a version tag: `all` on an empty iterator is vacuously
    // true, so the digit part must be checked for emptiness explicitly.
    if let Some(digits) = normalized.strip_prefix('v') {
        if !digits.is_empty() && digits.chars().all(|c| c.is_ascii_digit()) {
            return true;
        }
    }

    normalized.len() == 8 && normalized.chars().all(|c| c.is_ascii_hexdigit())
}
80
81fn looks_like_metadata_noise(value: &str) -> bool {
82 let parts: Vec<&str> = value.split_whitespace().collect();
83 !parts.is_empty() && parts.iter().all(|part| looks_like_metadata_token(part))
84}
85
86fn clean_group_candidate(value: &str) -> Option<String> {
87 let cleaned = normalize_candidate_text(value)?;
88 if cleaned.len() < 2 || looks_like_metadata_noise(&cleaned) {
89 return None;
90 }
91 Some(cleaned)
92}
93
94fn sanitize_result(mut result: ParseResult) -> ParseResult {
95 result.group = result.group.as_deref().and_then(clean_group_candidate);
96 result
97}
98
99impl Parser {
100 pub fn new(config: ParserConfig) -> Result<Self> {
102 let heuristic = HeuristicParser::new()?;
103 Ok(Self { config, heuristic })
104 }
105
106 #[allow(clippy::should_implement_trait)]
108 pub fn default() -> Result<Self> {
109 Self::new(ParserConfig::default())
110 }
111
112 pub fn parse(&self, input: &str) -> Result<ParseResult> {
131 let result = self.heuristic.parse(input)?;
132 Ok(sanitize_result(result))
133 }
134
135 pub fn config(&self) -> &ParserConfig {
137 &self.config
138 }
139}
140
141pub fn parse(input: &str) -> Result<ParseResult> {
143 let parser = Parser::default()?;
144 parser.parse(input)
145}
146
147pub fn parse_with_mode(input: &str, _mode: ParseMode) -> Result<ParseResult> {
149 let parser = Parser::default()?;
150 parser.parse(input)
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// A default parser can be constructed.
    #[test]
    fn test_parser_creation() {
        let parser = Parser::default();
        assert!(parser.is_ok());
    }

    /// `with_mode` overrides the default mode.
    #[test]
    fn test_parser_config() {
        let config = ParserConfig::new().with_mode(ParseMode::Light);
        assert_eq!(config.mode, ParseMode::Light);
    }

    /// A typical release name yields the expected group, title and parse mode.
    #[test]
    fn test_parse_light_mode() {
        let config = ParserConfig::new().with_mode(ParseMode::Light);
        let parser = Parser::new(config).unwrap();

        let result = parser
            .parse("[SubsPlease] Jujutsu Kaisen - 24 (1080p) [A1B2C3D4].mkv")
            .unwrap();

        assert_eq!(result.group.as_deref(), Some("SubsPlease"));
        assert_eq!(result.title.as_deref(), Some("Jujutsu Kaisen"));
        assert_eq!(result.parse_mode, ParseMode::Light);
    }

    /// Empty input is reported as an error.
    #[test]
    fn test_parse_empty() {
        let parser = Parser::default().unwrap();
        let result = parser.parse("");
        assert!(result.is_err());
    }

    /// The free `parse` function keeps a hyphenated group and the extension.
    #[test]
    fn test_parse_convenience_function() {
        let result = parse("[Erai-raws] Test Anime - 01 (720p).mp4");
        assert!(result.is_ok());

        let parsed = result.unwrap();
        assert_eq!(parsed.group.as_deref(), Some("Erai-raws"));
        assert_eq!(parsed.extension.as_deref(), Some("mp4"));
    }

    /// The free `parse_with_mode` function succeeds on a simple release name.
    #[test]
    fn test_parse_with_mode() {
        let result = parse_with_mode("[Test] Anime - 01.mkv", ParseMode::Light);
        assert!(result.is_ok());
    }
}