1use crate::error::{YdlError, YdlResult};
2use regex::Regex;
3use url::Url;
4
5pub struct YouTubeParser {
7 video_id_regex: Regex,
8 youtube_domains: Vec<&'static str>,
9}
10
11impl Default for YouTubeParser {
12 fn default() -> Self {
13 Self::new()
14 }
15}
16
17impl YouTubeParser {
18 pub fn new() -> Self {
20 let video_id_regex = Regex::new(r"^[a-zA-Z0-9_-]{11}$").expect("Valid video ID regex");
22
23 let youtube_domains = vec![
24 "youtube.com",
25 "www.youtube.com",
26 "youtu.be",
27 "m.youtube.com",
28 "youtube-nocookie.com",
29 "www.youtube-nocookie.com",
30 ];
31
32 Self {
33 video_id_regex,
34 youtube_domains,
35 }
36 }
37
38 pub fn parse_url(&self, url_str: &str) -> YdlResult<String> {
40 let url = Url::parse(url_str).map_err(|_| YdlError::InvalidUrl {
42 url: url_str.to_string(),
43 })?;
44
45 self.validate_domain(&url)?;
47
48 self.extract_video_id(&url)
50 }
51
52 fn validate_domain(&self, url: &Url) -> YdlResult<()> {
54 let domain = url.domain().ok_or_else(|| YdlError::InvalidUrl {
55 url: url.to_string(),
56 })?;
57
58 if !self.youtube_domains.contains(&domain) {
59 return Err(YdlError::InvalidUrl {
60 url: url.to_string(),
61 });
62 }
63
64 Ok(())
65 }
66
67 fn extract_video_id(&self, url: &Url) -> YdlResult<String> {
69 let domain = url.domain().unwrap();
70
71 match domain {
72 "youtu.be" => {
74 let path = url.path().trim_start_matches('/');
75 let video_id = path.split('/').next().unwrap_or("");
77 self.validate_and_return_video_id(video_id, url)
78 }
79 _ => {
81 if let Ok(id) = self.extract_from_watch_url(url) {
83 return Ok(id);
84 }
85 if let Ok(id) = self.extract_from_embed_url(url) {
86 return Ok(id);
87 }
88 if let Ok(id) = self.extract_from_shorts_url(url) {
89 return Ok(id);
90 }
91
92 Err(YdlError::InvalidUrl {
93 url: url.to_string(),
94 })
95 }
96 }
97 }
98
99 fn extract_from_watch_url(&self, url: &Url) -> YdlResult<String> {
101 if url.path() != "/watch" {
102 return Err(YdlError::InvalidUrl {
103 url: url.to_string(),
104 });
105 }
106
107 let video_id = url
108 .query_pairs()
109 .find(|(key, _)| key == "v")
110 .map(|(_, value)| value.to_string())
111 .ok_or_else(|| YdlError::InvalidUrl {
112 url: url.to_string(),
113 })?;
114
115 self.validate_and_return_video_id(&video_id, url)
116 }
117
118 fn extract_from_embed_url(&self, url: &Url) -> YdlResult<String> {
120 let path_segments: Vec<&str> = url
121 .path_segments()
122 .ok_or_else(|| YdlError::InvalidUrl {
123 url: url.to_string(),
124 })?
125 .collect();
126
127 if path_segments.len() >= 2 && path_segments[0] == "embed" {
128 let video_id = path_segments[1];
129 return self.validate_and_return_video_id(video_id, url);
130 }
131
132 Err(YdlError::InvalidUrl {
133 url: url.to_string(),
134 })
135 }
136
137 fn extract_from_shorts_url(&self, url: &Url) -> YdlResult<String> {
139 let path_segments: Vec<&str> = url
140 .path_segments()
141 .ok_or_else(|| YdlError::InvalidUrl {
142 url: url.to_string(),
143 })?
144 .collect();
145
146 if path_segments.len() >= 2 && path_segments[0] == "shorts" {
147 let video_id = path_segments[1];
148 return self.validate_and_return_video_id(video_id, url);
149 }
150
151 Err(YdlError::InvalidUrl {
152 url: url.to_string(),
153 })
154 }
155
156 fn validate_and_return_video_id(&self, video_id: &str, _url: &Url) -> YdlResult<String> {
158 if self.is_valid_video_id(video_id) {
159 Ok(video_id.to_string())
160 } else {
161 Err(YdlError::InvalidVideoId {
162 video_id: video_id.to_string(),
163 })
164 }
165 }
166
167 pub fn is_valid_video_id(&self, video_id: &str) -> bool {
169 self.video_id_regex.is_match(video_id)
170 }
171
172 pub fn normalize_url(&self, url_str: &str) -> YdlResult<String> {
174 let video_id = self.parse_url(url_str)?;
175 Ok(format!("https://www.youtube.com/watch?v={}", video_id))
176 }
177
178 pub fn extract_video_id_direct(&self, input: &str) -> YdlResult<String> {
180 if self.is_valid_video_id(input) {
181 Ok(input.to_string())
182 } else {
183 self.parse_url(input)
184 }
185 }
186}
187
188pub fn parse_youtube_url(url: &str) -> YdlResult<String> {
190 YouTubeParser::new().parse_url(url)
191}
192
193pub fn is_valid_video_id(video_id: &str) -> bool {
195 YouTubeParser::new().is_valid_video_id(video_id)
196}
197
198pub fn normalize_youtube_url(url: &str) -> YdlResult<String> {
200 YouTubeParser::new().normalize_url(url)
201}
202
#[cfg(test)]
mod tests {
    use super::*;

    /// Fresh parser for each test.
    fn parser() -> YouTubeParser {
        YouTubeParser::new()
    }

    /// `/watch?v=` links on the www, bare and mobile hosts yield the ID.
    #[test]
    fn test_parse_standard_watch_url() {
        let yt = parser();

        for url in [
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://youtube.com/watch?v=dQw4w9WgXcQ",
            "http://www.youtube.com/watch?v=dQw4w9WgXcQ",
            "https://m.youtube.com/watch?v=dQw4w9WgXcQ",
        ] {
            let parsed = yt.parse_url(url);
            assert!(parsed.is_ok(), "Failed to parse: {}", url);
            assert_eq!(parsed.unwrap(), "dQw4w9WgXcQ");
        }
    }

    /// `youtu.be` links yield the ID; an input that fails to parse as given
    /// is retried once with an `https://` prefix.
    #[test]
    fn test_parse_short_urls() {
        let yt = parser();

        for url in [
            "https://youtu.be/dQw4w9WgXcQ",
            "http://youtu.be/dQw4w9WgXcQ",
            "youtu.be/dQw4w9WgXcQ",
        ] {
            let video_id = match yt.parse_url(url) {
                Ok(id) => id,
                Err(_) => {
                    let with_scheme = format!("https://{}", url);
                    let retried = yt.parse_url(&with_scheme);
                    assert!(retried.is_ok(), "Failed to parse: {}", url);
                    retried.unwrap()
                }
            };
            assert_eq!(video_id, "dQw4w9WgXcQ");
        }
    }

    /// `/embed/<id>` links yield the ID on both the regular and no-cookie hosts.
    #[test]
    fn test_parse_embed_urls() {
        let yt = parser();

        for url in [
            "https://www.youtube.com/embed/dQw4w9WgXcQ",
            "https://www.youtube-nocookie.com/embed/dQw4w9WgXcQ",
        ] {
            let parsed = yt.parse_url(url);
            assert!(parsed.is_ok(), "Failed to parse: {}", url);
            assert_eq!(parsed.unwrap(), "dQw4w9WgXcQ");
        }
    }

    /// `/shorts/<id>` links yield the ID.
    #[test]
    fn test_parse_shorts_urls() {
        let parsed = parser().parse_url("https://www.youtube.com/shorts/dQw4w9WgXcQ");
        assert!(parsed.is_ok());
        assert_eq!(parsed.unwrap(), "dQw4w9WgXcQ");
    }

    /// Extra query parameters do not affect the extracted ID.
    #[test]
    fn test_parse_urls_with_additional_params() {
        let yt = parser();

        for url in [
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=10s",
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ&list=PLrCZdFsaG",
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=10s&list=PLrCZdFsaG",
            "https://youtu.be/dQw4w9WgXcQ?t=10s",
        ] {
            let parsed = yt.parse_url(url);
            assert!(parsed.is_ok(), "Failed to parse: {}", url);
            assert_eq!(parsed.unwrap(), "dQw4w9WgXcQ");
        }
    }

    /// Foreign hosts, watch URLs without a `v` parameter, unrelated paths and
    /// non-URL strings are all rejected.
    #[test]
    fn test_invalid_urls() {
        let yt = parser();

        for url in [
            "https://www.google.com/watch?v=dQw4w9WgXcQ",
            "https://www.youtube.com/watch",
            "https://www.youtube.com/watch?list=PLrCZdFsaG",
            "https://www.youtube.com/user/someuser",
            "not-a-url-at-all",
            "",
        ] {
            let parsed = yt.parse_url(url);
            assert!(parsed.is_err(), "Should fail to parse: {}", url);
        }
    }

    /// Strings of the wrong length or with disallowed characters are not IDs.
    #[test]
    fn test_invalid_video_ids() {
        let yt = parser();

        for id in [
            "short",
            "way_too_long_video_id",
            "invalid-chars!",
            "dQw4w9WgXc",
            "dQw4w9WgXcQQ",
        ] {
            assert!(!yt.is_valid_video_id(id), "Should be invalid: {}", id);
        }
    }

    /// Eleven-character strings of letters, digits, `_` and `-` are IDs.
    #[test]
    fn test_valid_video_ids() {
        let yt = parser();

        for id in ["dQw4w9WgXcQ", "aBc_123-XyZ", "0123456789a", "_-_-_-_-_-_"] {
            assert!(yt.is_valid_video_id(id), "Should be valid: {}", id);
        }
    }

    /// Every recognised URL shape normalises to the canonical watch URL.
    #[test]
    fn test_normalize_url() {
        let yt = parser();

        let cases = [
            (
                "https://youtu.be/dQw4w9WgXcQ",
                "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            ),
            (
                "https://www.youtube.com/embed/dQw4w9WgXcQ",
                "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            ),
            (
                "https://m.youtube.com/watch?v=dQw4w9WgXcQ&t=10s",
                "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
            ),
        ];

        for (input, expected) in cases {
            let normalized = yt.normalize_url(input);
            assert!(normalized.is_ok(), "Failed to normalize: {}", input);
            assert_eq!(normalized.unwrap(), expected);
        }
    }

    /// A bare ID and a URL both resolve to the ID; anything else is an error.
    #[test]
    fn test_extract_video_id_direct() {
        let yt = parser();

        let from_bare_id = yt.extract_video_id_direct("dQw4w9WgXcQ");
        assert!(from_bare_id.is_ok());
        assert_eq!(from_bare_id.unwrap(), "dQw4w9WgXcQ");

        let from_url = yt.extract_video_id_direct("https://youtu.be/dQw4w9WgXcQ");
        assert!(from_url.is_ok());
        assert_eq!(from_url.unwrap(), "dQw4w9WgXcQ");

        let from_garbage = yt.extract_video_id_direct("invalid");
        assert!(from_garbage.is_err());
    }

    /// The free functions behave like the corresponding parser methods.
    #[test]
    fn test_convenience_functions() {
        let parsed = parse_youtube_url("https://youtu.be/dQw4w9WgXcQ");
        assert!(parsed.is_ok());
        assert_eq!(parsed.unwrap(), "dQw4w9WgXcQ");

        assert!(is_valid_video_id("dQw4w9WgXcQ"));
        assert!(!is_valid_video_id("invalid"));

        let normalized = normalize_youtube_url("https://youtu.be/dQw4w9WgXcQ");
        assert!(normalized.is_ok());
        assert_eq!(
            normalized.unwrap(),
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        );
    }
}