Skip to main content

mangofetch_core/core/
cookie_parser.rs

1use std::collections::HashMap;
2use std::sync::Arc;
3
/// Reports whether a cookie scoped to `cookie_domain` applies to `request_domain`.
///
/// An exact match always applies. When `include_subdomains` is set, the cookie also applies
/// to any host that ends with `.` followed by `cookie_domain`, so `notexample.com` does not
/// match `example.com`. The comparison is a plain string comparison; no case folding or
/// leading-dot stripping is done here.
fn cookie_domain_matches(
    request_domain: &str,
    cookie_domain: &str,
    include_subdomains: bool,
) -> bool {
    if request_domain == cookie_domain {
        return true;
    }
    if !include_subdomains {
        return false;
    }

    // A proper subdomain is "<something>." + cookie_domain, so whatever is left after
    // removing the cookie domain must end on a label boundary.
    match request_domain.strip_suffix(cookie_domain) {
        Some(leading) => leading.ends_with('.'),
        None => false,
    }
}
15
16pub fn load_extension_cookies_for_domain(domain: &str) -> Option<Arc<reqwest::cookie::Jar>> {
17    let cookie_path = crate::core::ytdlp::ext_cookie_path_if_fresh()?;
18    let content = std::fs::read_to_string(&cookie_path).ok()?;
19
20    let jar = reqwest::cookie::Jar::default();
21    let mut count = 0usize;
22    let request_domain = domain.trim_start_matches('.').to_lowercase();
23
24    for line in content.lines() {
25        let line = line.trim();
26        if line.is_empty() {
27            continue;
28        }
29
30        let is_http_only_line = line.starts_with("#HttpOnly_");
31        let effective_line = if is_http_only_line {
32            &line["#HttpOnly_".len()..]
33        } else if line.starts_with('#') {
34            continue;
35        } else {
36            line
37        };
38
39        let parts: Vec<&str> = effective_line.split('\t').collect();
40        if parts.len() < 7 {
41            continue;
42        }
43
44        let raw_domain = parts[0];
45        let include_subdomains = parts[1].eq_ignore_ascii_case("TRUE");
46        let cookie_domain = raw_domain.trim_start_matches('.').to_lowercase();
47
48        if !cookie_domain_matches(&request_domain, &cookie_domain, include_subdomains) {
49            continue;
50        }
51
52        let name = parts[5];
53        let value = parts[6];
54        let url_scheme = if parts[3].eq_ignore_ascii_case("TRUE") {
55            "https"
56        } else {
57            "http"
58        };
59        let url_str = format!("{}://{}/", url_scheme, cookie_domain);
60
61        if let Ok(url) = url_str.parse::<reqwest::Url>() {
62            let cookie_str = format!("{}={}", name, value);
63            jar.add_cookie_str(&cookie_str, &url);
64            count += 1;
65        }
66    }
67
68    if count == 0 {
69        return None;
70    }
71
72    tracing::debug!(
73        "[cookies] loaded {} extension cookies for {}",
74        count,
75        domain
76    );
77    Some(Arc::new(jar))
78}
79
80pub fn load_extension_cookies_for_url(url: &str) -> Option<Arc<reqwest::cookie::Jar>> {
81    let domains = normalize_cookie_domains(url);
82    for domain in &domains {
83        if let Some(jar) = load_extension_cookies_for_domain(domain) {
84            return Some(jar);
85        }
86    }
87    None
88}
89
90fn normalize_cookie_domains(url: &str) -> Vec<String> {
91    let parsed = match url::Url::parse(url) {
92        Ok(p) => p,
93        Err(_) => return vec![],
94    };
95    let host = match parsed.host_str() {
96        Some(h) => h.to_lowercase(),
97        None => return vec![],
98    };
99
100    let mut domains = vec![];
101
102    let parts: Vec<&str> = host.split('.').collect();
103    if parts.len() >= 2 {
104        domains.push(format!(
105            "{}.{}",
106            parts[parts.len() - 2],
107            parts[parts.len() - 1]
108        ));
109    }
110
111    if host.contains("cdninstagram.com") || host.contains("fbcdn.net") {
112        domains.push("instagram.com".to_string());
113    }
114    if host.contains("twimg.com") {
115        domains.push("x.com".to_string());
116        domains.push("twitter.com".to_string());
117    }
118    if host.contains("redd.it") || host.contains("redditstatic.com") {
119        domains.push("reddit.com".to_string());
120    }
121    if host.contains("pstatic.net") || host.contains("pinimg.com") {
122        domains.push("pinterest.com".to_string());
123    }
124    if host.contains("tiktokcdn.com") || host.contains("tiktokv.com") {
125        domains.push("tiktok.com".to_string());
126    }
127    if host.contains("biliapi.net") || host.contains("bilivideo.com") || host.contains("hdslb.com")
128    {
129        domains.push("bilibili.com".to_string());
130    }
131    if host.contains("googlevideo.com") || host.contains("ytimg.com") {
132        domains.push("youtube.com".to_string());
133        domains.push("google.com".to_string());
134    }
135
136    domains
137}
138
/// Result of interpreting user-supplied cookie/token text (see `parse_cookie_input`).
pub struct ParsedInput {
    /// The value selected as the authentication token; empty when none could be selected.
    pub token: String,
    /// Cookies rendered as a `name=value; name=value` string; may be empty.
    pub cookie_string: String,
    /// Cookie name -> value map for every cookie that was recognised.
    pub cookies: HashMap<String, String>,
    /// Additional key/value data. `parse_cookie_input` always leaves this empty.
    pub extra_fields: HashMap<String, String>,
}
145
146pub fn parse_cookie_input(input: &str, target_cookie: &str) -> ParsedInput {
147    let trimmed = input.trim();
148
149    if trimmed.starts_with('{') || trimmed.starts_with('[') {
150        if let Ok(val) = serde_json::from_str::<serde_json::Value>(trimmed) {
151            let cookie_array = if let Some(arr) = val.get("cookies").and_then(|c| c.as_array()) {
152                arr.clone()
153            } else if let Some(arr) = val.as_array() {
154                arr.clone()
155            } else if val.get("name").is_some() && val.get("value").is_some() {
156                vec![val.clone()]
157            } else {
158                Vec::new()
159            };
160
161            if !cookie_array.is_empty() {
162                let mut cookies = HashMap::new();
163                let mut parts = Vec::new();
164
165                for cookie_obj in &cookie_array {
166                    if let (Some(name), Some(value)) = (
167                        cookie_obj.get("name").and_then(|n| n.as_str()),
168                        cookie_obj.get("value").and_then(|v| v.as_str()),
169                    ) {
170                        cookies.insert(name.to_string(), value.to_string());
171                        parts.push(format!("{}={}", name, value));
172                    }
173                }
174
175                let cookie_string = parts.join("; ");
176
177                let token = if !target_cookie.is_empty() {
178                    if let Some(t) = cookies.get(target_cookie) {
179                        t.clone()
180                    } else {
181                        cookies
182                            .values()
183                            .find(|v| v.starts_with("eyJ"))
184                            .cloned()
185                            .unwrap_or_default()
186                    }
187                } else {
188                    cookies
189                        .values()
190                        .find(|v| v.starts_with("eyJ"))
191                        .cloned()
192                        .unwrap_or_default()
193                };
194
195                return ParsedInput {
196                    token,
197                    cookie_string,
198                    cookies,
199                    extra_fields: HashMap::new(),
200                };
201            }
202        }
203    }
204
205    if trimmed.contains("; ") || (trimmed.contains('=') && !trimmed.starts_with("eyJ")) {
206        let mut cookies = HashMap::new();
207        for pair in trimmed.split("; ") {
208            if let Some(idx) = pair.find('=') {
209                let name = pair[..idx].trim().to_string();
210                let value = pair[idx + 1..].trim().to_string();
211                cookies.insert(name, value);
212            }
213        }
214
215        let token = if !target_cookie.is_empty() {
216            cookies.get(target_cookie).cloned().unwrap_or_default()
217        } else {
218            cookies
219                .values()
220                .find(|v| v.starts_with("eyJ"))
221                .cloned()
222                .unwrap_or_default()
223        };
224
225        return ParsedInput {
226            token,
227            cookie_string: trimmed.to_string(),
228            cookies,
229            extra_fields: HashMap::new(),
230        };
231    }
232
233    let token = trimmed.to_string();
234    let cookie_string = if !target_cookie.is_empty() {
235        format!("{}={}", target_cookie, token)
236    } else {
237        String::new()
238    };
239    let mut cookies = HashMap::new();
240    if !target_cookie.is_empty() {
241        cookies.insert(target_cookie.to_string(), token.clone());
242    }
243
244    ParsedInput {
245        token,
246        cookie_string,
247        cookies,
248        extra_fields: HashMap::new(),
249    }
250}
251
252pub fn parse_bearer_input(input: &str) -> String {
253    let trimmed = input.trim();
254
255    if trimmed.starts_with('{') || trimmed.starts_with('[') {
256        if let Ok(val) = serde_json::from_str::<serde_json::Value>(trimmed) {
257            for key in &[
258                "access_token",
259                "token",
260                "idToken",
261                "bearerToken",
262                "bearer_token",
263            ] {
264                if let Some(t) = val.get(*key).and_then(|v| v.as_str()) {
265                    return t.to_string();
266                }
267            }
268
269            let cookie_array = if let Some(arr) = val.get("cookies").and_then(|c| c.as_array()) {
270                arr.clone()
271            } else if let Some(arr) = val.as_array() {
272                arr.clone()
273            } else {
274                Vec::new()
275            };
276
277            for cookie_obj in &cookie_array {
278                if let Some(value) = cookie_obj.get("value").and_then(|v| v.as_str()) {
279                    if value.starts_with("eyJ") && value.len() > 50 {
280                        return value.to_string();
281                    }
282                }
283            }
284
285            for cookie_obj in &cookie_array {
286                if let (Some(name), Some(value)) = (
287                    cookie_obj.get("name").and_then(|n| n.as_str()),
288                    cookie_obj.get("value").and_then(|v| v.as_str()),
289                ) {
290                    let lower = name.to_lowercase();
291                    if (lower.contains("token")
292                        || lower.contains("auth")
293                        || lower.contains("session")
294                        || lower.contains("sid"))
295                        && value.len() > 20
296                    {
297                        return value.to_string();
298                    }
299                }
300            }
301        }
302    }
303
304    trimmed.to_string()
305}
306
// Unit tests for domain matching, candidate-domain derivation, and cookie/bearer input
// parsing. None of these tests read the extension cookie file.
#[cfg(test)]
mod tests {
    use super::*;

    // --- cookie_domain_matches ---------------------------------------------------------

    #[test]
    fn host_only_matches_exact_only() {
        assert!(cookie_domain_matches("example.com", "example.com", false));
        assert!(!cookie_domain_matches(
            "sub.example.com",
            "example.com",
            false
        ));
        assert!(!cookie_domain_matches(
            "example.com",
            "sub.example.com",
            false
        ));
    }

    #[test]
    fn include_subdomains_allows_proper_suffix() {
        assert!(cookie_domain_matches("example.com", "example.com", true));
        assert!(cookie_domain_matches(
            "sub.example.com",
            "example.com",
            true
        ));
        assert!(cookie_domain_matches(
            "a.b.example.com",
            "example.com",
            true
        ));
    }

    // A suffix match must fall on a label boundary, not on an arbitrary substring.
    #[test]
    fn substring_false_positive_rejected() {
        assert!(!cookie_domain_matches("foo.com", "oo.com", true));
        assert!(!cookie_domain_matches("foo.com", "oo.com", false));
        assert!(!cookie_domain_matches(
            "notexample.com",
            "example.com",
            true
        ));
    }

    #[test]
    fn parent_does_not_match_child_cookie() {
        assert!(!cookie_domain_matches(
            "example.com",
            "sub.example.com",
            true
        ));
    }

    // --- normalize_cookie_domains ------------------------------------------------------

    #[test]
    fn normalize_strips_subdomain_to_apex() {
        let d = normalize_cookie_domains("https://www.example.com/path");
        assert_eq!(d.first().map(|s| s.as_str()), Some("example.com"));
    }

    #[test]
    fn normalize_preserves_apex_when_already_two_parts() {
        let d = normalize_cookie_domains("https://example.com/");
        assert_eq!(d.first().map(|s| s.as_str()), Some("example.com"));
    }

    #[test]
    fn normalize_maps_instagram_cdn_to_instagram_com() {
        let d = normalize_cookie_domains("https://scontent.cdninstagram.com/v/t51/image.jpg");
        assert!(d.contains(&"instagram.com".to_string()));

        let d2 = normalize_cookie_domains("https://static.fbcdn.net/foo");
        assert!(d2.contains(&"instagram.com".to_string()));
    }

    #[test]
    fn normalize_maps_twimg_to_twitter_and_x() {
        let d = normalize_cookie_domains("https://pbs.twimg.com/media/ABC.jpg");
        assert!(d.contains(&"x.com".to_string()));
        assert!(d.contains(&"twitter.com".to_string()));
    }

    #[test]
    fn normalize_maps_redd_it_to_reddit_com() {
        let d = normalize_cookie_domains("https://v.redd.it/abc123");
        assert!(d.contains(&"reddit.com".to_string()));

        let d2 = normalize_cookie_domains("https://www.redditstatic.com/foo.js");
        assert!(d2.contains(&"reddit.com".to_string()));
    }

    #[test]
    fn normalize_maps_pinimg_to_pinterest_com() {
        let d = normalize_cookie_domains("https://i.pinimg.com/236x/ab/cd/ef/x.jpg");
        assert!(d.contains(&"pinterest.com".to_string()));
    }

    #[test]
    fn normalize_maps_tiktokcdn_to_tiktok_com() {
        let d = normalize_cookie_domains("https://v16-webapp.tiktokcdn.com/video.mp4");
        assert!(d.contains(&"tiktok.com".to_string()));

        let d2 = normalize_cookie_domains("https://v16-cold.tiktokv.com/video.mp4");
        assert!(d2.contains(&"tiktok.com".to_string()));
    }

    #[test]
    fn normalize_maps_bilivideo_to_bilibili_com() {
        let d = normalize_cookie_domains("https://upos-sz-mirrorhw.bilivideo.com/x.mp4");
        assert!(d.contains(&"bilibili.com".to_string()));

        let d2 = normalize_cookie_domains("https://i0.hdslb.com/bfs/cover.jpg");
        assert!(d2.contains(&"bilibili.com".to_string()));
    }

    #[test]
    fn normalize_maps_googlevideo_to_youtube_and_google() {
        let d = normalize_cookie_domains("https://rr1---sn-abcd.googlevideo.com/videoplayback");
        assert!(d.contains(&"youtube.com".to_string()));
        assert!(d.contains(&"google.com".to_string()));

        let d2 = normalize_cookie_domains("https://i.ytimg.com/vi/abc/hq.jpg");
        assert!(d2.contains(&"youtube.com".to_string()));
    }

    #[test]
    fn normalize_returns_empty_for_unparsable_url() {
        let d = normalize_cookie_domains("::not a url::");
        assert!(d.is_empty());
    }

    #[test]
    fn normalize_returns_empty_for_url_without_host() {
        let d = normalize_cookie_domains("file:///tmp/foo");
        assert!(d.is_empty());
    }

    // --- parse_cookie_input ------------------------------------------------------------

    #[test]
    fn parse_cookie_input_handles_semicolon_format() {
        let parsed = parse_cookie_input("sessionid=abc; csrftoken=xyz", "sessionid");

        assert_eq!(parsed.token, "abc");
        assert_eq!(
            parsed.cookies.get("sessionid").map(|s| s.as_str()),
            Some("abc")
        );
        assert_eq!(
            parsed.cookies.get("csrftoken").map(|s| s.as_str()),
            Some("xyz")
        );
    }

    // In the semicolon format a missing target yields an empty token; unlike the JSON
    // format (next test) there is no fallback to a JWT-looking value.
    #[test]
    fn parse_cookie_input_semicolon_format_with_missing_target_returns_empty_token() {
        let parsed = parse_cookie_input(
            "other=foo; auth=eyJhbGciOiJIUzI1NiJ9.payload.sig",
            "nonexistent",
        );

        assert_eq!(parsed.token, "");
        assert_eq!(parsed.cookies.len(), 2);
    }

    #[test]
    fn parse_cookie_input_json_array_with_missing_target_falls_back_to_jwt_prefix() {
        let input = r#"[{"name":"other","value":"foo"},{"name":"auth","value":"eyJabcdef"}]"#;
        let parsed = parse_cookie_input(input, "nonexistent");

        assert!(parsed.token.starts_with("eyJ"));
    }

    #[test]
    fn parse_cookie_input_parses_json_array() {
        let input = r#"[
            {"name":"sessionid","value":"abc"},
            {"name":"csrftoken","value":"xyz"}
        ]"#;
        let parsed = parse_cookie_input(input, "sessionid");

        assert_eq!(parsed.token, "abc");
        assert_eq!(parsed.cookies.len(), 2);
        assert!(parsed.cookie_string.contains("sessionid=abc"));
        assert!(parsed.cookie_string.contains("csrftoken=xyz"));
    }

    #[test]
    fn parse_cookie_input_parses_json_object_with_cookies_array() {
        let input = r#"{"cookies":[{"name":"x","value":"1"}]}"#;
        let parsed = parse_cookie_input(input, "x");

        assert_eq!(parsed.token, "1");
        assert_eq!(parsed.cookies.get("x").map(|s| s.as_str()), Some("1"));
    }

    #[test]
    fn parse_cookie_input_treats_raw_token_with_no_equals_as_token_only() {
        let parsed = parse_cookie_input("just_a_token_value", "sessionid");

        assert_eq!(parsed.token, "just_a_token_value");
        assert_eq!(
            parsed.cookies.get("sessionid").map(|s| s.as_str()),
            Some("just_a_token_value"),
        );
        assert_eq!(parsed.cookie_string, "sessionid=just_a_token_value");
    }

    #[test]
    fn parse_cookie_input_accepts_single_cookie_json_object() {
        let input = r#"{"name":"single","value":"v"}"#;
        let parsed = parse_cookie_input(input, "single");

        assert_eq!(parsed.token, "v");
        assert_eq!(parsed.cookies.get("single").map(|s| s.as_str()), Some("v"));
    }

    #[test]
    fn parse_cookie_input_empty_target_finds_jwt_value() {
        let parsed = parse_cookie_input("a=b; token=eyJabcdef", "");

        assert_eq!(parsed.token, "eyJabcdef");
    }

    // --- parse_bearer_input ------------------------------------------------------------

    #[test]
    fn parse_bearer_input_extracts_access_token_from_json() {
        let input = r#"{"access_token":"secret123"}"#;

        assert_eq!(parse_bearer_input(input), "secret123");
    }

    #[test]
    fn parse_bearer_input_prefers_access_token_over_token_field() {
        let input = r#"{"access_token":"want_this","token":"not_this"}"#;

        assert_eq!(parse_bearer_input(input), "want_this");
    }

    #[test]
    fn parse_bearer_input_extracts_jwt_value_from_cookies_array() {
        let input = r#"[{"name":"sess","value":"eyJhbGciOiJIUzI1NiJ9.payloadpayloadpayloadpayloadpayloadpayload.sig"}]"#;

        let out = parse_bearer_input(input);
        assert!(out.starts_with("eyJ"));
        assert!(out.len() > 50);
    }

    #[test]
    fn parse_bearer_input_falls_back_to_auth_like_cookie_name() {
        let input =
            r#"{"cookies":[{"name":"auth_session","value":"long_enough_session_value_xyz"}]}"#;

        assert_eq!(parse_bearer_input(input), "long_enough_session_value_xyz");
    }

    #[test]
    fn parse_bearer_input_returns_raw_when_not_json() {
        let input = "raw_bearer_token_xyz";

        assert_eq!(parse_bearer_input(input), "raw_bearer_token_xyz");
    }

    // When no candidate qualifies, the whole (trimmed) input is returned unchanged.
    #[test]
    fn parse_bearer_input_rejects_short_auth_values() {
        let input = r#"{"cookies":[{"name":"auth","value":"short"}]}"#;

        assert_eq!(parse_bearer_input(input), input);
    }

    // --- Netscape domain-field handling ------------------------------------------------
    // NOTE(review): the first test below derives `include_subdomains` from the leading dot of
    // the domain field, while `load_extension_cookies_for_domain` reads it from the second
    // tab-separated field. Both tests exercise only `cookie_domain_matches`, not the loader.

    #[test]
    fn netscape_leading_dot_in_cookie_domain_matches_subdomain() {
        let raw_domain = ".example.com";
        let cookie_domain = raw_domain.trim_start_matches('.').to_lowercase();
        let request_domain = "sub.example.com".trim_start_matches('.').to_lowercase();
        let include_subdomains = raw_domain.starts_with('.');

        assert!(cookie_domain_matches(
            &request_domain,
            &cookie_domain,
            include_subdomains
        ));
    }

    #[test]
    fn netscape_no_leading_dot_is_host_only() {
        let raw_domain = "example.com";
        let cookie_domain = raw_domain.trim_start_matches('.').to_lowercase();
        let request_domain = "sub.example.com".trim_start_matches('.').to_lowercase();

        assert!(!cookie_domain_matches(
            &request_domain,
            &cookie_domain,
            false
        ));
    }
}