Skip to main content

mangofetch_core/platforms/reddit/
mod.rs

1use anyhow::anyhow;
2use async_trait::async_trait;
3use tokio::sync::mpsc;
4
5use crate::core::direct_downloader;
6use crate::core::ffmpeg;
7use crate::core::redirect;
8use crate::models::media::{DownloadOptions, DownloadResult, MediaInfo, MediaType, VideoQuality};
9use crate::platforms::traits::PlatformDownloader;
10
/// Browser-style `User-Agent` string installed on the HTTP client built by
/// `RedditDownloader::new`.
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36";
12
/// Platform downloader for Reddit posts (videos, GIFs, images and galleries).
pub struct RedditDownloader {
    /// HTTP client used for every request this downloader issues.
    client: reqwest::Client,
}
16
/// Kind of media found in a Reddit post, as classified by `parse_media`.
enum RedditMedia {
    /// Reddit-hosted video.
    Video {
        /// `fallback_url` of the post's `reddit_video`, with the query string removed.
        video_url: String,
        /// Value of the `duration` field, when present and numeric.
        duration: Option<f64>,
    },
    /// Post whose `url` ends in `.gif`.
    Gif {
        /// Direct URL of the GIF.
        url: String,
    },
    /// Single still image.
    Image {
        /// Direct URL of the image.
        url: String,
    },
    /// Gallery post with one entry per usable gallery item.
    Gallery {
        /// Items in the order they appear in `gallery_data.items`.
        items: Vec<GalleryItem>,
    },
}
32
/// One downloadable entry of a gallery post.
struct GalleryItem {
    /// Source URL with `&amp;` entities already decoded to `&`.
    url: String,
    /// File extension (without dot) derived from the item's MIME type.
    ext: String,
}
37
/// `Default` simply delegates to [`RedditDownloader::new`].
impl Default for RedditDownloader {
    fn default() -> Self {
        Self::new()
    }
}
43
44impl RedditDownloader {
45    pub fn new() -> Self {
46        let mut builder = crate::core::http_client::apply_global_proxy(reqwest::Client::builder())
47            .user_agent(USER_AGENT)
48            .timeout(std::time::Duration::from_secs(120))
49            .connect_timeout(std::time::Duration::from_secs(15));
50
51        if let Some(jar) =
52            crate::core::cookie_parser::load_extension_cookies_for_domain("reddit.com")
53        {
54            builder = builder.cookie_provider(jar);
55        }
56
57        let client = builder.build().unwrap_or_default();
58        Self { client }
59    }
60
61    fn extract_post_id(url: &str) -> Option<String> {
62        let parsed = url::Url::parse(url).ok()?;
63        let segments: Vec<&str> = parsed.path().split('/').filter(|s| !s.is_empty()).collect();
64
65        if segments.len() >= 4 && segments[0] == "r" && segments[2] == "comments" {
66            return Some(segments[3].to_string());
67        }
68
69        if segments.first() == Some(&"comments") {
70            return segments.get(1).map(|s| s.to_string());
71        }
72
73        if segments.first() == Some(&"video") {
74            return segments.get(1).map(|s| s.to_string());
75        }
76
77        None
78    }
79
80    fn extract_subreddit(url: &str) -> Option<String> {
81        let parsed = url::Url::parse(url).ok()?;
82        let segments: Vec<&str> = parsed.path().split('/').filter(|s| !s.is_empty()).collect();
83        if segments.first() == Some(&"r") {
84            return segments.get(1).map(|s| s.to_string());
85        }
86        None
87    }
88
89    fn is_short_link(url: &str) -> bool {
90        if let Ok(parsed) = url::Url::parse(url) {
91            if let Some(host) = parsed.host_str() {
92                let host = host.to_lowercase();
93                return host == "v.redd.it" || host == "redd.it";
94            }
95        }
96        false
97    }
98
99    fn is_share_link(url: &str) -> bool {
100        if let Ok(parsed) = url::Url::parse(url) {
101            let segments: Vec<&str> = parsed.path().split('/').filter(|s| !s.is_empty()).collect();
102            return segments.len() >= 4 && segments[0] == "r" && segments[2] == "s";
103        }
104        false
105    }
106
107    async fn resolve_to_canonical(&self, url: &str) -> anyhow::Result<String> {
108        if Self::is_short_link(url) {
109            return redirect::resolve_redirect(&self.client, url).await;
110        }
111
112        if Self::is_share_link(url) {
113            return redirect::resolve_redirect(&self.client, url).await;
114        }
115
116        Ok(url.to_string())
117    }
118
119    async fn fetch_post_data(&self, post_id: &str) -> anyhow::Result<serde_json::Value> {
120        let url = format!("https://www.reddit.com/comments/{}.json", post_id);
121
122        let response = self
123            .client
124            .get(&url)
125            .header("Accept", "application/json")
126            .send()
127            .await?;
128
129        if !response.status().is_success() {
130            return Err(anyhow!("Reddit retornou HTTP {}", response.status()));
131        }
132
133        let json: serde_json::Value = response.json().await?;
134
135        if !json.is_array() {
136            return Err(anyhow!("Post not found"));
137        }
138
139        json.as_array()
140            .and_then(|arr| arr.first())
141            .and_then(|listing| listing.pointer("/data/children/0/data"))
142            .cloned()
143            .ok_or_else(|| anyhow!("Post not found"))
144    }
145
146    fn construct_audio_url(fallback_url: &str) -> Vec<String> {
147        let video = fallback_url.split('?').next().unwrap_or(fallback_url);
148        let mut candidates = Vec::new();
149
150        if video.contains(".mp4") {
151            if let Some(base) = video.split('_').next() {
152                candidates.push(format!("{}_audio.mp4", base));
153                candidates.push(format!("{}_AUDIO_128.mp4", base));
154            }
155        }
156
157        if let Some(dash_pos) = video.find("DASH") {
158            candidates.push(format!("{}audio", &video[..dash_pos]));
159        }
160
161        candidates
162    }
163
164    async fn find_audio_url(&self, fallback_url: &str) -> Option<String> {
165        let candidates = Self::construct_audio_url(fallback_url);
166
167        for candidate in candidates {
168            let resp = tokio::time::timeout(
169                std::time::Duration::from_secs(5),
170                self.client.head(&candidate).send(),
171            )
172            .await;
173
174            if let Ok(Ok(r)) = resp {
175                if r.status().is_success() {
176                    return Some(candidate);
177                }
178            }
179        }
180
181        None
182    }
183
184    fn get_resolution_variants(video_url: &str) -> Vec<String> {
185        let resolutions = [
186            "DASH_720.mp4",
187            "DASH_480.mp4",
188            "DASH_360.mp4",
189            "DASH_240.mp4",
190        ];
191        let mut variants = vec![video_url.to_string()];
192        for res in &resolutions {
193            if !video_url.contains(res) {
194                if let Some(base) = video_url.rfind("DASH_") {
195                    let mut variant = video_url[..base].to_string();
196                    variant.push_str(res);
197                    variants.push(variant);
198                }
199            }
200        }
201        variants
202    }
203
204    async fn download_video_with_fallback(
205        &self,
206        video_url: &str,
207        output: &std::path::Path,
208        progress_tx: mpsc::Sender<f64>,
209    ) -> anyhow::Result<u64> {
210        let variants = Self::get_resolution_variants(video_url);
211        let mut last_err = anyhow!("No resolution available");
212
213        for variant in &variants {
214            match direct_downloader::download_direct(
215                &self.client,
216                variant,
217                output,
218                progress_tx.clone(),
219                None,
220            )
221            .await
222            {
223                Ok(bytes) => return Ok(bytes),
224                Err(e) => {
225                    last_err = e;
226                    let _ = tokio::fs::remove_file(output).await;
227                }
228            }
229        }
230
231        Err(last_err)
232    }
233
234    fn parse_media(data: &serde_json::Value) -> Option<RedditMedia> {
235        let is_gallery = data
236            .get("is_gallery")
237            .and_then(|v| v.as_bool())
238            .unwrap_or(false);
239        if is_gallery {
240            if let Some(gallery) = Self::parse_gallery(data) {
241                return Some(gallery);
242            }
243        }
244
245        if let Some(url) = data.get("url").and_then(|v| v.as_str()) {
246            if url.ends_with(".gif") {
247                return Some(RedditMedia::Gif {
248                    url: url.to_string(),
249                });
250            }
251        }
252
253        if let Some(reddit_video) = data.pointer("/secure_media/reddit_video") {
254            let fallback = reddit_video.get("fallback_url").and_then(|v| v.as_str())?;
255            let duration = reddit_video.get("duration").and_then(|v| v.as_f64());
256            let video_url = fallback.split('?').next().unwrap_or(fallback).to_string();
257
258            return Some(RedditMedia::Video {
259                video_url,
260                duration,
261            });
262        }
263
264        if let Some(url) = data.get("url").and_then(|v| v.as_str()) {
265            let is_media = data
266                .get("is_reddit_media_domain")
267                .and_then(|v| v.as_bool())
268                .unwrap_or(false);
269            if is_media
270                || url.contains("i.redd.it")
271                || url.ends_with(".jpg")
272                || url.ends_with(".png")
273                || url.ends_with(".jpeg")
274            {
275                return Some(RedditMedia::Image {
276                    url: url.to_string(),
277                });
278            }
279        }
280
281        None
282    }
283
284    fn parse_gallery(data: &serde_json::Value) -> Option<RedditMedia> {
285        let gallery_data = data.get("gallery_data")?.get("items")?.as_array()?;
286        let media_metadata = data.get("media_metadata")?;
287
288        let mut items = Vec::new();
289
290        for item in gallery_data {
291            let media_id = item.get("media_id").and_then(|v| v.as_str())?;
292            let meta = media_metadata.get(media_id)?;
293
294            let mime = meta
295                .get("m")
296                .and_then(|v| v.as_str())
297                .unwrap_or("image/jpeg");
298            let ext = match mime {
299                "image/png" => "png",
300                "image/gif" => "gif",
301                "image/webp" => "webp",
302                _ => "jpg",
303            };
304
305            let url = if let Some(source) = meta.get("s") {
306                source
307                    .get("u")
308                    .or_else(|| source.get("gif"))
309                    .and_then(|v| v.as_str())
310                    .map(|u| u.replace("&amp;", "&"))
311            } else {
312                None
313            };
314
315            if let Some(url) = url {
316                items.push(GalleryItem {
317                    url,
318                    ext: ext.to_string(),
319                });
320            }
321        }
322
323        if items.is_empty() {
324            return None;
325        }
326
327        Some(RedditMedia::Gallery { items })
328    }
329}
330
331#[async_trait]
332impl PlatformDownloader for RedditDownloader {
    /// Platform identifier reported by this downloader: `"reddit"`.
    fn name(&self) -> &str {
        "reddit"
    }
336
337    fn can_handle(&self, url: &str) -> bool {
338        if let Ok(parsed) = url::Url::parse(url) {
339            if let Some(host) = parsed.host_str() {
340                let host = host.to_lowercase();
341                return host == "reddit.com"
342                    || host.ends_with(".reddit.com")
343                    || host == "v.redd.it"
344                    || host == "redd.it";
345            }
346        }
347        false
348    }
349
350    async fn get_media_info(&self, url: &str) -> anyhow::Result<MediaInfo> {
351        match self.native_get_media_info(url).await {
352            Ok(info) => Ok(info),
353            Err(native_err) => {
354                tracing::warn!(
355                    "[reddit] native failed: {}, trying yt-dlp fallback",
356                    native_err
357                );
358                self.fallback_ytdlp(url).await.map_err(|_| native_err)
359            }
360        }
361    }
362
363    async fn download(
364        &self,
365        info: &MediaInfo,
366        opts: &DownloadOptions,
367        progress: mpsc::Sender<f64>,
368    ) -> anyhow::Result<DownloadResult> {
369        if let Some(quality) = info.available_qualities.first() {
370            if quality.format == "ytdlp" {
371                let ytdlp_path = crate::core::ytdlp::ensure_ytdlp(None).await?;
372                return crate::core::ytdlp::download_video(
373                    &ytdlp_path,
374                    &quality.url,
375                    &opts.output_dir,
376                    None,
377                    progress,
378                    opts.download_mode.as_deref(),
379                    opts.video_format.as_deref(),
380                    opts.audio_format.as_deref(),
381                    opts.audio_quality.as_deref(),
382                    opts.format_id.as_deref(),
383                    opts.filename_template.as_deref(),
384                    opts.referer.as_deref().or(Some("https://www.reddit.com/")),
385                    opts.cancel_token.clone(),
386                    None,
387                    opts.concurrent_fragments,
388                    false,
389                    &[],
390                )
391                .await;
392            }
393        }
394
395        self.native_download(info, opts, progress).await
396    }
397}
398
399impl RedditDownloader {
400    async fn fallback_ytdlp(&self, url: &str) -> anyhow::Result<MediaInfo> {
401        let ytdlp_path = crate::core::ytdlp::ensure_ytdlp(None).await?;
402        let json = crate::core::ytdlp::get_video_info(&ytdlp_path, url, &[]).await?;
403        crate::platforms::generic_ytdlp::GenericYtdlpDownloader::parse_video_info(&json)
404    }
405
406    async fn native_get_media_info(&self, url: &str) -> anyhow::Result<MediaInfo> {
407        let canonical = self.resolve_to_canonical(url).await?;
408
409        let post_id = Self::extract_post_id(&canonical)
410            .ok_or_else(|| anyhow!("Could not extract post ID"))?;
411
412        let subreddit = Self::extract_subreddit(&canonical).unwrap_or_default();
413
414        let data = self.fetch_post_data(&post_id).await?;
415
416        let media = Self::parse_media(&data).ok_or_else(|| anyhow!("No media found in post"))?;
417
418        let source_id = if subreddit.is_empty() {
419            post_id.clone()
420        } else {
421            format!("{}_{}", subreddit.to_lowercase(), post_id)
422        };
423
424        let title = format!("reddit_{}", source_id);
425
426        match media {
427            RedditMedia::Video {
428                video_url,
429                duration,
430            } => {
431                let audio = self.find_audio_url(&video_url).await;
432                let mut qualities = vec![VideoQuality {
433                    label: "video".to_string(),
434                    width: 0,
435                    height: 0,
436                    url: video_url,
437                    format: "mp4".to_string(),
438
439                    filesize_bytes: None,
440                }];
441
442                if let Some(audio_url) = audio {
443                    qualities.push(VideoQuality {
444                        label: "audio".to_string(),
445                        width: 0,
446                        height: 0,
447                        url: audio_url,
448                        format: "mp4_audio".to_string(),
449
450                        filesize_bytes: None,
451                    });
452                }
453
454                Ok(MediaInfo {
455                    title,
456                    author: subreddit,
457                    platform: "reddit".to_string(),
458                    duration_seconds: duration,
459                    thumbnail_url: None,
460                    available_qualities: qualities,
461                    media_type: MediaType::Video,
462                    file_size_bytes: None,
463                })
464            }
465            RedditMedia::Gif { url: gif_url } => Ok(MediaInfo {
466                title,
467                author: subreddit,
468                platform: "reddit".to_string(),
469                duration_seconds: None,
470                thumbnail_url: None,
471                available_qualities: vec![VideoQuality {
472                    label: "original".to_string(),
473                    width: 0,
474                    height: 0,
475                    url: gif_url,
476                    format: "gif".to_string(),
477
478                    filesize_bytes: None,
479                }],
480                media_type: MediaType::Gif,
481                file_size_bytes: None,
482            }),
483            RedditMedia::Image { url: image_url } => {
484                let ext = if image_url.ends_with(".png") {
485                    "png"
486                } else {
487                    "jpg"
488                };
489                Ok(MediaInfo {
490                    title,
491                    author: subreddit,
492                    platform: "reddit".to_string(),
493                    duration_seconds: None,
494                    thumbnail_url: None,
495                    available_qualities: vec![VideoQuality {
496                        label: "original".to_string(),
497                        width: 0,
498                        height: 0,
499                        url: image_url,
500                        format: ext.to_string(),
501
502                        filesize_bytes: None,
503                    }],
504                    media_type: MediaType::Photo,
505                    file_size_bytes: None,
506                })
507            }
508            RedditMedia::Gallery { items } => {
509                let qualities: Vec<VideoQuality> = items
510                    .into_iter()
511                    .enumerate()
512                    .map(|(i, item)| VideoQuality {
513                        label: format!("media_{}", i + 1),
514                        width: 0,
515                        height: 0,
516                        url: item.url,
517                        format: item.ext,
518
519                        filesize_bytes: None,
520                    })
521                    .collect();
522
523                Ok(MediaInfo {
524                    title,
525                    author: subreddit,
526                    platform: "reddit".to_string(),
527                    duration_seconds: None,
528                    thumbnail_url: None,
529                    available_qualities: qualities,
530                    media_type: MediaType::Carousel,
531                    file_size_bytes: None,
532                })
533            }
534        }
535    }
536
537    async fn native_download(
538        &self,
539        info: &MediaInfo,
540        opts: &DownloadOptions,
541        progress: mpsc::Sender<f64>,
542    ) -> anyhow::Result<DownloadResult> {
543        match info.media_type {
544            MediaType::Video => {
545                let video_quality = info
546                    .available_qualities
547                    .iter()
548                    .find(|q| q.label == "video")
549                    .ok_or_else(|| anyhow!("No video URL"))?;
550
551                let audio_quality = info.available_qualities.iter().find(|q| q.label == "audio");
552
553                let has_audio = audio_quality.is_some();
554                let ffmpeg_available = ffmpeg::is_ffmpeg_available().await;
555
556                if has_audio && !ffmpeg_available {
557                    tracing::warn!("[reddit] Video has separate audio but FFmpeg is not installed — downloading video without audio");
558                }
559
560                if has_audio {
561                    let video_tmp = opts.output_dir.join(format!(
562                        "{}_video_tmp.mp4",
563                        sanitize_filename::sanitize(&info.title)
564                    ));
565                    let audio_tmp = opts.output_dir.join(format!(
566                        "{}_audio_tmp.mp4",
567                        sanitize_filename::sanitize(&info.title)
568                    ));
569                    let output = opts
570                        .output_dir
571                        .join(format!("{}.mp4", sanitize_filename::sanitize(&info.title)));
572
573                    let _ = progress.send(0.0).await;
574
575                    let (vtx, mut vrx) = mpsc::channel::<f64>(8);
576                    let progress_video = progress.clone();
577                    tokio::spawn(async move {
578                        while let Some(p) = vrx.recv().await {
579                            let scaled = p * 0.6;
580                            let _ = progress_video.send(scaled).await;
581                        }
582                    });
583
584                    let video_bytes = self
585                        .download_video_with_fallback(&video_quality.url, &video_tmp, vtx)
586                        .await?;
587
588                    let _ = progress.send(60.0).await;
589
590                    let audio_url = &audio_quality.unwrap().url;
591                    let (atx, mut arx) = mpsc::channel::<f64>(8);
592                    let progress_audio = progress.clone();
593                    tokio::spawn(async move {
594                        while let Some(p) = arx.recv().await {
595                            let scaled = 60.0 + p * 0.25;
596                            let _ = progress_audio.send(scaled).await;
597                        }
598                    });
599
600                    let audio_ok = direct_downloader::download_direct(
601                        &self.client,
602                        audio_url,
603                        &audio_tmp,
604                        atx,
605                        None,
606                    )
607                    .await
608                    .is_ok();
609
610                    let _ = progress.send(85.0).await;
611
612                    if audio_ok && ffmpeg_available {
613                        ffmpeg::mux_video_audio(&video_tmp, &audio_tmp, &output).await?;
614                        let _ = tokio::fs::remove_file(&video_tmp).await;
615                        let _ = tokio::fs::remove_file(&audio_tmp).await;
616                        let _ = progress.send(100.0).await;
617
618                        let file_size = tokio::fs::metadata(&output).await?.len();
619                        Ok(DownloadResult {
620                            file_path: output,
621                            file_size_bytes: file_size,
622                            duration_seconds: info.duration_seconds.unwrap_or(0.0),
623                            torrent_id: None,
624                        })
625                    } else {
626                        let video_final = opts.output_dir.join(format!(
627                            "{}{}.mp4",
628                            sanitize_filename::sanitize(&info.title),
629                            if !audio_ok { "" } else { "_noaudio" }
630                        ));
631                        let _ = tokio::fs::rename(&video_tmp, &video_final).await;
632
633                        if audio_ok {
634                            let audio_final = opts.output_dir.join(format!(
635                                "{}_audio.mp4",
636                                sanitize_filename::sanitize(&info.title)
637                            ));
638                            let _ = tokio::fs::rename(&audio_tmp, &audio_final).await;
639                        } else {
640                            let _ = tokio::fs::remove_file(&audio_tmp).await;
641                        }
642
643                        let _ = progress.send(100.0).await;
644
645                        Ok(DownloadResult {
646                            file_path: video_final,
647                            file_size_bytes: video_bytes,
648                            duration_seconds: info.duration_seconds.unwrap_or(0.0),
649                            torrent_id: None,
650                        })
651                    }
652                } else {
653                    let output = opts
654                        .output_dir
655                        .join(format!("{}.mp4", sanitize_filename::sanitize(&info.title)));
656                    let bytes = self
657                        .download_video_with_fallback(&video_quality.url, &output, progress)
658                        .await?;
659
660                    Ok(DownloadResult {
661                        file_path: output,
662                        file_size_bytes: bytes,
663                        duration_seconds: info.duration_seconds.unwrap_or(0.0),
664                        torrent_id: None,
665                    })
666                }
667            }
668            MediaType::Gif => {
669                let url = &info
670                    .available_qualities
671                    .first()
672                    .ok_or_else(|| anyhow!("Nenhum URL GIF"))?
673                    .url;
674                let output = opts
675                    .output_dir
676                    .join(format!("{}.gif", sanitize_filename::sanitize(&info.title)));
677                let bytes =
678                    direct_downloader::download_direct(&self.client, url, &output, progress, None)
679                        .await?;
680
681                Ok(DownloadResult {
682                    file_path: output,
683                    file_size_bytes: bytes,
684                    duration_seconds: 0.0,
685                    torrent_id: None,
686                })
687            }
688            MediaType::Photo => {
689                let quality = info
690                    .available_qualities
691                    .first()
692                    .ok_or_else(|| anyhow!("Nenhum URL de imagem"))?;
693                let ext = &quality.format;
694                let output = opts.output_dir.join(format!(
695                    "{}.{}",
696                    sanitize_filename::sanitize(&info.title),
697                    ext
698                ));
699                let bytes = direct_downloader::download_direct(
700                    &self.client,
701                    &quality.url,
702                    &output,
703                    progress,
704                    None,
705                )
706                .await?;
707
708                Ok(DownloadResult {
709                    file_path: output,
710                    file_size_bytes: bytes,
711                    duration_seconds: 0.0,
712                    torrent_id: None,
713                })
714            }
715            MediaType::Carousel => {
716                let count = info.available_qualities.len();
717                let mut total_bytes = 0u64;
718                let mut last_path = opts.output_dir.clone();
719
720                for (i, quality) in info.available_qualities.iter().enumerate() {
721                    let filename = format!(
722                        "{}_{}.{}",
723                        sanitize_filename::sanitize(&info.title),
724                        i + 1,
725                        quality.format,
726                    );
727                    let output = opts.output_dir.join(&filename);
728                    let (tx, _rx) = mpsc::channel(8);
729
730                    let bytes = direct_downloader::download_direct(
731                        &self.client,
732                        &quality.url,
733                        &output,
734                        tx,
735                        None,
736                    )
737                    .await?;
738
739                    total_bytes += bytes;
740                    last_path = output;
741
742                    let percent = ((i + 1) as f64 / count as f64) * 100.0;
743                    let _ = progress.send(percent).await;
744                }
745
746                Ok(DownloadResult {
747                    file_path: last_path,
748                    file_size_bytes: total_bytes,
749                    duration_seconds: 0.0,
750                    torrent_id: None,
751                })
752            }
753            _ => Err(anyhow!("Unsupported media type")),
754        }
755    }
756}