Skip to main content

mangofetch_core/platforms/reddit/
mod.rs

1use anyhow::anyhow;
2use async_trait::async_trait;
3use tokio::sync::mpsc;
4
5use crate::core::direct_downloader;
6use crate::core::ffmpeg;
7use crate::core::redirect;
8use crate::models::media::{DownloadOptions, DownloadResult, MediaInfo, MediaType, VideoQuality};
9use crate::platforms::traits::PlatformDownloader;
10
/// Browser-style `User-Agent` value set on the HTTP client built in
/// `RedditDownloader::new`.
const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36";
12
/// Platform downloader for Reddit posts (videos, GIFs, images and galleries).
pub struct RedditDownloader {
    /// HTTP client used for every request this downloader issues.
    client: reqwest::Client,
}
16
/// The kinds of media `parse_media` can recognise in a post's JSON.
enum RedditMedia {
    /// Reddit-hosted video taken from `secure_media.reddit_video`.
    Video {
        /// The `fallback_url` with its query string removed.
        video_url: String,
        /// Value of the `duration` field, when it is present and numeric.
        duration: Option<f64>,
    },
    /// Post whose `url` ends in `.gif`.
    Gif {
        url: String,
    },
    /// Single image post.
    Image {
        url: String,
    },
    /// Gallery post with one entry per item that exposed a usable URL.
    Gallery {
        items: Vec<GalleryItem>,
    },
}
32
/// One downloadable entry of a gallery post.
struct GalleryItem {
    /// Media URL with `&amp;` entities already replaced by `&`.
    url: String,
    /// File extension (no leading dot) chosen from the item's MIME type.
    ext: String,
}
37
/// `Default` delegates to `RedditDownloader::new`.
impl Default for RedditDownloader {
    fn default() -> Self {
        Self::new()
    }
}
43
44impl RedditDownloader {
45    pub fn new() -> Self {
46        let mut builder = crate::core::http_client::apply_global_proxy(reqwest::Client::builder())
47            .user_agent(USER_AGENT)
48            .timeout(std::time::Duration::from_secs(120))
49            .connect_timeout(std::time::Duration::from_secs(15));
50
51        if let Some(jar) =
52            crate::core::cookie_parser::load_extension_cookies_for_domain("reddit.com")
53        {
54            builder = builder.cookie_provider(jar);
55        }
56
57        let client = builder.build().unwrap_or_default();
58        Self { client }
59    }
60
61    fn extract_post_id(url: &str) -> Option<String> {
62        let parsed = url::Url::parse(url).ok()?;
63        let segments: Vec<&str> = parsed.path().split('/').filter(|s| !s.is_empty()).collect();
64
65        if segments.len() >= 4 && segments[0] == "r" && segments[2] == "comments" {
66            return Some(segments[3].to_string());
67        }
68
69        if segments.first() == Some(&"comments") {
70            return segments.get(1).map(|s| s.to_string());
71        }
72
73        if segments.first() == Some(&"video") {
74            return segments.get(1).map(|s| s.to_string());
75        }
76
77        None
78    }
79
80    fn extract_subreddit(url: &str) -> Option<String> {
81        let parsed = url::Url::parse(url).ok()?;
82        let segments: Vec<&str> = parsed.path().split('/').filter(|s| !s.is_empty()).collect();
83        if segments.first() == Some(&"r") {
84            return segments.get(1).map(|s| s.to_string());
85        }
86        None
87    }
88
89    fn is_short_link(url: &str) -> bool {
90        if let Ok(parsed) = url::Url::parse(url) {
91            if let Some(host) = parsed.host_str() {
92                let host = host.to_lowercase();
93                return host == "v.redd.it" || host == "redd.it";
94            }
95        }
96        false
97    }
98
99    fn is_share_link(url: &str) -> bool {
100        if let Ok(parsed) = url::Url::parse(url) {
101            let segments: Vec<&str> = parsed.path().split('/').filter(|s| !s.is_empty()).collect();
102            return segments.len() >= 4 && segments[0] == "r" && segments[2] == "s";
103        }
104        false
105    }
106
107    async fn resolve_to_canonical(&self, url: &str) -> anyhow::Result<String> {
108        if Self::is_short_link(url) {
109            return redirect::resolve_redirect(&self.client, url).await;
110        }
111
112        if Self::is_share_link(url) {
113            return redirect::resolve_redirect(&self.client, url).await;
114        }
115
116        Ok(url.to_string())
117    }
118
119    async fn fetch_post_data(&self, post_id: &str) -> anyhow::Result<serde_json::Value> {
120        let url = format!("https://www.reddit.com/comments/{}.json", post_id);
121
122        let response = self
123            .client
124            .get(&url)
125            .header("Accept", "application/json")
126            .send()
127            .await?;
128
129        if !response.status().is_success() {
130            return Err(anyhow!("Reddit retornou HTTP {}", response.status()));
131        }
132
133        let json: serde_json::Value = response.json().await?;
134
135        if !json.is_array() {
136            return Err(anyhow!("Post not found"));
137        }
138
139        json.as_array()
140            .and_then(|arr| arr.first())
141            .and_then(|listing| listing.pointer("/data/children/0/data"))
142            .cloned()
143            .ok_or_else(|| anyhow!("Post not found"))
144    }
145
146    fn construct_audio_url(fallback_url: &str) -> Vec<String> {
147        let video = fallback_url.split('?').next().unwrap_or(fallback_url);
148        let mut candidates = Vec::new();
149
150        if video.contains(".mp4") {
151            if let Some(base) = video.split('_').next() {
152                candidates.push(format!("{}_audio.mp4", base));
153                candidates.push(format!("{}_AUDIO_128.mp4", base));
154            }
155        }
156
157        if let Some(dash_pos) = video.find("DASH") {
158            candidates.push(format!("{}audio", &video[..dash_pos]));
159        }
160
161        candidates
162    }
163
164    async fn find_audio_url(&self, fallback_url: &str) -> Option<String> {
165        let candidates = Self::construct_audio_url(fallback_url);
166
167        for candidate in candidates {
168            let resp = tokio::time::timeout(
169                std::time::Duration::from_secs(5),
170                self.client.head(&candidate).send(),
171            )
172            .await;
173
174            if let Ok(Ok(r)) = resp {
175                if r.status().is_success() {
176                    return Some(candidate);
177                }
178            }
179        }
180
181        None
182    }
183
184    fn get_resolution_variants(video_url: &str) -> Vec<String> {
185        let resolutions = [
186            "DASH_720.mp4",
187            "DASH_480.mp4",
188            "DASH_360.mp4",
189            "DASH_240.mp4",
190        ];
191        let mut variants = vec![video_url.to_string()];
192        for res in &resolutions {
193            if !video_url.contains(res) {
194                if let Some(base) = video_url.rfind("DASH_") {
195                    let mut variant = video_url[..base].to_string();
196                    variant.push_str(res);
197                    variants.push(variant);
198                }
199            }
200        }
201        variants
202    }
203
204    async fn download_video_with_fallback(
205        &self,
206        video_url: &str,
207        output: &std::path::Path,
208        progress_tx: mpsc::Sender<f64>,
209    ) -> anyhow::Result<u64> {
210        let variants = Self::get_resolution_variants(video_url);
211        let mut last_err = anyhow!("No resolution available");
212
213        for variant in &variants {
214            match direct_downloader::download_direct(
215                &self.client,
216                variant,
217                output,
218                progress_tx.clone(),
219                None,
220            )
221            .await
222            {
223                Ok(bytes) => return Ok(bytes),
224                Err(e) => {
225                    last_err = e;
226                    let _ = tokio::fs::remove_file(output).await;
227                }
228            }
229        }
230
231        Err(last_err)
232    }
233
234    fn parse_media(data: &serde_json::Value) -> Option<RedditMedia> {
235        let is_gallery = data
236            .get("is_gallery")
237            .and_then(|v| v.as_bool())
238            .unwrap_or(false);
239        if is_gallery {
240            if let Some(gallery) = Self::parse_gallery(data) {
241                return Some(gallery);
242            }
243        }
244
245        if let Some(url) = data.get("url").and_then(|v| v.as_str()) {
246            if url.ends_with(".gif") {
247                return Some(RedditMedia::Gif {
248                    url: url.to_string(),
249                });
250            }
251        }
252
253        if let Some(reddit_video) = data.pointer("/secure_media/reddit_video") {
254            let fallback = reddit_video.get("fallback_url").and_then(|v| v.as_str())?;
255            let duration = reddit_video.get("duration").and_then(|v| v.as_f64());
256            let video_url = fallback.split('?').next().unwrap_or(fallback).to_string();
257
258            return Some(RedditMedia::Video {
259                video_url,
260                duration,
261            });
262        }
263
264        if let Some(url) = data.get("url").and_then(|v| v.as_str()) {
265            let is_media = data
266                .get("is_reddit_media_domain")
267                .and_then(|v| v.as_bool())
268                .unwrap_or(false);
269            if is_media
270                || url.contains("i.redd.it")
271                || url.ends_with(".jpg")
272                || url.ends_with(".png")
273                || url.ends_with(".jpeg")
274            {
275                return Some(RedditMedia::Image {
276                    url: url.to_string(),
277                });
278            }
279        }
280
281        None
282    }
283
284    fn parse_gallery(data: &serde_json::Value) -> Option<RedditMedia> {
285        let gallery_data = data.get("gallery_data")?.get("items")?.as_array()?;
286        let media_metadata = data.get("media_metadata")?;
287
288        let mut items = Vec::new();
289
290        for item in gallery_data {
291            let media_id = item.get("media_id").and_then(|v| v.as_str())?;
292            let meta = media_metadata.get(media_id)?;
293
294            let mime = meta
295                .get("m")
296                .and_then(|v| v.as_str())
297                .unwrap_or("image/jpeg");
298            let ext = match mime {
299                "image/png" => "png",
300                "image/gif" => "gif",
301                "image/webp" => "webp",
302                _ => "jpg",
303            };
304
305            let url = if let Some(source) = meta.get("s") {
306                source
307                    .get("u")
308                    .or_else(|| source.get("gif"))
309                    .and_then(|v| v.as_str())
310                    .map(|u| u.replace("&amp;", "&"))
311            } else {
312                None
313            };
314
315            if let Some(url) = url {
316                items.push(GalleryItem {
317                    url,
318                    ext: ext.to_string(),
319                });
320            }
321        }
322
323        if items.is_empty() {
324            return None;
325        }
326
327        Some(RedditMedia::Gallery { items })
328    }
329}
330
331#[async_trait]
332impl PlatformDownloader for RedditDownloader {
    /// Identifier of this platform: `"reddit"`.
    fn name(&self) -> &str {
        "reddit"
    }
336
337    fn can_handle(&self, url: &str) -> bool {
338        if let Ok(parsed) = url::Url::parse(url) {
339            if let Some(host) = parsed.host_str() {
340                let host = host.to_lowercase();
341                return host == "reddit.com"
342                    || host.ends_with(".reddit.com")
343                    || host == "v.redd.it"
344                    || host == "redd.it";
345            }
346        }
347        false
348    }
349
350    async fn get_media_info(&self, url: &str) -> anyhow::Result<MediaInfo> {
351        match self.native_get_media_info(url).await {
352            Ok(info) => Ok(info),
353            Err(native_err) => {
354                tracing::warn!(
355                    "[reddit] native failed: {}, trying yt-dlp fallback",
356                    native_err
357                );
358                self.fallback_ytdlp(url).await.map_err(|_| native_err)
359            }
360        }
361    }
362
363    async fn download(
364        &self,
365        info: &MediaInfo,
366        opts: &DownloadOptions,
367        progress: mpsc::Sender<f64>,
368    ) -> anyhow::Result<DownloadResult> {
369        if let Some(quality) = info.available_qualities.first() {
370            if quality.format == "ytdlp" {
371                let ytdlp_path = crate::core::ytdlp::ensure_ytdlp(None).await?;
372                return crate::core::ytdlp::download_video(
373                    &ytdlp_path,
374                    &quality.url,
375                    &opts.output_dir,
376                    None,
377                    progress,
378                    opts.download_mode.as_deref(),
379                    opts.format_id.as_deref(),
380                    opts.filename_template.as_deref(),
381                    opts.referer.as_deref().or(Some("https://www.reddit.com/")),
382                    opts.cancel_token.clone(),
383                    None,
384                    opts.concurrent_fragments,
385                    false,
386                    &[],
387                )
388                .await;
389            }
390        }
391
392        self.native_download(info, opts, progress).await
393    }
394}
395
396impl RedditDownloader {
397    async fn fallback_ytdlp(&self, url: &str) -> anyhow::Result<MediaInfo> {
398        let ytdlp_path = crate::core::ytdlp::ensure_ytdlp(None).await?;
399        let json = crate::core::ytdlp::get_video_info(&ytdlp_path, url, &[]).await?;
400        crate::platforms::generic_ytdlp::GenericYtdlpDownloader::parse_video_info(&json)
401    }
402
403    async fn native_get_media_info(&self, url: &str) -> anyhow::Result<MediaInfo> {
404        let canonical = self.resolve_to_canonical(url).await?;
405
406        let post_id = Self::extract_post_id(&canonical)
407            .ok_or_else(|| anyhow!("Could not extract post ID"))?;
408
409        let subreddit = Self::extract_subreddit(&canonical).unwrap_or_default();
410
411        let data = self.fetch_post_data(&post_id).await?;
412
413        let media = Self::parse_media(&data).ok_or_else(|| anyhow!("No media found in post"))?;
414
415        let source_id = if subreddit.is_empty() {
416            post_id.clone()
417        } else {
418            format!("{}_{}", subreddit.to_lowercase(), post_id)
419        };
420
421        let title = format!("reddit_{}", source_id);
422
423        match media {
424            RedditMedia::Video {
425                video_url,
426                duration,
427            } => {
428                let audio = self.find_audio_url(&video_url).await;
429                let mut qualities = vec![VideoQuality {
430                    label: "video".to_string(),
431                    width: 0,
432                    height: 0,
433                    url: video_url,
434                    format: "mp4".to_string(),
435                }];
436
437                if let Some(audio_url) = audio {
438                    qualities.push(VideoQuality {
439                        label: "audio".to_string(),
440                        width: 0,
441                        height: 0,
442                        url: audio_url,
443                        format: "mp4_audio".to_string(),
444                    });
445                }
446
447                Ok(MediaInfo {
448                    title,
449                    author: subreddit,
450                    platform: "reddit".to_string(),
451                    duration_seconds: duration,
452                    thumbnail_url: None,
453                    available_qualities: qualities,
454                    media_type: MediaType::Video,
455                    file_size_bytes: None,
456                })
457            }
458            RedditMedia::Gif { url: gif_url } => Ok(MediaInfo {
459                title,
460                author: subreddit,
461                platform: "reddit".to_string(),
462                duration_seconds: None,
463                thumbnail_url: None,
464                available_qualities: vec![VideoQuality {
465                    label: "original".to_string(),
466                    width: 0,
467                    height: 0,
468                    url: gif_url,
469                    format: "gif".to_string(),
470                }],
471                media_type: MediaType::Gif,
472                file_size_bytes: None,
473            }),
474            RedditMedia::Image { url: image_url } => {
475                let ext = if image_url.ends_with(".png") {
476                    "png"
477                } else {
478                    "jpg"
479                };
480                Ok(MediaInfo {
481                    title,
482                    author: subreddit,
483                    platform: "reddit".to_string(),
484                    duration_seconds: None,
485                    thumbnail_url: None,
486                    available_qualities: vec![VideoQuality {
487                        label: "original".to_string(),
488                        width: 0,
489                        height: 0,
490                        url: image_url,
491                        format: ext.to_string(),
492                    }],
493                    media_type: MediaType::Photo,
494                    file_size_bytes: None,
495                })
496            }
497            RedditMedia::Gallery { items } => {
498                let qualities: Vec<VideoQuality> = items
499                    .into_iter()
500                    .enumerate()
501                    .map(|(i, item)| VideoQuality {
502                        label: format!("media_{}", i + 1),
503                        width: 0,
504                        height: 0,
505                        url: item.url,
506                        format: item.ext,
507                    })
508                    .collect();
509
510                Ok(MediaInfo {
511                    title,
512                    author: subreddit,
513                    platform: "reddit".to_string(),
514                    duration_seconds: None,
515                    thumbnail_url: None,
516                    available_qualities: qualities,
517                    media_type: MediaType::Carousel,
518                    file_size_bytes: None,
519                })
520            }
521        }
522    }
523
524    async fn native_download(
525        &self,
526        info: &MediaInfo,
527        opts: &DownloadOptions,
528        progress: mpsc::Sender<f64>,
529    ) -> anyhow::Result<DownloadResult> {
530        match info.media_type {
531            MediaType::Video => {
532                let video_quality = info
533                    .available_qualities
534                    .iter()
535                    .find(|q| q.label == "video")
536                    .ok_or_else(|| anyhow!("No video URL"))?;
537
538                let audio_quality = info.available_qualities.iter().find(|q| q.label == "audio");
539
540                let has_audio = audio_quality.is_some();
541                let ffmpeg_available = ffmpeg::is_ffmpeg_available().await;
542
543                if has_audio && !ffmpeg_available {
544                    tracing::warn!("[reddit] Video has separate audio but FFmpeg is not installed — downloading video without audio");
545                }
546
547                if has_audio {
548                    let video_tmp = opts.output_dir.join(format!(
549                        "{}_video_tmp.mp4",
550                        sanitize_filename::sanitize(&info.title)
551                    ));
552                    let audio_tmp = opts.output_dir.join(format!(
553                        "{}_audio_tmp.mp4",
554                        sanitize_filename::sanitize(&info.title)
555                    ));
556                    let output = opts
557                        .output_dir
558                        .join(format!("{}.mp4", sanitize_filename::sanitize(&info.title)));
559
560                    let _ = progress.send(0.0).await;
561
562                    let (vtx, mut vrx) = mpsc::channel::<f64>(8);
563                    let progress_video = progress.clone();
564                    tokio::spawn(async move {
565                        while let Some(p) = vrx.recv().await {
566                            let scaled = p * 0.6;
567                            let _ = progress_video.send(scaled).await;
568                        }
569                    });
570
571                    let video_bytes = self
572                        .download_video_with_fallback(&video_quality.url, &video_tmp, vtx)
573                        .await?;
574
575                    let _ = progress.send(60.0).await;
576
577                    let audio_url = &audio_quality.unwrap().url;
578                    let (atx, mut arx) = mpsc::channel::<f64>(8);
579                    let progress_audio = progress.clone();
580                    tokio::spawn(async move {
581                        while let Some(p) = arx.recv().await {
582                            let scaled = 60.0 + p * 0.25;
583                            let _ = progress_audio.send(scaled).await;
584                        }
585                    });
586
587                    let audio_ok = direct_downloader::download_direct(
588                        &self.client,
589                        audio_url,
590                        &audio_tmp,
591                        atx,
592                        None,
593                    )
594                    .await
595                    .is_ok();
596
597                    let _ = progress.send(85.0).await;
598
599                    if audio_ok && ffmpeg_available {
600                        ffmpeg::mux_video_audio(&video_tmp, &audio_tmp, &output).await?;
601                        let _ = tokio::fs::remove_file(&video_tmp).await;
602                        let _ = tokio::fs::remove_file(&audio_tmp).await;
603                        let _ = progress.send(100.0).await;
604
605                        let file_size = tokio::fs::metadata(&output).await?.len();
606                        Ok(DownloadResult {
607                            file_path: output,
608                            file_size_bytes: file_size,
609                            duration_seconds: info.duration_seconds.unwrap_or(0.0),
610                            torrent_id: None,
611                        })
612                    } else {
613                        let video_final = opts.output_dir.join(format!(
614                            "{}{}.mp4",
615                            sanitize_filename::sanitize(&info.title),
616                            if !audio_ok { "" } else { "_noaudio" }
617                        ));
618                        let _ = tokio::fs::rename(&video_tmp, &video_final).await;
619
620                        if audio_ok {
621                            let audio_final = opts.output_dir.join(format!(
622                                "{}_audio.mp4",
623                                sanitize_filename::sanitize(&info.title)
624                            ));
625                            let _ = tokio::fs::rename(&audio_tmp, &audio_final).await;
626                        } else {
627                            let _ = tokio::fs::remove_file(&audio_tmp).await;
628                        }
629
630                        let _ = progress.send(100.0).await;
631
632                        Ok(DownloadResult {
633                            file_path: video_final,
634                            file_size_bytes: video_bytes,
635                            duration_seconds: info.duration_seconds.unwrap_or(0.0),
636                            torrent_id: None,
637                        })
638                    }
639                } else {
640                    let output = opts
641                        .output_dir
642                        .join(format!("{}.mp4", sanitize_filename::sanitize(&info.title)));
643                    let bytes = self
644                        .download_video_with_fallback(&video_quality.url, &output, progress)
645                        .await?;
646
647                    Ok(DownloadResult {
648                        file_path: output,
649                        file_size_bytes: bytes,
650                        duration_seconds: info.duration_seconds.unwrap_or(0.0),
651                        torrent_id: None,
652                    })
653                }
654            }
655            MediaType::Gif => {
656                let url = &info
657                    .available_qualities
658                    .first()
659                    .ok_or_else(|| anyhow!("Nenhum URL GIF"))?
660                    .url;
661                let output = opts
662                    .output_dir
663                    .join(format!("{}.gif", sanitize_filename::sanitize(&info.title)));
664                let bytes =
665                    direct_downloader::download_direct(&self.client, url, &output, progress, None)
666                        .await?;
667
668                Ok(DownloadResult {
669                    file_path: output,
670                    file_size_bytes: bytes,
671                    duration_seconds: 0.0,
672                    torrent_id: None,
673                })
674            }
675            MediaType::Photo => {
676                let quality = info
677                    .available_qualities
678                    .first()
679                    .ok_or_else(|| anyhow!("Nenhum URL de imagem"))?;
680                let ext = &quality.format;
681                let output = opts.output_dir.join(format!(
682                    "{}.{}",
683                    sanitize_filename::sanitize(&info.title),
684                    ext
685                ));
686                let bytes = direct_downloader::download_direct(
687                    &self.client,
688                    &quality.url,
689                    &output,
690                    progress,
691                    None,
692                )
693                .await?;
694
695                Ok(DownloadResult {
696                    file_path: output,
697                    file_size_bytes: bytes,
698                    duration_seconds: 0.0,
699                    torrent_id: None,
700                })
701            }
702            MediaType::Carousel => {
703                let count = info.available_qualities.len();
704                let mut total_bytes = 0u64;
705                let mut last_path = opts.output_dir.clone();
706
707                for (i, quality) in info.available_qualities.iter().enumerate() {
708                    let filename = format!(
709                        "{}_{}.{}",
710                        sanitize_filename::sanitize(&info.title),
711                        i + 1,
712                        quality.format,
713                    );
714                    let output = opts.output_dir.join(&filename);
715                    let (tx, _rx) = mpsc::channel(8);
716
717                    let bytes = direct_downloader::download_direct(
718                        &self.client,
719                        &quality.url,
720                        &output,
721                        tx,
722                        None,
723                    )
724                    .await?;
725
726                    total_bytes += bytes;
727                    last_path = output;
728
729                    let percent = ((i + 1) as f64 / count as f64) * 100.0;
730                    let _ = progress.send(percent).await;
731                }
732
733                Ok(DownloadResult {
734                    file_path: last_path,
735                    file_size_bytes: total_bytes,
736                    duration_seconds: 0.0,
737                    torrent_id: None,
738                })
739            }
740            _ => Err(anyhow!("Unsupported media type")),
741        }
742    }
743}