1use bytes::Bytes;
6use serde::{Deserialize, Serialize};
7use thiserror::Error;
8
9#[derive(Error, Debug)]
15pub enum MediaError {
16 #[error("Failed to download media: {0}")]
17 Download(String),
18
19 #[error("Network error: {0}")]
20 Network(String),
21
22 #[error("HTTP error {0}: {1}")]
23 Http(u16, String),
24
25 #[error("Invalid URL: {0}")]
26 InvalidUrl(String),
27
28 #[error("Unsupported media type: {0}")]
29 UnsupportedType(String),
30
31 #[error("File too large: {0} bytes (max: {1})")]
32 FileTooLarge(u64, u64),
33
34 #[error("Timeout downloading: {0}")]
35 Timeout(String),
36
37 #[error("IO error: {0}")]
38 Io(String),
39
40 #[error("Parse error: {0}")]
41 Parse(String),
42}
43
44pub type MediaResult<T> = Result<T, MediaError>;
46
47#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
53#[serde(rename_all = "lowercase")]
54pub enum MediaType {
55 Image,
56 Video,
57 Audio,
58 Document,
59 Embedded,
60 Other,
61}
62
63impl MediaType {
64 pub fn from_extension(ext: &str) -> Self {
66 let ext = ext.to_lowercase();
67 match ext.as_str() {
68 "jpg" | "jpeg" | "png" | "gif" | "webp" | "svg" | "ico" | "bmp"
70 | "avif" | "heic" | "heif" | "tiff" | "tif" => MediaType::Image,
71
72 "mp4" | "webm" | "ogg" | "ogv" | "avi" | "mov" | "mkv" | "m4v"
74 | "wmv" | "flv" | "3gp" => MediaType::Video,
75
76 "mp3" | "wav" | "oga" | "flac" | "aac" | "m4a" | "wma"
78 | "opus" | "aiff" => MediaType::Audio,
79
80 "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx"
82 | "txt" | "rtf" | "odt" | "ods" | "odp" | "csv" | "epub" => MediaType::Document,
83
84 _ => MediaType::Other,
85 }
86 }
87
88 pub fn from_mime(mime: &str) -> Self {
90 let mime_lower = mime.to_lowercase();
91 if mime_lower.starts_with("image/") {
92 MediaType::Image
93 } else if mime_lower.starts_with("video/") {
94 MediaType::Video
95 } else if mime_lower.starts_with("audio/") {
96 MediaType::Audio
97 } else if mime_lower.starts_with("application/pdf")
98 || mime_lower.contains("document")
99 || mime_lower.contains("spreadsheet")
100 || mime_lower.contains("presentation")
101 {
102 MediaType::Document
103 } else {
104 MediaType::Other
105 }
106 }
107}
108
109impl std::fmt::Display for MediaType {
110 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
111 match self {
112 MediaType::Image => write!(f, "image"),
113 MediaType::Video => write!(f, "video"),
114 MediaType::Audio => write!(f, "audio"),
115 MediaType::Document => write!(f, "document"),
116 MediaType::Embedded => write!(f, "embedded"),
117 MediaType::Other => write!(f, "other"),
118 }
119 }
120}
121
122#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
128#[serde(rename_all = "lowercase")]
129pub enum ImageFormat {
130 Jpeg,
131 Png,
132 Gif,
133 WebP,
134 Svg,
135 Avif,
136 Heic,
137 Ico,
138 Bmp,
139 Tiff,
140 Unknown,
141}
142
143impl ImageFormat {
144 pub fn from_extension(ext: &str) -> Self {
145 match ext.to_lowercase().as_str() {
146 "jpg" | "jpeg" => ImageFormat::Jpeg,
147 "png" => ImageFormat::Png,
148 "gif" => ImageFormat::Gif,
149 "webp" => ImageFormat::WebP,
150 "svg" => ImageFormat::Svg,
151 "avif" => ImageFormat::Avif,
152 "heic" | "heif" => ImageFormat::Heic,
153 "ico" => ImageFormat::Ico,
154 "bmp" => ImageFormat::Bmp,
155 "tiff" | "tif" => ImageFormat::Tiff,
156 _ => ImageFormat::Unknown,
157 }
158 }
159
160 pub fn from_mime(mime: &str) -> Self {
161 match mime.to_lowercase().as_str() {
162 "image/jpeg" => ImageFormat::Jpeg,
163 "image/png" => ImageFormat::Png,
164 "image/gif" => ImageFormat::Gif,
165 "image/webp" => ImageFormat::WebP,
166 "image/svg+xml" => ImageFormat::Svg,
167 "image/avif" => ImageFormat::Avif,
168 "image/heic" | "image/heif" => ImageFormat::Heic,
169 "image/x-icon" | "image/vnd.microsoft.icon" => ImageFormat::Ico,
170 "image/bmp" => ImageFormat::Bmp,
171 "image/tiff" => ImageFormat::Tiff,
172 _ => ImageFormat::Unknown,
173 }
174 }
175
176 pub fn mime_type(&self) -> &'static str {
177 match self {
178 ImageFormat::Jpeg => "image/jpeg",
179 ImageFormat::Png => "image/png",
180 ImageFormat::Gif => "image/gif",
181 ImageFormat::WebP => "image/webp",
182 ImageFormat::Svg => "image/svg+xml",
183 ImageFormat::Avif => "image/avif",
184 ImageFormat::Heic => "image/heic",
185 ImageFormat::Ico => "image/x-icon",
186 ImageFormat::Bmp => "image/bmp",
187 ImageFormat::Tiff => "image/tiff",
188 ImageFormat::Unknown => "application/octet-stream",
189 }
190 }
191}
192
193#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
195#[serde(rename_all = "lowercase")]
196pub enum ImageLoading {
197 #[default]
198 Eager,
199 Lazy,
200}
201
202#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
204pub struct SrcsetEntry {
205 pub url: String,
207 pub width: Option<u32>,
209 pub density: Option<f32>,
211}
212
213#[derive(Debug, Clone, Serialize, Deserialize)]
215pub struct ImageMedia {
216 pub src: String,
218 pub absolute_url: Option<String>,
220 pub alt: Option<String>,
222 pub title: Option<String>,
224 pub width: Option<u32>,
226 pub height: Option<u32>,
228 pub format: ImageFormat,
230 pub mime_type: Option<String>,
232 pub loading: ImageLoading,
234 pub is_decorative: bool,
236 pub srcset: Vec<SrcsetEntry>,
238 pub sizes: Option<String>,
240 pub data_src: Option<String>,
242 pub is_placeholder: bool,
244 pub size_bytes: Option<usize>,
246 pub content_hash: Option<String>,
248 pub classes: Vec<String>,
250 pub id: Option<String>,
252}
253
254impl Default for ImageMedia {
255 fn default() -> Self {
256 Self {
257 src: String::new(),
258 absolute_url: None,
259 alt: None,
260 title: None,
261 width: None,
262 height: None,
263 format: ImageFormat::Unknown,
264 mime_type: None,
265 loading: ImageLoading::Eager,
266 is_decorative: false,
267 srcset: Vec::new(),
268 sizes: None,
269 data_src: None,
270 is_placeholder: false,
271 size_bytes: None,
272 content_hash: None,
273 classes: Vec::new(),
274 id: None,
275 }
276 }
277}
278
279#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
285#[serde(rename_all = "lowercase")]
286pub enum VideoPlatform {
287 YouTube,
288 Vimeo,
289 Dailymotion,
290 Twitch,
291 Facebook,
292 Twitter,
293 TikTok,
294 Wistia,
295 Brightcove,
296 JWPlayer,
297 VideoJs,
298 Html5,
299 Other,
300}
301
302impl VideoPlatform {
303 pub fn from_url(url: &str) -> Self {
305 let url_lower = url.to_lowercase();
306 if url_lower.contains("youtube.com") || url_lower.contains("youtu.be") {
307 VideoPlatform::YouTube
308 } else if url_lower.contains("vimeo.com") {
309 VideoPlatform::Vimeo
310 } else if url_lower.contains("dailymotion.com") || url_lower.contains("dai.ly") {
311 VideoPlatform::Dailymotion
312 } else if url_lower.contains("twitch.tv") {
313 VideoPlatform::Twitch
314 } else if url_lower.contains("facebook.com") || url_lower.contains("fb.watch") {
315 VideoPlatform::Facebook
316 } else if url_lower.contains("twitter.com") || url_lower.contains("x.com") {
317 VideoPlatform::Twitter
318 } else if url_lower.contains("tiktok.com") {
319 VideoPlatform::TikTok
320 } else if url_lower.contains("wistia.com") || url_lower.contains("wistia.net") {
321 VideoPlatform::Wistia
322 } else if url_lower.contains("brightcove") {
323 VideoPlatform::Brightcove
324 } else if url_lower.contains("jwplayer") || url_lower.contains("jwplatform") {
325 VideoPlatform::JWPlayer
326 } else {
327 VideoPlatform::Other
328 }
329 }
330}
331
332#[derive(Debug, Clone, Serialize, Deserialize)]
334pub struct VideoMedia {
335 pub src: String,
337 pub absolute_url: Option<String>,
339 pub platform: VideoPlatform,
341 pub video_id: Option<String>,
343 pub poster: Option<String>,
345 pub width: Option<u32>,
347 pub height: Option<u32>,
349 pub duration: Option<f64>,
351 pub mime_type: Option<String>,
353 pub title: Option<String>,
355 pub sources: Vec<VideoSource>,
357 pub tracks: Vec<VideoTrack>,
359 pub autoplay: bool,
361 pub loop_video: bool,
363 pub muted: bool,
365 pub controls: bool,
367 pub playsinline: bool,
369 pub embed_url: Option<String>,
371 pub size_bytes: Option<usize>,
373}
374
375impl Default for VideoMedia {
376 fn default() -> Self {
377 Self {
378 src: String::new(),
379 absolute_url: None,
380 platform: VideoPlatform::Html5,
381 video_id: None,
382 poster: None,
383 width: None,
384 height: None,
385 duration: None,
386 mime_type: None,
387 title: None,
388 sources: Vec::new(),
389 tracks: Vec::new(),
390 autoplay: false,
391 loop_video: false,
392 muted: false,
393 controls: true,
394 playsinline: false,
395 embed_url: None,
396 size_bytes: None,
397 }
398 }
399}
400
401#[derive(Debug, Clone, Serialize, Deserialize)]
403pub struct VideoSource {
404 pub src: String,
405 pub mime_type: Option<String>,
406 pub quality: Option<String>,
407}
408
409#[derive(Debug, Clone, Serialize, Deserialize)]
411pub struct VideoTrack {
412 pub src: String,
413 pub kind: TrackKind,
414 pub label: Option<String>,
415 pub srclang: Option<String>,
416 pub is_default: bool,
417}
418
419#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
421#[serde(rename_all = "lowercase")]
422#[derive(Default)]
423pub enum TrackKind {
424 #[default]
425 Subtitles,
426 Captions,
427 Descriptions,
428 Chapters,
429 Metadata,
430}
431
432
433#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
439#[serde(rename_all = "lowercase")]
440pub enum AudioPlatform {
441 Spotify,
442 SoundCloud,
443 ApplePodcasts,
444 Anchor,
445 Podbean,
446 Buzzsprout,
447 Html5,
448 Other,
449}
450
451impl AudioPlatform {
452 pub fn from_url(url: &str) -> Self {
453 let url_lower = url.to_lowercase();
454 if url_lower.contains("spotify.com") || url_lower.contains("open.spotify") {
455 AudioPlatform::Spotify
456 } else if url_lower.contains("soundcloud.com") {
457 AudioPlatform::SoundCloud
458 } else if url_lower.contains("podcasts.apple.com") {
459 AudioPlatform::ApplePodcasts
460 } else if url_lower.contains("anchor.fm") {
461 AudioPlatform::Anchor
462 } else if url_lower.contains("podbean.com") {
463 AudioPlatform::Podbean
464 } else if url_lower.contains("buzzsprout.com") {
465 AudioPlatform::Buzzsprout
466 } else {
467 AudioPlatform::Other
468 }
469 }
470}
471
472#[derive(Debug, Clone, Serialize, Deserialize)]
474pub struct AudioMedia {
475 pub src: String,
477 pub absolute_url: Option<String>,
479 pub platform: AudioPlatform,
481 pub title: Option<String>,
483 pub artist: Option<String>,
485 pub album: Option<String>,
487 pub duration: Option<f64>,
489 pub mime_type: Option<String>,
491 pub sources: Vec<AudioSource>,
493 pub autoplay: bool,
495 pub loop_audio: bool,
497 pub muted: bool,
499 pub controls: bool,
501 pub embed_url: Option<String>,
503 pub size_bytes: Option<usize>,
505}
506
507impl Default for AudioMedia {
508 fn default() -> Self {
509 Self {
510 src: String::new(),
511 absolute_url: None,
512 platform: AudioPlatform::Html5,
513 title: None,
514 artist: None,
515 album: None,
516 duration: None,
517 mime_type: None,
518 sources: Vec::new(),
519 autoplay: false,
520 loop_audio: false,
521 muted: false,
522 controls: true,
523 embed_url: None,
524 size_bytes: None,
525 }
526 }
527}
528
529#[derive(Debug, Clone, Serialize, Deserialize)]
531pub struct AudioSource {
532 pub src: String,
533 pub mime_type: Option<String>,
534}
535
536#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
542#[serde(rename_all = "lowercase")]
543pub enum DocumentType {
544 Pdf,
545 Word,
546 Excel,
547 PowerPoint,
548 Text,
549 Csv,
550 Epub,
551 Other,
552}
553
554impl DocumentType {
555 pub fn from_extension(ext: &str) -> Self {
556 match ext.to_lowercase().as_str() {
557 "pdf" => DocumentType::Pdf,
558 "doc" | "docx" | "odt" | "rtf" => DocumentType::Word,
559 "xls" | "xlsx" | "ods" => DocumentType::Excel,
560 "ppt" | "pptx" | "odp" => DocumentType::PowerPoint,
561 "txt" => DocumentType::Text,
562 "csv" => DocumentType::Csv,
563 "epub" => DocumentType::Epub,
564 _ => DocumentType::Other,
565 }
566 }
567}
568
569#[derive(Debug, Clone, Serialize, Deserialize)]
571pub struct DocumentMedia {
572 pub url: String,
574 pub absolute_url: Option<String>,
576 pub doc_type: DocumentType,
578 pub filename: Option<String>,
580 pub title: Option<String>,
582 pub mime_type: Option<String>,
584 pub size_bytes: Option<usize>,
586 pub page_count: Option<u32>,
588}
589
590impl Default for DocumentMedia {
591 fn default() -> Self {
592 Self {
593 url: String::new(),
594 absolute_url: None,
595 doc_type: DocumentType::Other,
596 filename: None,
597 title: None,
598 mime_type: None,
599 size_bytes: None,
600 page_count: None,
601 }
602 }
603}
604
605#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
611#[serde(rename_all = "lowercase")]
612pub enum EmbedType {
613 Iframe,
614 Object,
615 Embed,
616 Script,
617}
618
619#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
621#[serde(rename_all = "lowercase")]
622pub enum EmbedPlatform {
623 YouTube,
624 Vimeo,
625 Dailymotion,
626 Twitch,
627 Wistia,
628 Twitter,
629 Instagram,
630 Facebook,
631 LinkedIn,
632 Pinterest,
633 TikTok,
634 Reddit,
635 Spotify,
636 SoundCloud,
637 ApplePodcasts,
638 GoogleMaps,
639 GoogleDocs,
640 CodePen,
641 JsFiddle,
642 CodeSandbox,
643 Gist,
644 SlideShare,
645 Giphy,
646 Typeform,
647 Calendly,
648 Stripe,
649 PayPal,
650 Scribd,
651 Other,
652}
653
654impl EmbedPlatform {
655 pub fn from_url(url: &str) -> Self {
656 let url_lower = url.to_lowercase();
657 if url_lower.contains("youtube.com") || url_lower.contains("youtube-nocookie.com") {
658 EmbedPlatform::YouTube
659 } else if url_lower.contains("player.vimeo.com") || url_lower.contains("vimeo.com") {
660 EmbedPlatform::Vimeo
661 } else if url_lower.contains("dailymotion.com") {
662 EmbedPlatform::Dailymotion
663 } else if url_lower.contains("twitch.tv") {
664 EmbedPlatform::Twitch
665 } else if url_lower.contains("wistia.com") || url_lower.contains("wistia.net") {
666 EmbedPlatform::Wistia
667 } else if url_lower.contains("platform.twitter.com") || url_lower.contains("twitter.com/") || url_lower.contains("x.com") {
668 EmbedPlatform::Twitter
669 } else if url_lower.contains("instagram.com") {
670 EmbedPlatform::Instagram
671 } else if url_lower.contains("facebook.com") || url_lower.contains("fb.com") {
672 EmbedPlatform::Facebook
673 } else if url_lower.contains("linkedin.com") {
674 EmbedPlatform::LinkedIn
675 } else if url_lower.contains("pinterest.com") {
676 EmbedPlatform::Pinterest
677 } else if url_lower.contains("tiktok.com") {
678 EmbedPlatform::TikTok
679 } else if url_lower.contains("reddit.com") || url_lower.contains("redd.it") {
680 EmbedPlatform::Reddit
681 } else if url_lower.contains("open.spotify.com") || url_lower.contains("spotify.com") {
682 EmbedPlatform::Spotify
683 } else if url_lower.contains("soundcloud.com") {
684 EmbedPlatform::SoundCloud
685 } else if url_lower.contains("podcasts.apple.com") {
686 EmbedPlatform::ApplePodcasts
687 } else if url_lower.contains("google.com/maps") || url_lower.contains("maps.google") {
688 EmbedPlatform::GoogleMaps
689 } else if url_lower.contains("docs.google.com") {
690 EmbedPlatform::GoogleDocs
691 } else if url_lower.contains("codepen.io") {
692 EmbedPlatform::CodePen
693 } else if url_lower.contains("jsfiddle.net") {
694 EmbedPlatform::JsFiddle
695 } else if url_lower.contains("codesandbox.io") {
696 EmbedPlatform::CodeSandbox
697 } else if url_lower.contains("gist.github.com") {
698 EmbedPlatform::Gist
699 } else if url_lower.contains("slideshare.net") {
700 EmbedPlatform::SlideShare
701 } else if url_lower.contains("giphy.com") {
702 EmbedPlatform::Giphy
703 } else if url_lower.contains("typeform.com") {
704 EmbedPlatform::Typeform
705 } else if url_lower.contains("calendly.com") {
706 EmbedPlatform::Calendly
707 } else if url_lower.contains("stripe.com") {
708 EmbedPlatform::Stripe
709 } else if url_lower.contains("paypal.com") {
710 EmbedPlatform::PayPal
711 } else if url_lower.contains("scribd.com") {
712 EmbedPlatform::Scribd
713 } else {
714 EmbedPlatform::Other
715 }
716 }
717}
718
719#[derive(Debug, Clone, Serialize, Deserialize)]
721pub struct EmbeddedMedia {
722 pub url: String,
724 pub absolute_url: Option<String>,
726 pub platform: EmbedPlatform,
728 pub title: Option<String>,
730 pub width: Option<u32>,
732 pub height: Option<u32>,
734 pub allow: Option<String>,
736 pub sandbox: Option<String>,
738 pub loading: Option<String>,
740 pub frameborder: Option<String>,
742}
743
744impl Default for EmbeddedMedia {
745 fn default() -> Self {
746 Self {
747 url: String::new(),
748 absolute_url: None,
749 platform: EmbedPlatform::Other,
750 title: None,
751 width: None,
752 height: None,
753 allow: None,
754 sandbox: None,
755 loading: None,
756 frameborder: None,
757 }
758 }
759}
760
761#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
767#[serde(rename_all = "lowercase")]
768pub enum LinkType {
769 #[default]
770 Internal,
771 External,
772 Mailto,
773 Tel,
774 Download,
775 Anchor,
776}
777
778#[derive(Debug, Clone, Serialize, Deserialize)]
780pub struct LinkMedia {
781 pub href: String,
783 pub absolute_url: Option<String>,
785 pub text: String,
787 pub title: Option<String>,
789 pub rel: Vec<String>,
791 pub link_type: LinkType,
793 pub is_nofollow: bool,
795 pub is_sponsored: bool,
797 pub is_ugc: bool,
799 pub target: Option<String>,
801 pub download: Option<String>,
803 pub hreflang: Option<String>,
805 pub media_type: Option<MediaType>,
807}
808
809impl Default for LinkMedia {
810 fn default() -> Self {
811 Self {
812 href: String::new(),
813 absolute_url: None,
814 text: String::new(),
815 title: None,
816 rel: Vec::new(),
817 link_type: LinkType::Internal,
818 is_nofollow: false,
819 is_sponsored: false,
820 is_ugc: false,
821 target: None,
822 download: None,
823 hreflang: None,
824 media_type: None,
825 }
826 }
827}
828
829#[derive(Debug, Clone, Serialize, Deserialize)]
835pub struct MediaConfig {
836 pub extract_images: bool,
838 pub extract_videos: bool,
840 pub extract_audio: bool,
842 pub extract_documents: bool,
844 pub extract_embeds: bool,
846 pub extract_links: bool,
848 pub include_data_urls: bool,
850 pub filter_placeholders: bool,
852 pub min_image_width: Option<u32>,
854 pub min_image_height: Option<u32>,
856 pub download: DownloadConfig,
858}
859
860impl Default for MediaConfig {
861 fn default() -> Self {
862 Self {
863 extract_images: true,
864 extract_videos: true,
865 extract_audio: true,
866 extract_documents: true,
867 extract_embeds: true,
868 extract_links: true,
869 include_data_urls: false,
870 filter_placeholders: true,
871 min_image_width: None,
872 min_image_height: None,
873 download: DownloadConfig::default(),
874 }
875 }
876}
877
878impl MediaConfig {
879 pub fn minimal() -> Self {
881 Self {
882 extract_images: true,
883 extract_videos: false,
884 extract_audio: false,
885 extract_documents: false,
886 extract_embeds: false,
887 extract_links: true,
888 ..Default::default()
889 }
890 }
891
892 pub fn full() -> Self {
894 Self {
895 extract_images: true,
896 extract_videos: true,
897 extract_audio: true,
898 extract_documents: true,
899 extract_embeds: true,
900 extract_links: true,
901 include_data_urls: true,
902 ..Default::default()
903 }
904 }
905}
906
907#[derive(Debug, Clone, Serialize, Deserialize)]
909pub struct DownloadConfig {
910 pub max_file_size: Option<u64>,
912 pub max_concurrent: usize,
914 pub timeout_secs: u64,
916 pub encode_base64: bool,
918 pub max_retries: u32,
920 pub retry_delay_ms: u64,
922 pub user_agent: String,
924}
925
926impl Default for DownloadConfig {
927 fn default() -> Self {
928 Self {
929 max_file_size: Some(50 * 1024 * 1024), max_concurrent: 10,
931 timeout_secs: 30,
932 encode_base64: false,
933 max_retries: 2,
934 retry_delay_ms: 1000,
935 user_agent: "halldyll-media/1.0".to_string(),
936 }
937 }
938}
939
940#[derive(Debug, Clone)]
942pub struct DownloadResult {
943 pub url: String,
945 pub bytes: Bytes,
947 pub content_type: Option<String>,
949 pub size: u64,
951 pub hash: String,
953 pub media_type: MediaType,
955 pub base64: Option<String>,
957}
958
959#[derive(Debug, Clone, Default, Serialize, Deserialize)]
965pub struct ExtractedMedia {
966 pub images: Vec<ImageMedia>,
968 pub videos: Vec<VideoMedia>,
970 pub audio: Vec<AudioMedia>,
972 pub documents: Vec<DocumentMedia>,
974 pub embeds: Vec<EmbeddedMedia>,
976 pub links: Vec<LinkMedia>,
978}
979
980impl ExtractedMedia {
981 pub fn new() -> Self {
982 Self::default()
983 }
984
985 pub fn total_count(&self) -> usize {
987 self.images.len()
988 + self.videos.len()
989 + self.audio.len()
990 + self.documents.len()
991 + self.embeds.len()
992 + self.links.len()
993 }
994
995 pub fn is_empty(&self) -> bool {
997 self.total_count() == 0
998 }
999
1000 pub fn has_media(&self) -> bool {
1002 !self.is_empty()
1003 }
1004
1005 pub fn all_urls(&self) -> Vec<String> {
1007 let mut urls = Vec::new();
1008
1009 for img in &self.images {
1010 if let Some(url) = &img.absolute_url {
1011 urls.push(url.clone());
1012 }
1013 }
1014
1015 for vid in &self.videos {
1016 if let Some(url) = &vid.absolute_url {
1017 urls.push(url.clone());
1018 }
1019 }
1020
1021 for aud in &self.audio {
1022 if let Some(url) = &aud.absolute_url {
1023 urls.push(url.clone());
1024 }
1025 }
1026
1027 for doc in &self.documents {
1028 if let Some(url) = &doc.absolute_url {
1029 urls.push(url.clone());
1030 }
1031 }
1032
1033 for emb in &self.embeds {
1034 if let Some(url) = &emb.absolute_url {
1035 urls.push(url.clone());
1036 }
1037 }
1038
1039 urls
1040 }
1041
1042 pub fn image_urls(&self) -> Vec<&str> {
1044 self.images.iter()
1045 .filter_map(|i| i.absolute_url.as_deref())
1046 .collect()
1047 }
1048
1049 pub fn video_urls(&self) -> Vec<&str> {
1051 self.videos.iter()
1052 .filter_map(|v| v.absolute_url.as_deref())
1053 .collect()
1054 }
1055}
1056
#[cfg(test)]
mod tests {
    use super::*;

    /// Extension lookup covers each category and ignores case.
    #[test]
    fn test_media_type_from_extension() {
        let cases = [
            ("jpg", MediaType::Image),
            ("PNG", MediaType::Image),
            ("mp4", MediaType::Video),
            ("mp3", MediaType::Audio),
            ("pdf", MediaType::Document),
            ("xyz", MediaType::Other),
        ];
        for &(ext, expected) in cases.iter() {
            assert_eq!(MediaType::from_extension(ext), expected);
        }
    }

    /// MIME lookup covers the four detectable categories.
    #[test]
    fn test_media_type_from_mime() {
        let cases = [
            ("image/jpeg", MediaType::Image),
            ("video/mp4", MediaType::Video),
            ("audio/mpeg", MediaType::Audio),
            ("application/pdf", MediaType::Document),
        ];
        for &(mime, expected) in cases.iter() {
            assert_eq!(MediaType::from_mime(mime), expected);
        }
    }

    /// Image formats resolve from extension and MIME, and report their MIME type.
    #[test]
    fn test_image_format() {
        assert_eq!(ImageFormat::Jpeg, ImageFormat::from_extension("jpg"));
        assert_eq!(ImageFormat::WebP, ImageFormat::from_extension("webp"));
        assert_eq!(ImageFormat::Png, ImageFormat::from_mime("image/png"));
        assert_eq!("image/png", ImageFormat::Png.mime_type());
    }

    /// Known video hosts are detected; unknown hosts fall back to `Other`.
    #[test]
    fn test_video_platform_detection() {
        let cases = [
            ("https://youtube.com/watch?v=abc", VideoPlatform::YouTube),
            ("https://vimeo.com/123", VideoPlatform::Vimeo),
            ("https://twitch.tv/channel", VideoPlatform::Twitch),
            ("https://example.com/video.mp4", VideoPlatform::Other),
        ];
        for &(url, expected) in cases.iter() {
            assert_eq!(VideoPlatform::from_url(url), expected);
        }
    }

    /// Known audio hosts are detected.
    #[test]
    fn test_audio_platform_detection() {
        let cases = [
            ("https://open.spotify.com/track/abc", AudioPlatform::Spotify),
            ("https://soundcloud.com/artist/track", AudioPlatform::SoundCloud),
        ];
        for &(url, expected) in cases.iter() {
            assert_eq!(AudioPlatform::from_url(url), expected);
        }
    }

    /// Known embed hosts are detected.
    #[test]
    fn test_embed_platform_detection() {
        let cases = [
            ("https://www.youtube.com/embed/abc", EmbedPlatform::YouTube),
            ("https://player.vimeo.com/video/123", EmbedPlatform::Vimeo),
            ("https://codepen.io/user/pen/abc", EmbedPlatform::CodePen),
        ];
        for &(url, expected) in cases.iter() {
            assert_eq!(EmbedPlatform::from_url(url), expected);
        }
    }

    /// Document families resolve from extension.
    #[test]
    fn test_document_type() {
        let cases = [
            ("pdf", DocumentType::Pdf),
            ("docx", DocumentType::Word),
            ("xlsx", DocumentType::Excel),
        ];
        for &(ext, expected) in cases.iter() {
            assert_eq!(DocumentType::from_extension(ext), expected);
        }
    }

    /// An empty collection reports no media; adding an image changes that.
    #[test]
    fn test_extracted_media() {
        let mut media = ExtractedMedia::new();
        assert!(!media.has_media());
        assert_eq!(media.total_count(), 0);

        media.images.push(ImageMedia::default());
        assert!(media.has_media());
        assert_eq!(media.total_count(), 1);
    }

    /// Default and minimal presets enable the expected extractors.
    #[test]
    fn test_media_config() {
        let config = MediaConfig::default();
        assert!(config.extract_images);
        assert!(config.download.max_concurrent > 0);

        let minimal = MediaConfig::minimal();
        assert!(minimal.extract_images);
        assert!(!minimal.extract_videos);
    }
}