use bytes::Bytes;
use serde::{Deserialize, Serialize};
use thiserror::Error;
/// Error type of this media module.
///
/// Every variant carries plain `String`/integer payloads; the user-facing
/// `Display` text is generated from the `#[error(...)]` template on the variant.
#[derive(Error, Debug)]
pub enum MediaError {
/// Download failure; the payload is appended to the message.
#[error("Failed to download media: {0}")]
Download(String),
/// Network-level failure; the payload is appended to the message.
#[error("Network error: {0}")]
Network(String),
/// HTTP failure: `{0}` is a numeric code (`u16`), `{1}` the accompanying text.
#[error("HTTP error {0}: {1}")]
Http(u16, String),
/// A URL was rejected; the payload is appended to the message.
#[error("Invalid URL: {0}")]
InvalidUrl(String),
/// The media type is not supported; the payload names it.
#[error("Unsupported media type: {0}")]
UnsupportedType(String),
/// Size limit exceeded: `{0}` is the size in bytes, `{1}` the maximum.
#[error("File too large: {0} bytes (max: {1})")]
FileTooLarge(u64, u64),
/// A download timed out (payload presumably identifies the URL — confirm at call sites).
#[error("Timeout downloading: {0}")]
Timeout(String),
/// I/O failure, stored as text rather than as a wrapped `std::io::Error`.
#[error("IO error: {0}")]
Io(String),
/// Parsing failure; the payload is appended to the message.
#[error("Parse error: {0}")]
Parse(String),
}
/// Shorthand for `Result<T, MediaError>`.
pub type MediaResult<T> = Result<T, MediaError>;
/// Coarse media category.
///
/// Serialized by serde with lowercase variant names (`rename_all = "lowercase"`),
/// the same strings the `Display` impl for this type writes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum MediaType {
/// Image content.
Image,
/// Video content.
Video,
/// Audio content.
Audio,
/// Document content (PDF, office files, ...).
Document,
/// Embedded content; never produced by `from_extension` or `from_mime`.
Embedded,
/// Fallback for anything not recognised.
Other,
}
impl MediaType {
    /// Classifies a file extension.
    ///
    /// Matching is case-insensitive. Surrounding whitespace and a single
    /// leading dot are ignored, so `"jpg"`, `"JPG"` and `".jpg"` are all
    /// [`MediaType::Image`]. Unrecognised extensions yield [`MediaType::Other`].
    pub fn from_extension(ext: &str) -> Self {
        let trimmed = ext.trim();
        let ext = trimmed.strip_prefix('.').unwrap_or(trimmed).to_lowercase();
        match ext.as_str() {
            "jpg" | "jpeg" | "png" | "gif" | "webp" | "svg" | "ico" | "bmp" | "avif" | "heic"
            | "heif" | "tiff" | "tif" => MediaType::Image,
            "mp4" | "webm" | "ogg" | "ogv" | "avi" | "mov" | "mkv" | "m4v" | "wmv" | "flv"
            | "3gp" => MediaType::Video,
            "mp3" | "wav" | "oga" | "flac" | "aac" | "m4a" | "wma" | "opus" | "aiff" => {
                MediaType::Audio
            }
            "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "txt" | "rtf" | "odt"
            | "ods" | "odp" | "csv" | "epub" => MediaType::Document,
            _ => MediaType::Other,
        }
    }

    /// Classifies a MIME type (or a full `Content-Type` value).
    ///
    /// Matching is case-insensitive and ignores surrounding whitespace.
    /// `image/*`, `video/*` and `audio/*` map to their categories. PDF and any
    /// type containing `document`, `spreadsheet` or `presentation` (the OOXML
    /// and OpenDocument families) map to [`MediaType::Document`]. The legacy
    /// office and plain-text types listed below are also documents, so the
    /// result agrees with [`MediaType::from_extension`] for `doc`, `xls`,
    /// `ppt`, `txt`, `csv`, `rtf` and `epub`. Everything else is
    /// [`MediaType::Other`].
    pub fn from_mime(mime: &str) -> Self {
        // Document types whose names contain none of the keywords checked below.
        const EXTRA_DOCUMENT_TYPES: [&str; 8] = [
            "application/msword",
            "application/vnd.ms-excel",
            "application/vnd.ms-powerpoint",
            "application/rtf",
            "text/rtf",
            "application/epub+zip",
            "text/plain",
            "text/csv",
        ];

        let mime_lower = mime.trim().to_lowercase();
        // The bare type without parameters such as `; charset=utf-8`.
        let essence = mime_lower.split(';').next().unwrap_or("").trim_end();

        if mime_lower.starts_with("image/") {
            MediaType::Image
        } else if mime_lower.starts_with("video/") {
            MediaType::Video
        } else if mime_lower.starts_with("audio/") {
            MediaType::Audio
        } else if mime_lower.starts_with("application/pdf")
            || mime_lower.contains("document")
            || mime_lower.contains("spreadsheet")
            || mime_lower.contains("presentation")
            || EXTRA_DOCUMENT_TYPES.contains(&essence)
        {
            MediaType::Document
        } else {
            MediaType::Other
        }
    }
}
impl std::fmt::Display for MediaType {
    /// Writes the lowercase name of the category (`image`, `video`, ...).
    ///
    /// The text is written verbatim; width and alignment flags on the
    /// formatter are not applied.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match *self {
            MediaType::Image => "image",
            MediaType::Video => "video",
            MediaType::Audio => "audio",
            MediaType::Document => "document",
            MediaType::Embedded => "embedded",
            MediaType::Other => "other",
        };
        f.write_str(label)
    }
}
/// Image file formats distinguished by this module.
///
/// Serialized by serde with lowercase variant names (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum ImageFormat {
/// JPEG (`jpg` / `jpeg`).
Jpeg,
/// PNG.
Png,
/// GIF.
Gif,
/// WebP.
WebP,
/// SVG.
Svg,
/// AVIF.
Avif,
/// HEIC; `ImageFormat::from_extension` and `from_mime` also map HEIF here.
Heic,
/// ICO.
Ico,
/// BMP.
Bmp,
/// TIFF (`tiff` / `tif`).
Tiff,
/// Fallback for unrecognised input; also the format of `ImageMedia::default()`.
Unknown,
}
impl ImageFormat {
    /// Maps a file extension to an image format.
    ///
    /// Matching is case-insensitive. Surrounding whitespace and a single
    /// leading dot are ignored. Unrecognised extensions yield
    /// [`ImageFormat::Unknown`].
    pub fn from_extension(ext: &str) -> Self {
        let trimmed = ext.trim();
        let ext = trimmed.strip_prefix('.').unwrap_or(trimmed);
        match ext.to_lowercase().as_str() {
            "jpg" | "jpeg" => ImageFormat::Jpeg,
            "png" => ImageFormat::Png,
            "gif" => ImageFormat::Gif,
            "webp" => ImageFormat::WebP,
            "svg" => ImageFormat::Svg,
            "avif" => ImageFormat::Avif,
            "heic" | "heif" => ImageFormat::Heic,
            "ico" => ImageFormat::Ico,
            "bmp" => ImageFormat::Bmp,
            "tiff" | "tif" => ImageFormat::Tiff,
            _ => ImageFormat::Unknown,
        }
    }

    /// Maps a MIME type (or a full `Content-Type` value) to an image format.
    ///
    /// Only the bare type is compared: parameters after `;` (for example
    /// `image/svg+xml; charset=utf-8`) and surrounding whitespace are
    /// discarded first, and matching is case-insensitive. The widespread
    /// non-standard aliases `image/jpg` and `image/x-ms-bmp` are accepted.
    /// Unrecognised types yield [`ImageFormat::Unknown`].
    pub fn from_mime(mime: &str) -> Self {
        let lowered = mime.to_lowercase();
        // Headers routinely carry parameters; keep only the type/subtype part.
        let essence = lowered.split(';').next().unwrap_or("").trim();
        match essence {
            "image/jpeg" | "image/jpg" => ImageFormat::Jpeg,
            "image/png" => ImageFormat::Png,
            "image/gif" => ImageFormat::Gif,
            "image/webp" => ImageFormat::WebP,
            "image/svg+xml" => ImageFormat::Svg,
            "image/avif" => ImageFormat::Avif,
            "image/heic" | "image/heif" => ImageFormat::Heic,
            "image/x-icon" | "image/vnd.microsoft.icon" => ImageFormat::Ico,
            "image/bmp" | "image/x-ms-bmp" => ImageFormat::Bmp,
            "image/tiff" => ImageFormat::Tiff,
            _ => ImageFormat::Unknown,
        }
    }

    /// Returns the MIME type reported for this format.
    ///
    /// [`ImageFormat::Unknown`] maps to `application/octet-stream`.
    pub fn mime_type(&self) -> &'static str {
        match self {
            ImageFormat::Jpeg => "image/jpeg",
            ImageFormat::Png => "image/png",
            ImageFormat::Gif => "image/gif",
            ImageFormat::WebP => "image/webp",
            ImageFormat::Svg => "image/svg+xml",
            ImageFormat::Avif => "image/avif",
            ImageFormat::Heic => "image/heic",
            ImageFormat::Ico => "image/x-icon",
            ImageFormat::Bmp => "image/bmp",
            ImageFormat::Tiff => "image/tiff",
            ImageFormat::Unknown => "application/octet-stream",
        }
    }
}
/// Loading mode recorded for an image; serialized as `"eager"` / `"lazy"`.
///
/// NOTE(review): presumably mirrors the HTML `loading` attribute — confirm
/// against the code that populates `ImageMedia::loading`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum ImageLoading {
/// The `Default` value.
#[default]
Eager,
/// Deferred loading.
Lazy,
}
/// One candidate of an image's `srcset` list (see `ImageMedia::srcset`).
///
/// NOTE(review): `width` and `density` presumably hold the `w` and `x`
/// descriptors of the candidate — confirm against the srcset parser.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct SrcsetEntry {
/// URL of this candidate.
pub url: String,
/// Width value, if one was given.
pub width: Option<u32>,
/// Density value, if one was given.
pub density: Option<f32>,
}
/// One image reference together with the metadata recorded for it.
///
/// NOTE(review): the field names suggest most values mirror `<img>` attributes
/// (`alt`, `srcset`, `sizes`, `loading`, ...) — confirm against the extractor
/// that populates this struct; nothing in this file fills it in.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ImageMedia {
/// Source reference as recorded.
pub src: String,
/// Absolute URL, when known. `ExtractedMedia::all_urls` and
/// `ExtractedMedia::image_urls` skip images where this is `None`.
pub absolute_url: Option<String>,
/// Alternative text, if any.
pub alt: Option<String>,
/// Title text, if any.
pub title: Option<String>,
/// Width, if known (unit not established in this file).
pub width: Option<u32>,
/// Height, if known (unit not established in this file).
pub height: Option<u32>,
/// Detected image format; `ImageFormat::Unknown` by default.
pub format: ImageFormat,
/// MIME type string, if known.
pub mime_type: Option<String>,
/// Loading mode; `ImageLoading::Eager` by default.
pub loading: ImageLoading,
/// Whether the image is considered decorative (criteria live in the extractor).
pub is_decorative: bool,
/// Candidate list; empty by default.
pub srcset: Vec<SrcsetEntry>,
/// Raw `sizes` value, if any.
pub sizes: Option<String>,
/// Alternate source (presumably a lazy-loading `data-src` attribute — TODO confirm).
pub data_src: Option<String>,
/// Whether the image is considered a placeholder (see `MediaConfig::filter_placeholders`).
pub is_placeholder: bool,
/// Size in bytes, if known.
pub size_bytes: Option<usize>,
/// Content hash, if computed (algorithm not established in this file).
pub content_hash: Option<String>,
/// Class names attached to the image.
pub classes: Vec<String>,
/// Identifier attached to the image, if any.
pub id: Option<String>,
}
impl Default for ImageMedia {
/// Builds an empty image record: empty strings and vectors, `None` for every
/// optional field, `false` for both flags, `ImageFormat::Unknown` and
/// `ImageLoading::Eager`.
fn default() -> Self {
Self {
src: String::new(),
absolute_url: None,
alt: None,
title: None,
width: None,
height: None,
// Format is not guessed here; callers set it once they know it.
format: ImageFormat::Unknown,
mime_type: None,
loading: ImageLoading::Eager,
is_decorative: false,
srcset: Vec::new(),
sizes: None,
data_src: None,
is_placeholder: false,
size_bytes: None,
content_hash: None,
classes: Vec::new(),
id: None,
}
}
}
/// Video hosting platform or player associated with a video.
///
/// Serialized by serde with lowercase variant names (`rename_all = "lowercase"`).
/// `VideoPlatform::from_url` never returns `VideoJs` or `Html5`; those must be
/// assigned by the caller.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum VideoPlatform {
/// YouTube.
YouTube,
/// Vimeo.
Vimeo,
/// Dailymotion.
Dailymotion,
/// Twitch.
Twitch,
/// Facebook.
Facebook,
/// Twitter / X.
Twitter,
/// TikTok.
TikTok,
/// Wistia.
Wistia,
/// Brightcove.
Brightcove,
/// JW Player.
JWPlayer,
/// Video.js player.
VideoJs,
/// Platform used by `VideoMedia::default()`.
Html5,
/// Fallback returned by `from_url` when nothing matches.
Other,
}
impl VideoPlatform {
    /// Identifies the hosting platform of a video URL.
    ///
    /// Known platforms are recognised by the URL's **host**: the host must
    /// equal the platform domain or be a subdomain of it. Matching on the
    /// whole URL text would misclassify unrelated sites — `netflix.com` and
    /// `dropbox.com` both contain `x.com` — and would match domains that only
    /// appear in a path or query string.
    ///
    /// Absolute (`https://…`), protocol-relative (`//…`) and scheme-less
    /// (`vimeo.com/1`) URLs are accepted; matching is case-insensitive.
    /// Brightcove and JW Player are vendor keywords rather than a single
    /// domain, so they are still looked up anywhere in the URL.
    ///
    /// Returns [`VideoPlatform::Other`] when nothing matches. `Html5` and
    /// `VideoJs` are never returned.
    pub fn from_url(url: &str) -> Self {
        let url_lower = url.trim().to_lowercase();
        let host = Self::host_of(&url_lower);
        let on = |domain: &str| Self::host_on(host, domain);

        if on("youtube.com") || on("youtu.be") || on("youtube-nocookie.com") {
            VideoPlatform::YouTube
        } else if on("vimeo.com") {
            VideoPlatform::Vimeo
        } else if on("dailymotion.com") || on("dai.ly") {
            VideoPlatform::Dailymotion
        } else if on("twitch.tv") {
            VideoPlatform::Twitch
        } else if on("facebook.com") || on("fb.watch") {
            VideoPlatform::Facebook
        } else if on("twitter.com") || on("x.com") {
            VideoPlatform::Twitter
        } else if on("tiktok.com") {
            VideoPlatform::TikTok
        } else if on("wistia.com") || on("wistia.net") {
            VideoPlatform::Wistia
        } else if url_lower.contains("brightcove") {
            VideoPlatform::Brightcove
        } else if url_lower.contains("jwplayer") || url_lower.contains("jwplatform") {
            VideoPlatform::JWPlayer
        } else {
            VideoPlatform::Other
        }
    }

    /// Extracts the host name from a URL-like string without validating it.
    ///
    /// Strips an optional scheme or leading `//`, then user-info, port, and
    /// everything from the first `/`, `?` or `#` onwards.
    fn host_of(url: &str) -> &str {
        let is_scheme = |s: &str| {
            !s.is_empty()
                && s.chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
        };
        // Only treat `://` as a scheme separator when what precedes it looks
        // like a scheme; otherwise it belongs to a query string.
        let rest = match url.split_once("://") {
            Some((scheme, rest)) if is_scheme(scheme) => rest,
            _ => url.strip_prefix("//").unwrap_or(url),
        };
        let end = rest.find(&['/', '?', '#'][..]).unwrap_or(rest.len());
        let authority = &rest[..end];
        let host_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
        let host = host_port.split(':').next().unwrap_or(host_port);
        host.trim_end_matches('.')
    }

    /// Returns `true` when `host` is `domain` itself or one of its subdomains.
    fn host_on(host: &str, domain: &str) -> bool {
        host == domain || matches!(host.strip_suffix(domain), Some(sub) if sub.ends_with('.'))
    }
}
/// One video reference together with the metadata recorded for it.
///
/// NOTE(review): several fields look like they mirror `<video>` attributes
/// (`poster`, `autoplay`, `muted`, `controls`, `playsinline`) — confirm against
/// the extractor that populates this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoMedia {
/// Source reference as recorded.
pub src: String,
/// Absolute URL, when known. `ExtractedMedia::all_urls` and
/// `ExtractedMedia::video_urls` skip videos where this is `None`.
pub absolute_url: Option<String>,
/// Hosting platform; `VideoPlatform::Html5` by default.
pub platform: VideoPlatform,
/// Platform-specific video identifier, if known.
pub video_id: Option<String>,
/// Poster image reference, if any.
pub poster: Option<String>,
/// Width, if known.
pub width: Option<u32>,
/// Height, if known.
pub height: Option<u32>,
/// Duration, if known (unit not established in this file — presumably seconds).
pub duration: Option<f64>,
/// MIME type string, if known.
pub mime_type: Option<String>,
/// Title, if any.
pub title: Option<String>,
/// Alternative sources; empty by default.
pub sources: Vec<VideoSource>,
/// Text tracks; empty by default.
pub tracks: Vec<VideoTrack>,
/// Autoplay flag; `false` by default.
pub autoplay: bool,
/// Loop flag; `false` by default.
pub loop_video: bool,
/// Muted flag; `false` by default.
pub muted: bool,
/// Controls flag; `true` by default.
pub controls: bool,
/// Inline-playback flag; `false` by default.
pub playsinline: bool,
/// Embed URL, if any.
pub embed_url: Option<String>,
/// Size in bytes, if known.
pub size_bytes: Option<usize>,
}
impl Default for VideoMedia {
/// Builds an empty video record: platform `VideoPlatform::Html5`,
/// `controls` enabled, every other flag `false`, and all optional fields and
/// lists empty.
fn default() -> Self {
Self {
src: String::new(),
absolute_url: None,
platform: VideoPlatform::Html5,
video_id: None,
poster: None,
width: None,
height: None,
duration: None,
mime_type: None,
title: None,
sources: Vec::new(),
tracks: Vec::new(),
autoplay: false,
loop_video: false,
muted: false,
// The only flag that defaults to `true`.
controls: true,
playsinline: false,
embed_url: None,
size_bytes: None,
}
}
}
/// One alternative source of a video (element of `VideoMedia::sources`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoSource {
/// Source reference.
pub src: String,
/// MIME type string, if known.
pub mime_type: Option<String>,
/// Quality label, if any (format not established in this file).
pub quality: Option<String>,
}
/// One text track of a video (element of `VideoMedia::tracks`).
///
/// NOTE(review): presumably mirrors an HTML `<track>` element — confirm.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VideoTrack {
/// Track resource reference.
pub src: String,
/// Kind of track.
pub kind: TrackKind,
/// Human-readable label, if any.
pub label: Option<String>,
/// Language tag of the track, if any.
pub srclang: Option<String>,
/// Whether this track is marked as the default one.
pub is_default: bool,
}
/// Kind of a video text track; serialized with lowercase variant names.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
#[derive(Default)]
pub enum TrackKind {
/// The `Default` value.
#[default]
Subtitles,
/// Captions.
Captions,
/// Descriptions.
Descriptions,
/// Chapters.
Chapters,
/// Metadata.
Metadata,
}
/// Audio hosting platform associated with an audio item.
///
/// Serialized by serde with lowercase variant names (`rename_all = "lowercase"`).
/// `AudioPlatform::from_url` never returns `Html5`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum AudioPlatform {
/// Spotify.
Spotify,
/// SoundCloud.
SoundCloud,
/// Apple Podcasts.
ApplePodcasts,
/// Anchor.
Anchor,
/// Podbean.
Podbean,
/// Buzzsprout.
Buzzsprout,
/// Platform used by `AudioMedia::default()`.
Html5,
/// Fallback returned by `from_url` when nothing matches.
Other,
}
impl AudioPlatform {
    /// Guesses the audio platform from a URL.
    ///
    /// The lowercased URL is searched for each marker below, in order, and the
    /// platform of the first marker found anywhere in the text is returned.
    /// Returns [`AudioPlatform::Other`] when no marker occurs.
    pub fn from_url(url: &str) -> Self {
        // Ordered (marker, platform) pairs; the first hit wins.
        const MARKERS: [(&str, AudioPlatform); 7] = [
            ("spotify.com", AudioPlatform::Spotify),
            ("open.spotify", AudioPlatform::Spotify),
            ("soundcloud.com", AudioPlatform::SoundCloud),
            ("podcasts.apple.com", AudioPlatform::ApplePodcasts),
            ("anchor.fm", AudioPlatform::Anchor),
            ("podbean.com", AudioPlatform::Podbean),
            ("buzzsprout.com", AudioPlatform::Buzzsprout),
        ];

        let haystack = url.to_lowercase();
        MARKERS
            .iter()
            .find(|&&(marker, _)| haystack.contains(marker))
            .map_or(AudioPlatform::Other, |&(_, platform)| platform)
    }
}
/// One audio reference together with the metadata recorded for it.
///
/// NOTE(review): the playback flags look like they mirror `<audio>` attributes —
/// confirm against the extractor that populates this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AudioMedia {
/// Source reference as recorded.
pub src: String,
/// Absolute URL, when known. `ExtractedMedia::all_urls` skips items where
/// this is `None`.
pub absolute_url: Option<String>,
/// Hosting platform; `AudioPlatform::Html5` by default.
pub platform: AudioPlatform,
/// Title, if any.
pub title: Option<String>,
/// Artist, if any.
pub artist: Option<String>,
/// Album, if any.
pub album: Option<String>,
/// Duration, if known (unit not established in this file — presumably seconds).
pub duration: Option<f64>,
/// MIME type string, if known.
pub mime_type: Option<String>,
/// Alternative sources; empty by default.
pub sources: Vec<AudioSource>,
/// Autoplay flag; `false` by default.
pub autoplay: bool,
/// Loop flag; `false` by default.
pub loop_audio: bool,
/// Muted flag; `false` by default.
pub muted: bool,
/// Controls flag; `true` by default.
pub controls: bool,
/// Embed URL, if any.
pub embed_url: Option<String>,
/// Size in bytes, if known.
pub size_bytes: Option<usize>,
}
impl Default for AudioMedia {
/// Builds an empty audio record: platform `AudioPlatform::Html5`, `controls`
/// enabled, every other flag `false`, and all optional fields and lists empty.
fn default() -> Self {
Self {
src: String::new(),
absolute_url: None,
platform: AudioPlatform::Html5,
title: None,
artist: None,
album: None,
duration: None,
mime_type: None,
sources: Vec::new(),
autoplay: false,
loop_audio: false,
muted: false,
// The only flag that defaults to `true`.
controls: true,
embed_url: None,
size_bytes: None,
}
}
}
/// One alternative source of an audio item (element of `AudioMedia::sources`).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AudioSource {
/// Source reference.
pub src: String,
/// MIME type string, if known.
pub mime_type: Option<String>,
}
/// Document family, as assigned by `DocumentType::from_extension`.
///
/// Serialized by serde with lowercase variant names (`rename_all = "lowercase"`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum DocumentType {
/// `pdf`.
Pdf,
/// Word-processing files: `doc`, `docx`, `odt`, `rtf`.
Word,
/// Spreadsheets: `xls`, `xlsx`, `ods`.
Excel,
/// Presentations: `ppt`, `pptx`, `odp`.
PowerPoint,
/// `txt`.
Text,
/// `csv`.
Csv,
/// `epub`.
Epub,
/// Fallback for any other extension; also used by `DocumentMedia::default()`.
Other,
}
impl DocumentType {
    /// Maps a file extension (without dot, any letter case) to a document family.
    ///
    /// Extensions that are not in the table yield [`DocumentType::Other`].
    pub fn from_extension(ext: &str) -> Self {
        // Flat extension -> family table; extensions are unique, so order is irrelevant.
        const BY_EXTENSION: [(&str, DocumentType); 13] = [
            ("pdf", DocumentType::Pdf),
            ("doc", DocumentType::Word),
            ("docx", DocumentType::Word),
            ("odt", DocumentType::Word),
            ("rtf", DocumentType::Word),
            ("xls", DocumentType::Excel),
            ("xlsx", DocumentType::Excel),
            ("ods", DocumentType::Excel),
            ("ppt", DocumentType::PowerPoint),
            ("pptx", DocumentType::PowerPoint),
            ("odp", DocumentType::PowerPoint),
            ("txt", DocumentType::Text),
            ("csv", DocumentType::Csv),
        ];

        let normalized = ext.to_lowercase();
        if normalized == "epub" {
            return DocumentType::Epub;
        }
        BY_EXTENSION
            .iter()
            .find(|&&(known, _)| known == normalized)
            .map_or(DocumentType::Other, |&(_, family)| family)
    }
}
/// One document reference together with the metadata recorded for it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DocumentMedia {
/// URL as recorded.
pub url: String,
/// Absolute URL, when known. `ExtractedMedia::all_urls` skips documents
/// where this is `None`.
pub absolute_url: Option<String>,
/// Document family; `DocumentType::Other` by default.
pub doc_type: DocumentType,
/// File name, if known.
pub filename: Option<String>,
/// Title, if any.
pub title: Option<String>,
/// MIME type string, if known.
pub mime_type: Option<String>,
/// Size in bytes, if known.
pub size_bytes: Option<usize>,
/// Number of pages, if known.
pub page_count: Option<u32>,
}
impl Default for DocumentMedia {
/// Builds an empty document record: empty `url`, `DocumentType::Other`, and
/// `None` for every optional field.
fn default() -> Self {
Self {
url: String::new(),
absolute_url: None,
doc_type: DocumentType::Other,
filename: None,
title: None,
mime_type: None,
size_bytes: None,
page_count: None,
}
}
}
/// Kind of embedding mechanism; serialized with lowercase variant names.
///
/// NOTE(review): the variant names suggest the HTML elements `<iframe>`,
/// `<object>`, `<embed>` and `<script>`. Nothing in this file uses the type —
/// confirm at its call sites.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EmbedType {
/// Iframe.
Iframe,
/// Object.
Object,
/// Embed.
Embed,
/// Script.
Script,
}
/// Third-party service an embedded resource belongs to.
///
/// Serialized by serde with lowercase variant names (`rename_all = "lowercase"`).
/// Values are assigned by `EmbedPlatform::from_url`; `Other` is its fallback
/// and also the platform of `EmbeddedMedia::default()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
#[serde(rename_all = "lowercase")]
pub enum EmbedPlatform {
// Video services.
YouTube,
Vimeo,
Dailymotion,
Twitch,
Wistia,
// Social networks.
Twitter,
Instagram,
Facebook,
LinkedIn,
Pinterest,
TikTok,
Reddit,
// Audio services.
Spotify,
SoundCloud,
ApplePodcasts,
// Google services.
GoogleMaps,
GoogleDocs,
// Code playgrounds and snippets.
CodePen,
JsFiddle,
CodeSandbox,
Gist,
// Slides, GIFs, forms, scheduling, payments, documents.
SlideShare,
Giphy,
Typeform,
Calendly,
Stripe,
PayPal,
Scribd,
/// Fallback when no known service matches.
Other,
}
impl EmbedPlatform {
    /// Identifies the service behind an embed URL.
    ///
    /// Services are recognised by the URL's **host**: the host must equal the
    /// service domain or be a subdomain of it. Matching on the whole URL text
    /// would misclassify unrelated sites — `dropbox.com`, `box.com` and
    /// `netflix.com` all contain `x.com` — and would match domains that only
    /// appear in a path or query string.
    ///
    /// Google Maps is the one rule that also inspects the path: a `google.com`
    /// host whose path starts with `/maps`, or any host starting with
    /// `maps.google` (`maps.google.com`, `maps.googleapis.com`, ...).
    ///
    /// Absolute (`https://…`), protocol-relative (`//…`) and scheme-less URLs
    /// are accepted; matching is case-insensitive. Returns
    /// [`EmbedPlatform::Other`] when nothing matches.
    pub fn from_url(url: &str) -> Self {
        let url_lower = url.trim().to_lowercase();
        let (host, path) = Self::split_host_and_path(&url_lower);
        let on = |domain: &str| Self::host_on(host, domain);

        if on("youtube.com") || on("youtube-nocookie.com") {
            EmbedPlatform::YouTube
        } else if on("vimeo.com") {
            EmbedPlatform::Vimeo
        } else if on("dailymotion.com") {
            EmbedPlatform::Dailymotion
        } else if on("twitch.tv") {
            EmbedPlatform::Twitch
        } else if on("wistia.com") || on("wistia.net") {
            EmbedPlatform::Wistia
        } else if on("twitter.com") || on("x.com") {
            EmbedPlatform::Twitter
        } else if on("instagram.com") {
            EmbedPlatform::Instagram
        } else if on("facebook.com") || on("fb.com") {
            EmbedPlatform::Facebook
        } else if on("linkedin.com") {
            EmbedPlatform::LinkedIn
        } else if on("pinterest.com") {
            EmbedPlatform::Pinterest
        } else if on("tiktok.com") {
            EmbedPlatform::TikTok
        } else if on("reddit.com") || on("redd.it") {
            EmbedPlatform::Reddit
        } else if on("spotify.com") {
            EmbedPlatform::Spotify
        } else if on("soundcloud.com") {
            EmbedPlatform::SoundCloud
        } else if on("podcasts.apple.com") {
            EmbedPlatform::ApplePodcasts
        } else if (on("google.com") && path.starts_with("/maps")) || host.starts_with("maps.google")
        {
            // Checked before Google Docs, as in the original ordering.
            EmbedPlatform::GoogleMaps
        } else if on("docs.google.com") {
            EmbedPlatform::GoogleDocs
        } else if on("codepen.io") {
            EmbedPlatform::CodePen
        } else if on("jsfiddle.net") {
            EmbedPlatform::JsFiddle
        } else if on("codesandbox.io") {
            EmbedPlatform::CodeSandbox
        } else if on("gist.github.com") {
            EmbedPlatform::Gist
        } else if on("slideshare.net") {
            EmbedPlatform::SlideShare
        } else if on("giphy.com") {
            EmbedPlatform::Giphy
        } else if on("typeform.com") {
            EmbedPlatform::Typeform
        } else if on("calendly.com") {
            EmbedPlatform::Calendly
        } else if on("stripe.com") {
            EmbedPlatform::Stripe
        } else if on("paypal.com") {
            EmbedPlatform::PayPal
        } else if on("scribd.com") {
            EmbedPlatform::Scribd
        } else {
            EmbedPlatform::Other
        }
    }

    /// Splits a URL-like string into `(host, remainder)` without validating it.
    ///
    /// The host has scheme (or leading `//`), user-info and port removed; the
    /// remainder starts at the first `/`, `?` or `#` and may be empty.
    fn split_host_and_path(url: &str) -> (&str, &str) {
        let is_scheme = |s: &str| {
            !s.is_empty()
                && s.chars()
                    .all(|c| c.is_ascii_alphanumeric() || matches!(c, '+' | '-' | '.'))
        };
        // Only treat `://` as a scheme separator when what precedes it looks
        // like a scheme; otherwise it belongs to a query string.
        let rest = match url.split_once("://") {
            Some((scheme, rest)) if is_scheme(scheme) => rest,
            _ => url.strip_prefix("//").unwrap_or(url),
        };
        let end = rest.find(&['/', '?', '#'][..]).unwrap_or(rest.len());
        let (authority, path) = rest.split_at(end);
        let host_port = authority.rsplit_once('@').map_or(authority, |(_, h)| h);
        let host = host_port.split(':').next().unwrap_or(host_port);
        (host.trim_end_matches('.'), path)
    }

    /// Returns `true` when `host` is `domain` itself or one of its subdomains.
    fn host_on(host: &str, domain: &str) -> bool {
        host == domain || matches!(host.strip_suffix(domain), Some(sub) if sub.ends_with('.'))
    }
}
/// One embedded third-party resource together with the metadata recorded for it.
///
/// NOTE(review): `allow`, `sandbox`, `loading` and `frameborder` look like raw
/// `<iframe>` attribute values kept as strings — confirm against the extractor.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbeddedMedia {
/// URL as recorded.
pub url: String,
/// Absolute URL, when known. `ExtractedMedia::all_urls` skips embeds where
/// this is `None`.
pub absolute_url: Option<String>,
/// Service the embed belongs to; `EmbedPlatform::Other` by default.
pub platform: EmbedPlatform,
/// Title, if any.
pub title: Option<String>,
/// Width, if known.
pub width: Option<u32>,
/// Height, if known.
pub height: Option<u32>,
/// Raw `allow` value, if any.
pub allow: Option<String>,
/// Raw `sandbox` value, if any.
pub sandbox: Option<String>,
/// Raw `loading` value, if any (kept as text, unlike `ImageMedia::loading`).
pub loading: Option<String>,
/// Raw `frameborder` value, if any.
pub frameborder: Option<String>,
}
impl Default for EmbeddedMedia {
/// Builds an empty embed record: empty `url`, `EmbedPlatform::Other`, and
/// `None` for every optional field.
fn default() -> Self {
Self {
url: String::new(),
absolute_url: None,
platform: EmbedPlatform::Other,
title: None,
width: None,
height: None,
allow: None,
sandbox: None,
loading: None,
frameborder: None,
}
}
}
/// Classification of a hyperlink; serialized with lowercase variant names.
///
/// The rules that assign a variant are not part of this file.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, Default)]
#[serde(rename_all = "lowercase")]
pub enum LinkType {
/// The `Default` value.
#[default]
Internal,
/// External link.
External,
/// `mailto:` link (per the variant name).
Mailto,
/// `tel:` link (per the variant name).
Tel,
/// Download link.
Download,
/// Anchor link.
Anchor,
}
/// One hyperlink together with the metadata recorded for it.
///
/// NOTE(review): the fields look like they mirror `<a>` attributes, with
/// `is_nofollow` / `is_sponsored` / `is_ugc` presumably derived from `rel` —
/// confirm against the extractor that populates this struct.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LinkMedia {
/// Link target as recorded.
pub href: String,
/// Absolute URL, when known. Not included in `ExtractedMedia::all_urls`.
pub absolute_url: Option<String>,
/// Link text; empty by default.
pub text: String,
/// Title, if any.
pub title: Option<String>,
/// `rel` values; empty by default.
pub rel: Vec<String>,
/// Link classification; `LinkType::Internal` by default.
pub link_type: LinkType,
/// `false` by default.
pub is_nofollow: bool,
/// `false` by default.
pub is_sponsored: bool,
/// `false` by default.
pub is_ugc: bool,
/// Raw `target` value, if any.
pub target: Option<String>,
/// Raw `download` value, if any.
pub download: Option<String>,
/// Raw `hreflang` value, if any.
pub hreflang: Option<String>,
/// Media category of the link target, if one was determined.
pub media_type: Option<MediaType>,
}
impl Default for LinkMedia {
/// Builds an empty link record: empty `href` and `text`, no `rel` values,
/// `LinkType::Internal`, all three flags `false`, and `None` for every
/// optional field.
fn default() -> Self {
Self {
href: String::new(),
absolute_url: None,
text: String::new(),
title: None,
rel: Vec::new(),
link_type: LinkType::Internal,
is_nofollow: false,
is_sponsored: false,
is_ugc: false,
target: None,
download: None,
hreflang: None,
media_type: None,
}
}
}
/// Switches controlling which media kinds are extracted, plus download settings.
///
/// The flags are only stored here; the code that honours them lives outside
/// this file. See `MediaConfig::default`, `minimal` and `full` for presets.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MediaConfig {
/// Extract images (`true` in every preset).
pub extract_images: bool,
/// Extract videos (`false` in `minimal`).
pub extract_videos: bool,
/// Extract audio (`false` in `minimal`).
pub extract_audio: bool,
/// Extract documents (`false` in `minimal`).
pub extract_documents: bool,
/// Extract embeds (`false` in `minimal`).
pub extract_embeds: bool,
/// Extract links (`true` in every preset).
pub extract_links: bool,
/// Include `data:` URLs (`true` only in `full`).
pub include_data_urls: bool,
/// Filter out placeholder images (`true` in every preset).
pub filter_placeholders: bool,
/// Minimum image width, if any (`None` in every preset).
pub min_image_width: Option<u32>,
/// Minimum image height, if any (`None` in every preset).
pub min_image_height: Option<u32>,
/// Download settings.
pub download: DownloadConfig,
}
impl Default for MediaConfig {
/// Default configuration: every `extract_*` switch on, `data:` URLs
/// excluded, placeholder filtering on, no minimum image size, and
/// `DownloadConfig::default()` for downloads.
fn default() -> Self {
Self {
extract_images: true,
extract_videos: true,
extract_audio: true,
extract_documents: true,
extract_embeds: true,
extract_links: true,
include_data_urls: false,
filter_placeholders: true,
min_image_width: None,
min_image_height: None,
download: DownloadConfig::default(),
}
}
}
impl MediaConfig {
    /// Preset that extracts only images and links.
    ///
    /// Video, audio, document and embed extraction are switched off; every
    /// remaining setting is taken from [`MediaConfig::default`].
    pub fn minimal() -> Self {
        let defaults = Self::default();
        Self {
            // Kept on.
            extract_images: true,
            extract_links: true,
            // Switched off.
            extract_videos: false,
            extract_audio: false,
            extract_documents: false,
            extract_embeds: false,
            ..defaults
        }
    }

    /// Preset that extracts every media kind and also keeps `data:` URLs.
    ///
    /// Every remaining setting is taken from [`MediaConfig::default`].
    pub fn full() -> Self {
        let defaults = Self::default();
        Self {
            include_data_urls: true,
            extract_images: true,
            extract_videos: true,
            extract_audio: true,
            extract_documents: true,
            extract_embeds: true,
            extract_links: true,
            ..defaults
        }
    }
}
/// Settings for downloading media.
///
/// The values are only stored here; the downloader that honours them lives
/// outside this file. Defaults are listed per field (see the `Default` impl).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DownloadConfig {
/// Maximum file size in bytes, or `None` for no limit configured. Default: 50 MiB.
pub max_file_size: Option<u64>,
/// Maximum number of concurrent downloads. Default: 10.
pub max_concurrent: usize,
/// Timeout in seconds. Default: 30.
pub timeout_secs: u64,
/// Whether to produce a base64 encoding of downloaded content. Default: `false`.
pub encode_base64: bool,
/// Maximum number of retries. Default: 2.
pub max_retries: u32,
/// Delay between retries in milliseconds. Default: 1000.
pub retry_delay_ms: u64,
/// User-Agent string. Default: `halldyll-media/1.0`.
pub user_agent: String,
}
impl Default for DownloadConfig {
    /// Default download settings: 50 MiB size cap, 10 concurrent downloads,
    /// 30 s timeout, no base64 encoding, 2 retries spaced 1000 ms apart, and
    /// the `halldyll-media/1.0` user agent.
    fn default() -> Self {
        const MIB: u64 = 1024 * 1024;

        Self {
            max_file_size: Some(50 * MIB),
            max_concurrent: 10,
            timeout_secs: 30,
            encode_base64: false,
            max_retries: 2,
            retry_delay_ms: 1000,
            user_agent: String::from("halldyll-media/1.0"),
        }
    }
}
/// Outcome of downloading one media item.
///
/// Unlike the other types in this file it derives only `Debug` and `Clone`,
/// not serde's `Serialize`/`Deserialize`.
#[derive(Debug, Clone)]
pub struct DownloadResult {
/// URL the content was fetched from.
pub url: String,
/// Downloaded content.
pub bytes: Bytes,
/// Content type, if one was reported.
pub content_type: Option<String>,
/// Content size (presumably `bytes.len()` in bytes — TODO confirm in the downloader).
pub size: u64,
/// Content hash (algorithm and encoding not established in this file).
pub hash: String,
/// Media category assigned to the content.
pub media_type: MediaType,
/// Base64 form of the content, if produced (presumably when
/// `DownloadConfig::encode_base64` is set — TODO confirm).
pub base64: Option<String>,
}
/// Collection of everything extracted, grouped by kind.
///
/// `Default` (and `ExtractedMedia::new`) yields six empty lists.
#[derive(Debug, Clone, Default, Serialize, Deserialize)]
pub struct ExtractedMedia {
/// Extracted images.
pub images: Vec<ImageMedia>,
/// Extracted videos.
pub videos: Vec<VideoMedia>,
/// Extracted audio items.
pub audio: Vec<AudioMedia>,
/// Extracted documents.
pub documents: Vec<DocumentMedia>,
/// Extracted embeds.
pub embeds: Vec<EmbeddedMedia>,
/// Extracted links; counted by `total_count` but not listed by `all_urls`.
pub links: Vec<LinkMedia>,
}
impl ExtractedMedia {
    /// Creates an empty collection (same as `Default`).
    pub fn new() -> Self {
        Default::default()
    }

    /// Number of items across all six lists, links included.
    pub fn total_count(&self) -> usize {
        [
            self.images.len(),
            self.videos.len(),
            self.audio.len(),
            self.documents.len(),
            self.embeds.len(),
            self.links.len(),
        ]
        .iter()
        .sum()
    }

    /// `true` when every list is empty.
    pub fn is_empty(&self) -> bool {
        self.images.is_empty()
            && self.videos.is_empty()
            && self.audio.is_empty()
            && self.documents.is_empty()
            && self.embeds.is_empty()
            && self.links.is_empty()
    }

    /// `true` when at least one list holds an item.
    pub fn has_media(&self) -> bool {
        self.total_count() > 0
    }

    /// Owned copies of every known absolute URL, in the order images, videos,
    /// audio, documents, embeds.
    ///
    /// Items without an `absolute_url` are skipped. Links are not included.
    pub fn all_urls(&self) -> Vec<String> {
        let images = self.images.iter().filter_map(|m| m.absolute_url.as_ref());
        let videos = self.videos.iter().filter_map(|m| m.absolute_url.as_ref());
        let audio = self.audio.iter().filter_map(|m| m.absolute_url.as_ref());
        let documents = self.documents.iter().filter_map(|m| m.absolute_url.as_ref());
        let embeds = self.embeds.iter().filter_map(|m| m.absolute_url.as_ref());

        images
            .chain(videos)
            .chain(audio)
            .chain(documents)
            .chain(embeds)
            .cloned()
            .collect()
    }

    /// Borrowed absolute URLs of the images that have one, in list order.
    pub fn image_urls(&self) -> Vec<&str> {
        self.images
            .iter()
            .filter_map(|image| image.absolute_url.as_ref().map(String::as_str))
            .collect()
    }

    /// Borrowed absolute URLs of the videos that have one, in list order.
    pub fn video_urls(&self) -> Vec<&str> {
        self.videos
            .iter()
            .filter_map(|video| video.absolute_url.as_ref().map(String::as_str))
            .collect()
    }
}
// Unit tests for the classifier helpers, presets and the `ExtractedMedia` counters.
#[cfg(test)]
mod tests {
use super::*;
// Extension lookup covers each category, the fallback, and upper-case input.
#[test]
fn test_media_type_from_extension() {
assert_eq!(MediaType::from_extension("jpg"), MediaType::Image);
assert_eq!(MediaType::from_extension("PNG"), MediaType::Image);
assert_eq!(MediaType::from_extension("mp4"), MediaType::Video);
assert_eq!(MediaType::from_extension("mp3"), MediaType::Audio);
assert_eq!(MediaType::from_extension("pdf"), MediaType::Document);
assert_eq!(MediaType::from_extension("xyz"), MediaType::Other);
}
// MIME lookup: one representative type per category.
#[test]
fn test_media_type_from_mime() {
assert_eq!(MediaType::from_mime("image/jpeg"), MediaType::Image);
assert_eq!(MediaType::from_mime("video/mp4"), MediaType::Video);
assert_eq!(MediaType::from_mime("audio/mpeg"), MediaType::Audio);
assert_eq!(MediaType::from_mime("application/pdf"), MediaType::Document);
}
// Image format detection from extension and MIME type, plus the reverse mapping.
#[test]
fn test_image_format() {
assert_eq!(ImageFormat::from_extension("jpg"), ImageFormat::Jpeg);
assert_eq!(ImageFormat::from_extension("webp"), ImageFormat::WebP);
assert_eq!(ImageFormat::from_mime("image/png"), ImageFormat::Png);
assert_eq!(ImageFormat::Png.mime_type(), "image/png");
}
// Known video hosts are recognised; an unrelated host falls back to `Other`.
#[test]
fn test_video_platform_detection() {
assert_eq!(VideoPlatform::from_url("https://youtube.com/watch?v=abc"), VideoPlatform::YouTube);
assert_eq!(VideoPlatform::from_url("https://vimeo.com/123"), VideoPlatform::Vimeo);
assert_eq!(VideoPlatform::from_url("https://twitch.tv/channel"), VideoPlatform::Twitch);
assert_eq!(VideoPlatform::from_url("https://example.com/video.mp4"), VideoPlatform::Other);
}
// Known audio hosts are recognised.
#[test]
fn test_audio_platform_detection() {
assert_eq!(AudioPlatform::from_url("https://open.spotify.com/track/abc"), AudioPlatform::Spotify);
assert_eq!(AudioPlatform::from_url("https://soundcloud.com/artist/track"), AudioPlatform::SoundCloud);
}
// Embed hosts, including subdomains (`www.`, `player.`), are recognised.
#[test]
fn test_embed_platform_detection() {
assert_eq!(EmbedPlatform::from_url("https://www.youtube.com/embed/abc"), EmbedPlatform::YouTube);
assert_eq!(EmbedPlatform::from_url("https://player.vimeo.com/video/123"), EmbedPlatform::Vimeo);
assert_eq!(EmbedPlatform::from_url("https://codepen.io/user/pen/abc"), EmbedPlatform::CodePen);
}
// Document family lookup by extension.
#[test]
fn test_document_type() {
assert_eq!(DocumentType::from_extension("pdf"), DocumentType::Pdf);
assert_eq!(DocumentType::from_extension("docx"), DocumentType::Word);
assert_eq!(DocumentType::from_extension("xlsx"), DocumentType::Excel);
}
// A new collection is empty; pushing one image flips the counters.
#[test]
fn test_extracted_media() {
let mut media = ExtractedMedia::new();
assert!(!media.has_media());
assert_eq!(media.total_count(), 0);
media.images.push(ImageMedia::default());
assert!(media.has_media());
assert_eq!(media.total_count(), 1);
}
// The default config extracts images and allows downloads; `minimal` drops videos.
#[test]
fn test_media_config() {
let config = MediaConfig::default();
assert!(config.extract_images);
assert!(config.download.max_concurrent > 0);
let minimal = MediaConfig::minimal();
assert!(minimal.extract_images);
assert!(!minimal.extract_videos);
}
}