Skip to main content

rustyclaw_core/messengers/
media.rs

1//! Media pipeline for image, audio, and video processing.
2//!
3//! Provides utilities for handling media attachments in messenger conversations:
4//! - Image processing (resize, format conversion, size caps)
5//! - Audio transcription (via external tools like whisper)
6//! - Video frame extraction
7//! - MIME type detection
8//! - Size limit enforcement
9
10use serde::{Deserialize, Serialize};
11use std::path::{Path, PathBuf};
12use std::process::{Command, Stdio};
13use tracing::{debug, warn};
14
15/// Supported media types.
16#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(rename_all = "snake_case")]
18pub enum MediaType {
19    Image,
20    Audio,
21    Video,
22    Document,
23    Unknown,
24}
25
26impl MediaType {
27    /// Detect media type from file extension.
28    pub fn from_extension(ext: &str) -> Self {
29        match ext.to_lowercase().as_str() {
30            "jpg" | "jpeg" | "png" | "gif" | "webp" | "bmp" | "svg" | "tiff" | "ico" => {
31                Self::Image
32            }
33            "mp3" | "wav" | "ogg" | "flac" | "m4a" | "aac" | "wma" | "opus" => Self::Audio,
34            "mp4" | "webm" | "avi" | "mov" | "mkv" | "flv" | "wmv" => Self::Video,
35            "pdf" | "doc" | "docx" | "txt" | "rtf" | "odt" => Self::Document,
36            _ => Self::Unknown,
37        }
38    }
39
40    /// Detect media type from MIME type string.
41    pub fn from_mime(mime: &str) -> Self {
42        if mime.starts_with("image/") {
43            Self::Image
44        } else if mime.starts_with("audio/") {
45            Self::Audio
46        } else if mime.starts_with("video/") {
47            Self::Video
48        } else if mime.starts_with("application/pdf")
49            || mime.starts_with("application/msword")
50            || mime.starts_with("text/")
51        {
52            Self::Document
53        } else {
54            Self::Unknown
55        }
56    }
57}
58
59/// Media pipeline configuration.
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct MediaConfig {
62    /// Maximum file size in bytes for image uploads (default: 10 MB).
63    #[serde(default = "default_image_max")]
64    pub image_max_bytes: usize,
65
66    /// Maximum file size in bytes for audio uploads (default: 25 MB).
67    #[serde(default = "default_audio_max")]
68    pub audio_max_bytes: usize,
69
70    /// Maximum file size in bytes for video uploads (default: 50 MB).
71    #[serde(default = "default_video_max")]
72    pub video_max_bytes: usize,
73
74    /// Maximum image dimension (width or height) for resizing.
75    #[serde(default = "default_max_dimension")]
76    pub max_image_dimension: u32,
77
78    /// Whether to auto-transcribe audio attachments.
79    #[serde(default)]
80    pub auto_transcribe: bool,
81
82    /// Whisper model size for transcription ("tiny", "base", "small", "medium", "large").
83    #[serde(default = "default_whisper_model")]
84    pub whisper_model: String,
85
86    /// Temporary directory for processed media.
87    #[serde(default = "default_temp_dir")]
88    pub temp_dir: PathBuf,
89}
90
/// Number of bytes in one mebibyte; the size defaults below are multiples of it.
const BYTES_PER_MIB: usize = 1024 * 1024;

/// Default for `MediaConfig::image_max_bytes`: 10 MiB.
fn default_image_max() -> usize {
    10 * BYTES_PER_MIB
}

/// Default for `MediaConfig::audio_max_bytes`: 25 MiB.
fn default_audio_max() -> usize {
    25 * BYTES_PER_MIB
}

/// Default for `MediaConfig::video_max_bytes`: 50 MiB.
fn default_video_max() -> usize {
    50 * BYTES_PER_MIB
}

/// Default for `MediaConfig::max_image_dimension`: 2048 pixels.
fn default_max_dimension() -> u32 {
    2048
}

/// Default for `MediaConfig::whisper_model`: the "base" model.
fn default_whisper_model() -> String {
    String::from("base")
}

/// Default for `MediaConfig::temp_dir`: a `rustyclaw-media` folder inside the
/// system temporary directory.
fn default_temp_dir() -> PathBuf {
    let mut dir = std::env::temp_dir();
    dir.push("rustyclaw-media");
    dir
}
109
110impl Default for MediaConfig {
111    fn default() -> Self {
112        Self {
113            image_max_bytes: default_image_max(),
114            audio_max_bytes: default_audio_max(),
115            video_max_bytes: default_video_max(),
116            max_image_dimension: default_max_dimension(),
117            auto_transcribe: false,
118            whisper_model: default_whisper_model(),
119            temp_dir: default_temp_dir(),
120        }
121    }
122}
123
124/// Result of processing a media file.
125#[derive(Debug, Clone, Serialize)]
126pub struct ProcessedMedia {
127    /// Original file path.
128    pub original_path: PathBuf,
129    /// Processed file path (may be same as original if no processing needed).
130    pub processed_path: PathBuf,
131    /// Detected media type.
132    pub media_type: MediaType,
133    /// File size in bytes after processing.
134    pub size_bytes: u64,
135    /// Optional transcription text (for audio/video).
136    pub transcription: Option<String>,
137    /// Optional description (for images via vision model).
138    pub description: Option<String>,
139    /// MIME type.
140    pub mime_type: String,
141}
142
143/// Check if a file exceeds size limits.
144pub fn check_size_limit(path: &Path, config: &MediaConfig) -> Result<(), String> {
145    let metadata = std::fs::metadata(path)
146        .map_err(|e| format!("Cannot read file metadata: {}", e))?;
147    let size = metadata.len() as usize;
148
149    let ext = path
150        .extension()
151        .and_then(|e| e.to_str())
152        .unwrap_or("");
153    let media_type = MediaType::from_extension(ext);
154
155    let limit = match media_type {
156        MediaType::Image => config.image_max_bytes,
157        MediaType::Audio => config.audio_max_bytes,
158        MediaType::Video => config.video_max_bytes,
159        _ => config.video_max_bytes, // use largest limit as fallback
160    };
161
162    if size > limit {
163        Err(format!(
164            "File size ({} bytes) exceeds limit ({} bytes) for {:?}",
165            size, limit, media_type
166        ))
167    } else {
168        Ok(())
169    }
170}
171
172/// Resize an image using ImageMagick convert or ffmpeg.
173pub fn resize_image(
174    input: &Path,
175    max_dimension: u32,
176    output_dir: &Path,
177) -> Result<PathBuf, String> {
178    let filename = input
179        .file_name()
180        .and_then(|f| f.to_str())
181        .unwrap_or("output.jpg");
182    let output = output_dir.join(format!("resized_{}", filename));
183
184    std::fs::create_dir_all(output_dir)
185        .map_err(|e| format!("Failed to create output dir: {}", e))?;
186
187    // Try ImageMagick first
188    let result = Command::new("convert")
189        .args([
190            input.to_string_lossy().as_ref(),
191            "-resize",
192            &format!("{}x{}>", max_dimension, max_dimension),
193            output.to_string_lossy().as_ref(),
194        ])
195        .stdout(Stdio::piped())
196        .stderr(Stdio::piped())
197        .output();
198
199    if let Ok(out) = result {
200        if out.status.success() {
201            debug!(input = %input.display(), output = %output.display(), "Image resized with ImageMagick");
202            return Ok(output);
203        }
204    }
205
206    // Fallback to ffmpeg
207    let result = Command::new("ffmpeg")
208        .args([
209            "-i",
210            input.to_string_lossy().as_ref(),
211            "-vf",
212            &format!(
213                "scale='min({0},iw)':'min({0},ih)':force_original_aspect_ratio=decrease",
214                max_dimension
215            ),
216            "-y",
217            output.to_string_lossy().as_ref(),
218        ])
219        .stdout(Stdio::piped())
220        .stderr(Stdio::piped())
221        .output();
222
223    if let Ok(out) = result {
224        if out.status.success() {
225            debug!(input = %input.display(), output = %output.display(), "Image resized with ffmpeg");
226            return Ok(output);
227        }
228    }
229
230    // No resize tools available — return original
231    warn!("No image resize tools available (install ImageMagick or ffmpeg)");
232    Ok(input.to_path_buf())
233}
234
235/// Transcribe an audio file using whisper.
236pub fn transcribe_audio(input: &Path, model: &str) -> Result<String, String> {
237    // Try whisper CLI (OpenAI's whisper or whisper.cpp)
238    let result = Command::new("whisper")
239        .args([
240            input.to_string_lossy().as_ref(),
241            "--model",
242            model,
243            "--output_format",
244            "txt",
245            "--output_dir",
246            "/tmp",
247        ])
248        .stdout(Stdio::piped())
249        .stderr(Stdio::piped())
250        .output();
251
252    if let Ok(out) = result {
253        if out.status.success() {
254            // whisper writes to <input_name>.txt
255            let txt_path = PathBuf::from("/tmp").join(
256                input
257                    .file_stem()
258                    .and_then(|s| s.to_str())
259                    .unwrap_or("audio"),
260            ).with_extension("txt");
261
262            if let Ok(text) = std::fs::read_to_string(&txt_path) {
263                debug!(input = %input.display(), "Audio transcribed with whisper");
264                return Ok(text.trim().to_string());
265            }
266        }
267    }
268
269    // Fallback: try whisper.cpp main binary
270    let result = Command::new("main")
271        .args([
272            "-m",
273            &format!("models/ggml-{}.bin", model),
274            "-f",
275            input.to_string_lossy().as_ref(),
276            "--output-txt",
277        ])
278        .stdout(Stdio::piped())
279        .stderr(Stdio::piped())
280        .output();
281
282    if let Ok(out) = result {
283        if out.status.success() {
284            let text = String::from_utf8_lossy(&out.stdout).trim().to_string();
285            if !text.is_empty() {
286                return Ok(text);
287            }
288        }
289    }
290
291    Err("Transcription failed. Install whisper (pip install openai-whisper) \
292         or whisper.cpp for audio transcription support."
293        .to_string())
294}
295
296/// Extract a frame from a video at a given timestamp.
297pub fn extract_video_frame(
298    input: &Path,
299    timestamp_secs: f64,
300    output_dir: &Path,
301) -> Result<PathBuf, String> {
302    let filename = input
303        .file_stem()
304        .and_then(|s| s.to_str())
305        .unwrap_or("frame");
306    let output = output_dir.join(format!("{}_frame.jpg", filename));
307
308    std::fs::create_dir_all(output_dir)
309        .map_err(|e| format!("Failed to create output dir: {}", e))?;
310
311    let result = Command::new("ffmpeg")
312        .args([
313            "-i",
314            input.to_string_lossy().as_ref(),
315            "-ss",
316            &format!("{:.2}", timestamp_secs),
317            "-frames:v",
318            "1",
319            "-y",
320            output.to_string_lossy().as_ref(),
321        ])
322        .stdout(Stdio::piped())
323        .stderr(Stdio::piped())
324        .output();
325
326    match result {
327        Ok(out) if out.status.success() => {
328            debug!(
329                input = %input.display(),
330                timestamp = timestamp_secs,
331                "Video frame extracted"
332            );
333            Ok(output)
334        }
335        _ => Err("Failed to extract video frame. Install ffmpeg for video support.".to_string()),
336    }
337}
338
/// Detect MIME type of a file using the `file` command, falling back to a lookup by
/// file extension.
///
/// The fallback is used when `file` cannot be run, exits with an error, or prints
/// something that does not look like a MIME type. It knows the usual MIME type for every
/// extension recognized by `MediaType::from_extension`; anything else yields
/// `application/octet-stream`.
pub fn detect_mime_type(path: &Path) -> String {
    let result = Command::new("file")
        // `--` ends option parsing, so a path starting with `-` is not read as a flag.
        .args(["--mime-type", "-b", "--"])
        // Pass the path as an OS string; a lossy UTF-8 conversion could name another file.
        .arg(path)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output();

    if let Ok(out) = result {
        if out.status.success() {
            let mime = String::from_utf8_lossy(&out.stdout).trim().to_string();
            // The `file` command may succeed but return an error message
            // (e.g. "cannot open '/path' (No such file or directory)")
            // instead of a real MIME type.  A valid MIME type is a single
            // token like "image/jpeg" — no spaces, exactly one slash.
            if !mime.contains(' ') && mime.matches('/').count() == 1 {
                return mime;
            }
        }
    }

    // Fallback based on extension
    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");

    match ext.to_ascii_lowercase().as_str() {
        // Images
        "jpg" | "jpeg" => "image/jpeg",
        "png" => "image/png",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "bmp" => "image/bmp",
        "svg" => "image/svg+xml",
        "tiff" => "image/tiff",
        "ico" => "image/vnd.microsoft.icon",
        // Audio
        "mp3" => "audio/mpeg",
        "wav" => "audio/wav",
        "ogg" | "opus" => "audio/ogg",
        "flac" => "audio/flac",
        "m4a" => "audio/mp4",
        "aac" => "audio/aac",
        "wma" => "audio/x-ms-wma",
        // Video
        "mp4" => "video/mp4",
        "webm" => "video/webm",
        "avi" => "video/x-msvideo",
        "mov" => "video/quicktime",
        "mkv" => "video/x-matroska",
        "flv" => "video/x-flv",
        "wmv" => "video/x-ms-wmv",
        // Documents
        "pdf" => "application/pdf",
        "doc" => "application/msword",
        "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "txt" => "text/plain",
        "rtf" => "application/rtf",
        "odt" => "application/vnd.oasis.opendocument.text",
        _ => "application/octet-stream",
    }
    .to_string()
}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// Extensions are classified regardless of case; unknown ones map to `Unknown`.
    #[test]
    fn test_media_type_from_extension() {
        let cases = [
            ("jpg", MediaType::Image),
            ("PNG", MediaType::Image),
            ("mp3", MediaType::Audio),
            ("mp4", MediaType::Video),
            ("pdf", MediaType::Document),
            ("xyz", MediaType::Unknown),
        ];

        for (ext, expected) in cases {
            assert_eq!(MediaType::from_extension(ext), expected);
        }
    }

    /// MIME strings are classified by their top-level type or a known document type.
    #[test]
    fn test_media_type_from_mime() {
        let cases = [
            ("image/jpeg", MediaType::Image),
            ("audio/mpeg", MediaType::Audio),
            ("video/mp4", MediaType::Video),
            ("application/pdf", MediaType::Document),
            ("application/octet-stream", MediaType::Unknown),
        ];

        for (mime, expected) in cases {
            assert_eq!(MediaType::from_mime(mime), expected);
        }
    }

    /// `MediaConfig::default()` yields the documented default values.
    #[test]
    fn test_media_config_defaults() {
        let MediaConfig {
            image_max_bytes,
            audio_max_bytes,
            video_max_bytes,
            max_image_dimension,
            auto_transcribe,
            whisper_model,
            ..
        } = MediaConfig::default();

        assert_eq!(image_max_bytes, 10 * 1024 * 1024);
        assert_eq!(audio_max_bytes, 25 * 1024 * 1024);
        assert_eq!(video_max_bytes, 50 * 1024 * 1024);
        assert_eq!(max_image_dimension, 2048);
        assert!(!auto_transcribe);
        assert_eq!(whisper_model, "base");
    }

    /// A file that does not exist cannot be size-checked and must produce an error.
    #[test]
    fn test_check_size_limit_nonexistent() {
        let missing = Path::new("/tmp/nonexistent.jpg");
        assert!(check_size_limit(missing, &MediaConfig::default()).is_err());
    }

    /// For a nonexistent file, `file --mime-type` may exit non-zero or print a
    /// non-MIME error string. Either way `detect_mime_type()` has to fall through
    /// to the extension-based lookup, which returns "image/jpeg" for a .jpg path.
    #[test]
    fn test_detect_mime_fallback() {
        let missing = Path::new("/tmp/nonexistent_test_file_that_should_not_exist.jpg");
        assert_eq!(detect_mime_type(missing), "image/jpeg");
    }
}