rustyclaw_core/messengers/
media.rs1use serde::{Deserialize, Serialize};
11use std::path::{Path, PathBuf};
12use std::process::{Command, Stdio};
13use tracing::{debug, warn};
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
17#[serde(rename_all = "snake_case")]
18pub enum MediaType {
19 Image,
20 Audio,
21 Video,
22 Document,
23 Unknown,
24}
25
26impl MediaType {
27 pub fn from_extension(ext: &str) -> Self {
29 match ext.to_lowercase().as_str() {
30 "jpg" | "jpeg" | "png" | "gif" | "webp" | "bmp" | "svg" | "tiff" | "ico" => {
31 Self::Image
32 }
33 "mp3" | "wav" | "ogg" | "flac" | "m4a" | "aac" | "wma" | "opus" => Self::Audio,
34 "mp4" | "webm" | "avi" | "mov" | "mkv" | "flv" | "wmv" => Self::Video,
35 "pdf" | "doc" | "docx" | "txt" | "rtf" | "odt" => Self::Document,
36 _ => Self::Unknown,
37 }
38 }
39
40 pub fn from_mime(mime: &str) -> Self {
42 if mime.starts_with("image/") {
43 Self::Image
44 } else if mime.starts_with("audio/") {
45 Self::Audio
46 } else if mime.starts_with("video/") {
47 Self::Video
48 } else if mime.starts_with("application/pdf")
49 || mime.starts_with("application/msword")
50 || mime.starts_with("text/")
51 {
52 Self::Document
53 } else {
54 Self::Unknown
55 }
56 }
57}
58
59#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct MediaConfig {
62 #[serde(default = "default_image_max")]
64 pub image_max_bytes: usize,
65
66 #[serde(default = "default_audio_max")]
68 pub audio_max_bytes: usize,
69
70 #[serde(default = "default_video_max")]
72 pub video_max_bytes: usize,
73
74 #[serde(default = "default_max_dimension")]
76 pub max_image_dimension: u32,
77
78 #[serde(default)]
80 pub auto_transcribe: bool,
81
82 #[serde(default = "default_whisper_model")]
84 pub whisper_model: String,
85
86 #[serde(default = "default_temp_dir")]
88 pub temp_dir: PathBuf,
89}
90
/// Serde default for `MediaConfig::image_max_bytes`: 10 MiB.
fn default_image_max() -> usize {
    const MIB: usize = 1024 * 1024;
    10 * MIB
}
/// Serde default for `MediaConfig::audio_max_bytes`: 25 MiB.
fn default_audio_max() -> usize {
    const MIB: usize = 1024 * 1024;
    25 * MIB
}
/// Serde default for `MediaConfig::video_max_bytes`: 50 MiB.
fn default_video_max() -> usize {
    const MIB: usize = 1024 * 1024;
    50 * MIB
}
/// Serde default for `MediaConfig::max_image_dimension`: 2048.
fn default_max_dimension() -> u32 {
    const DEFAULT_MAX_DIMENSION: u32 = 2048;
    DEFAULT_MAX_DIMENSION
}
/// Serde default for `MediaConfig::whisper_model`: the `"base"` model.
fn default_whisper_model() -> String {
    String::from("base")
}
/// Serde default for `MediaConfig::temp_dir`: a `rustyclaw-media` directory
/// under the system temp directory. The directory is not created here.
fn default_temp_dir() -> PathBuf {
    let mut dir = std::env::temp_dir();
    dir.push("rustyclaw-media");
    dir
}
109
110impl Default for MediaConfig {
111 fn default() -> Self {
112 Self {
113 image_max_bytes: default_image_max(),
114 audio_max_bytes: default_audio_max(),
115 video_max_bytes: default_video_max(),
116 max_image_dimension: default_max_dimension(),
117 auto_transcribe: false,
118 whisper_model: default_whisper_model(),
119 temp_dir: default_temp_dir(),
120 }
121 }
122}
123
124#[derive(Debug, Clone, Serialize)]
126pub struct ProcessedMedia {
127 pub original_path: PathBuf,
129 pub processed_path: PathBuf,
131 pub media_type: MediaType,
133 pub size_bytes: u64,
135 pub transcription: Option<String>,
137 pub description: Option<String>,
139 pub mime_type: String,
141}
142
143pub fn check_size_limit(path: &Path, config: &MediaConfig) -> Result<(), String> {
145 let metadata = std::fs::metadata(path)
146 .map_err(|e| format!("Cannot read file metadata: {}", e))?;
147 let size = metadata.len() as usize;
148
149 let ext = path
150 .extension()
151 .and_then(|e| e.to_str())
152 .unwrap_or("");
153 let media_type = MediaType::from_extension(ext);
154
155 let limit = match media_type {
156 MediaType::Image => config.image_max_bytes,
157 MediaType::Audio => config.audio_max_bytes,
158 MediaType::Video => config.video_max_bytes,
159 _ => config.video_max_bytes, };
161
162 if size > limit {
163 Err(format!(
164 "File size ({} bytes) exceeds limit ({} bytes) for {:?}",
165 size, limit, media_type
166 ))
167 } else {
168 Ok(())
169 }
170}
171
172pub fn resize_image(
174 input: &Path,
175 max_dimension: u32,
176 output_dir: &Path,
177) -> Result<PathBuf, String> {
178 let filename = input
179 .file_name()
180 .and_then(|f| f.to_str())
181 .unwrap_or("output.jpg");
182 let output = output_dir.join(format!("resized_{}", filename));
183
184 std::fs::create_dir_all(output_dir)
185 .map_err(|e| format!("Failed to create output dir: {}", e))?;
186
187 let result = Command::new("convert")
189 .args([
190 input.to_string_lossy().as_ref(),
191 "-resize",
192 &format!("{}x{}>", max_dimension, max_dimension),
193 output.to_string_lossy().as_ref(),
194 ])
195 .stdout(Stdio::piped())
196 .stderr(Stdio::piped())
197 .output();
198
199 if let Ok(out) = result {
200 if out.status.success() {
201 debug!(input = %input.display(), output = %output.display(), "Image resized with ImageMagick");
202 return Ok(output);
203 }
204 }
205
206 let result = Command::new("ffmpeg")
208 .args([
209 "-i",
210 input.to_string_lossy().as_ref(),
211 "-vf",
212 &format!(
213 "scale='min({0},iw)':'min({0},ih)':force_original_aspect_ratio=decrease",
214 max_dimension
215 ),
216 "-y",
217 output.to_string_lossy().as_ref(),
218 ])
219 .stdout(Stdio::piped())
220 .stderr(Stdio::piped())
221 .output();
222
223 if let Ok(out) = result {
224 if out.status.success() {
225 debug!(input = %input.display(), output = %output.display(), "Image resized with ffmpeg");
226 return Ok(output);
227 }
228 }
229
230 warn!("No image resize tools available (install ImageMagick or ffmpeg)");
232 Ok(input.to_path_buf())
233}
234
235pub fn transcribe_audio(input: &Path, model: &str) -> Result<String, String> {
237 let result = Command::new("whisper")
239 .args([
240 input.to_string_lossy().as_ref(),
241 "--model",
242 model,
243 "--output_format",
244 "txt",
245 "--output_dir",
246 "/tmp",
247 ])
248 .stdout(Stdio::piped())
249 .stderr(Stdio::piped())
250 .output();
251
252 if let Ok(out) = result {
253 if out.status.success() {
254 let txt_path = PathBuf::from("/tmp").join(
256 input
257 .file_stem()
258 .and_then(|s| s.to_str())
259 .unwrap_or("audio"),
260 ).with_extension("txt");
261
262 if let Ok(text) = std::fs::read_to_string(&txt_path) {
263 debug!(input = %input.display(), "Audio transcribed with whisper");
264 return Ok(text.trim().to_string());
265 }
266 }
267 }
268
269 let result = Command::new("main")
271 .args([
272 "-m",
273 &format!("models/ggml-{}.bin", model),
274 "-f",
275 input.to_string_lossy().as_ref(),
276 "--output-txt",
277 ])
278 .stdout(Stdio::piped())
279 .stderr(Stdio::piped())
280 .output();
281
282 if let Ok(out) = result {
283 if out.status.success() {
284 let text = String::from_utf8_lossy(&out.stdout).trim().to_string();
285 if !text.is_empty() {
286 return Ok(text);
287 }
288 }
289 }
290
291 Err("Transcription failed. Install whisper (pip install openai-whisper) \
292 or whisper.cpp for audio transcription support."
293 .to_string())
294}
295
296pub fn extract_video_frame(
298 input: &Path,
299 timestamp_secs: f64,
300 output_dir: &Path,
301) -> Result<PathBuf, String> {
302 let filename = input
303 .file_stem()
304 .and_then(|s| s.to_str())
305 .unwrap_or("frame");
306 let output = output_dir.join(format!("{}_frame.jpg", filename));
307
308 std::fs::create_dir_all(output_dir)
309 .map_err(|e| format!("Failed to create output dir: {}", e))?;
310
311 let result = Command::new("ffmpeg")
312 .args([
313 "-i",
314 input.to_string_lossy().as_ref(),
315 "-ss",
316 &format!("{:.2}", timestamp_secs),
317 "-frames:v",
318 "1",
319 "-y",
320 output.to_string_lossy().as_ref(),
321 ])
322 .stdout(Stdio::piped())
323 .stderr(Stdio::piped())
324 .output();
325
326 match result {
327 Ok(out) if out.status.success() => {
328 debug!(
329 input = %input.display(),
330 timestamp = timestamp_secs,
331 "Video frame extracted"
332 );
333 Ok(output)
334 }
335 _ => Err("Failed to extract video frame. Install ffmpeg for video support.".to_string()),
336 }
337}
338
/// Determines a file's MIME type.
///
/// First asks the `file` utility (`file --mime-type -b`). Its answer is used
/// only if it looks like a bare `type/subtype` token: no spaces and exactly
/// one `/`. That rejects diagnostic text the tool may print in place of a
/// type. Otherwise the type is guessed from the file extension
/// (case-insensitive), with `application/octet-stream` as the last resort.
pub fn detect_mime_type(path: &Path) -> String {
    // `--` ends option parsing so a path starting with `-` is not read as a
    // flag; the path is passed as an OS string so non-UTF-8 names survive.
    let probe = Command::new("file")
        .args(["--mime-type", "-b", "--"])
        .arg(path)
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output();

    if let Ok(out) = probe {
        if out.status.success() {
            let mime = String::from_utf8_lossy(&out.stdout).trim().to_string();
            if !mime.contains(' ') && mime.matches('/').count() == 1 {
                return mime;
            }
        }
    }

    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");

    // Covers every extension `MediaType::from_extension` recognises, so the
    // fallback does not report a known media file as an opaque binary.
    match ext.to_lowercase().as_str() {
        // Images
        "jpg" | "jpeg" => "image/jpeg",
        "png" => "image/png",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "bmp" => "image/bmp",
        "svg" => "image/svg+xml",
        "tiff" => "image/tiff",
        "ico" => "image/vnd.microsoft.icon",
        // Audio
        "mp3" => "audio/mpeg",
        "wav" => "audio/wav",
        "ogg" | "opus" => "audio/ogg",
        "flac" => "audio/flac",
        "m4a" => "audio/mp4",
        "aac" => "audio/aac",
        "wma" => "audio/x-ms-wma",
        // Video
        "mp4" => "video/mp4",
        "webm" => "video/webm",
        "avi" => "video/x-msvideo",
        "mov" => "video/quicktime",
        "mkv" => "video/x-matroska",
        "flv" => "video/x-flv",
        "wmv" => "video/x-ms-wmv",
        // Documents
        "pdf" => "application/pdf",
        "doc" => "application/msword",
        "docx" => "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        "txt" => "text/plain",
        "rtf" => "application/rtf",
        "odt" => "application/vnd.oasis.opendocument.text",
        _ => "application/octet-stream",
    }
    .to_string()
}
382
#[cfg(test)]
mod tests {
    use super::*;

    /// Extension lookup ignores case and falls back to `Unknown`.
    #[test]
    fn test_media_type_from_extension() {
        let cases = [
            ("jpg", MediaType::Image),
            ("PNG", MediaType::Image),
            ("mp3", MediaType::Audio),
            ("mp4", MediaType::Video),
            ("pdf", MediaType::Document),
            ("xyz", MediaType::Unknown),
        ];
        for &(ext, expected) in cases.iter() {
            assert_eq!(MediaType::from_extension(ext), expected);
        }
    }

    /// MIME lookup classifies by top-level type, with PDF as a document.
    #[test]
    fn test_media_type_from_mime() {
        let cases = [
            ("image/jpeg", MediaType::Image),
            ("audio/mpeg", MediaType::Audio),
            ("video/mp4", MediaType::Video),
            ("application/pdf", MediaType::Document),
            ("application/octet-stream", MediaType::Unknown),
        ];
        for &(mime, expected) in cases.iter() {
            assert_eq!(MediaType::from_mime(mime), expected);
        }
    }

    /// `MediaConfig::default()` carries the documented default values.
    #[test]
    fn test_media_config_defaults() {
        const MIB: usize = 1024 * 1024;
        let defaults = MediaConfig::default();
        assert_eq!(defaults.image_max_bytes, 10 * MIB);
        assert_eq!(defaults.audio_max_bytes, 25 * MIB);
        assert_eq!(defaults.video_max_bytes, 50 * MIB);
        assert_eq!(defaults.max_image_dimension, 2048);
        assert!(!defaults.auto_transcribe);
        assert_eq!(defaults.whisper_model, "base");
    }

    /// A path that does not exist cannot be size-checked.
    #[test]
    fn test_check_size_limit_nonexistent() {
        let missing = Path::new("/tmp/nonexistent.jpg");
        let outcome = check_size_limit(missing, &MediaConfig::default());
        assert!(outcome.is_err());
    }

    /// With no readable file, detection falls back to the extension table.
    #[test]
    fn test_detect_mime_fallback() {
        let missing = Path::new("/tmp/nonexistent_test_file_that_should_not_exist.jpg");
        assert_eq!(detect_mime_type(missing), "image/jpeg");
    }
}