trueno_rag/loader/
image.rs1use crate::{Document, Result};
12use std::path::Path;
13use std::process::Command;
14
15use super::DocumentLoader;
16
/// Document loader for raster images.
///
/// The loader is stateless: its [`DocumentLoader`] implementation obtains the
/// text of an image by running the external `tesseract` executable on it.
/// Because it carries no data, it derives the common value traits so it can be
/// default-constructed, copied and compared freely.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct ImageLoader;
22
23impl DocumentLoader for ImageLoader {
24 fn supported_extensions(&self) -> Vec<&str> {
25 vec!["png", "jpg", "jpeg", "tiff", "bmp"]
26 }
27
28 fn load(&self, path: &Path) -> Result<Document> {
29 let output = Command::new("tesseract")
30 .arg(path.as_os_str())
31 .arg("stdout")
32 .arg("--psm")
33 .arg("6") .output()
35 .map_err(|e| {
36 crate::Error::InvalidConfig(format!(
37 "Failed to run tesseract (is it installed?): {e}"
38 ))
39 })?;
40
41 if !output.status.success() {
42 let stderr = String::from_utf8_lossy(&output.stderr);
43 return Err(crate::Error::InvalidConfig(format!(
44 "Tesseract failed on {}: {}",
45 path.display(),
46 stderr.trim()
47 )));
48 }
49
50 let content = String::from_utf8_lossy(&output.stdout).trim().to_string();
51
52 let title = path.file_stem().and_then(|s| s.to_str()).unwrap_or("Untitled").to_string();
53
54 let mut doc = Document::new(content).with_title(title).with_source(path.to_string_lossy());
56
57 if let Some(secs) = extract_timestamp_from_filename(path) {
59 doc.metadata.insert(
60 "frame_time_secs".to_string(),
61 serde_json::Value::Number(
62 serde_json::Number::from_f64(secs)
63 .unwrap_or_else(|| serde_json::Number::from(0)),
64 ),
65 );
66 }
67
68 Ok(doc)
69 }
70}
71
/// Extracts a frame timestamp, in seconds, from a file name of the form
/// `frame_<number>` or `frame_<number>s` (extension ignored), e.g.
/// `frame_120s.png` -> `120.0`, `frame_45.5s.png` -> `45.5`.
///
/// Returns `None` when the path has no UTF-8 file stem, the stem does not
/// start with `frame_`, the remainder does not parse as a number, or the
/// parsed value is not finite. The finiteness check matters because `f64`
/// parsing accepts spellings such as `nan` and `inf`, which are not meaningful
/// timestamps.
fn extract_timestamp_from_filename(path: &Path) -> Option<f64> {
    let stem = path.file_stem()?.to_str()?;
    let rest = stem.strip_prefix("frame_")?;

    // Trailing `s` characters are a unit suffix ("120s"); drop them before parsing.
    rest.trim_end_matches('s')
        .parse::<f64>()
        .ok()
        .filter(|secs| secs.is_finite())
}
83
#[cfg(test)]
mod tests {
    use super::*;

    /// The loader must advertise each of the core image extensions.
    #[test]
    fn test_image_loader_extensions() {
        let advertised = ImageLoader.supported_extensions();
        for wanted in ["png", "jpg", "jpeg", "tiff", "bmp"] {
            assert!(advertised.contains(&wanted));
        }
    }

    /// `can_load` accepts image paths and rejects other file types.
    #[test]
    fn test_image_loader_can_load() {
        let loader = ImageLoader;
        let cases = [
            ("slide.png", true),
            ("photo.JPG", true),
            ("video.mp4", false),
            ("notes.txt", false),
        ];
        for (file_name, loadable) in cases {
            assert_eq!(loader.can_load(Path::new(file_name)), loadable);
        }
    }

    /// Timestamps are parsed only from `frame_`-prefixed file names.
    #[test]
    fn test_extract_timestamp_from_filename() {
        let cases: [(&str, Option<f64>); 5] = [
            ("frame_120s.png", Some(120.0)),
            ("frame_45.5s.png", Some(45.5)),
            ("frame_0.png", Some(0.0)),
            ("slide_01.png", None),
            ("photo.jpg", None),
        ];
        for (file_name, expected) in cases {
            assert_eq!(extract_timestamp_from_filename(Path::new(file_name)), expected);
        }
    }

    /// Loading a path that does not exist must surface an error.
    #[test]
    fn test_image_loader_missing_file() {
        // An error is expected both when the tesseract binary cannot be started
        // and (presumably) when tesseract itself rejects the missing input.
        let outcome = ImageLoader.load(Path::new("/nonexistent/image.png"));
        assert!(outcome.is_err());
    }
}