1#[cfg(feature = "ocr")]
25mod image;
26mod subtitle;
27mod text;
28#[cfg(feature = "transcription")]
29pub mod transcription;
30
31#[cfg(feature = "ocr")]
32pub use image::ImageLoader;
33pub use subtitle::SubtitleLoader;
34pub use text::TextLoader;
35#[cfg(feature = "transcription")]
36pub use transcription::TranscriptionLoader;
37
38use crate::{Document, Error, Result};
39use std::path::Path;
40
41pub trait DocumentLoader: Send + Sync {
47 fn supported_extensions(&self) -> Vec<&str>;
49
50 fn can_load(&self, path: &Path) -> bool {
55 path.extension()
56 .and_then(|ext| ext.to_str())
57 .map(|ext| {
58 let lower = ext.to_lowercase();
59 self.supported_extensions().iter().any(|s| *s == lower)
60 })
61 .unwrap_or(false)
62 }
63
64 fn load(&self, path: &Path) -> Result<Document>;
72}
73
74pub struct LoaderRegistry {
79 loaders: Vec<Box<dyn DocumentLoader>>,
80}
81
82impl LoaderRegistry {
83 #[must_use]
85 pub fn new() -> Self {
86 let mut registry = Self { loaders: Vec::new() };
87 registry.register(Box::new(TextLoader));
88 registry.register(Box::new(SubtitleLoader));
89 #[cfg(feature = "ocr")]
90 registry.register(Box::new(ImageLoader));
91 registry
92 }
93
94 pub fn register(&mut self, loader: Box<dyn DocumentLoader>) {
96 self.loaders.push(loader);
97 }
98
99 #[must_use]
101 pub fn loader_for(&self, path: &Path) -> Option<&dyn DocumentLoader> {
102 self.loaders.iter().find(|l| l.can_load(path)).map(|l| l.as_ref())
103 }
104
105 pub fn load(&self, path: &Path) -> Result<Document> {
107 let loader = self.loader_for(path).ok_or_else(|| {
108 Error::InvalidInput(format!("No loader registered for: {}", path.display()))
109 })?;
110 loader.load(path)
111 }
112
113 #[must_use]
117 pub fn find_sidecar(media_path: &Path) -> Option<std::path::PathBuf> {
118 for ext in &["srt", "vtt"] {
119 let sidecar = media_path.with_extension(ext);
120 if sidecar.exists() {
121 return Some(sidecar);
122 }
123 }
124 None
125 }
126
127 #[must_use]
129 pub fn supported_extensions(&self) -> Vec<&str> {
130 self.loaders.iter().flat_map(|l| l.supported_extensions()).collect()
131 }
132}
133
134impl Default for LoaderRegistry {
135 fn default() -> Self {
136 Self::new()
137 }
138}
139
140impl std::fmt::Debug for LoaderRegistry {
141 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
142 f.debug_struct("LoaderRegistry")
143 .field("loader_count", &self.loaders.len())
144 .field("extensions", &self.supported_extensions())
145 .finish()
146 }
147}
148
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Scratch directory for tests that touch the filesystem.
    ///
    /// The directory name includes the process id so concurrent test
    /// processes do not share (and delete) each other's files, and the
    /// directory is removed on drop so cleanup also runs when an assertion
    /// panics.
    struct ScratchDir(PathBuf);

    impl ScratchDir {
        /// Creates `<temp>/<label>_<pid>`, discarding leftovers from an
        /// earlier run that reused the same process id.
        fn new(label: &str) -> Self {
            let path = std::env::temp_dir().join(format!("{label}_{}", std::process::id()));
            let _ = std::fs::remove_dir_all(&path);
            std::fs::create_dir_all(&path).expect("failed to create scratch directory");
            Self(path)
        }

        /// Path of `name` inside the scratch directory.
        fn join(&self, name: &str) -> PathBuf {
            self.0.join(name)
        }
    }

    impl Drop for ScratchDir {
        fn drop(&mut self) {
            // Best effort: a failed cleanup must not mask the test outcome.
            let _ = std::fs::remove_dir_all(&self.0);
        }
    }

    #[test]
    fn test_registry_default_loaders() {
        let registry = LoaderRegistry::new();
        let exts = registry.supported_extensions();
        assert!(exts.contains(&"txt"));
        assert!(exts.contains(&"md"));
        assert!(exts.contains(&"srt"));
        assert!(exts.contains(&"vtt"));
    }

    #[test]
    fn test_registry_loader_for_txt() {
        let registry = LoaderRegistry::new();
        assert!(registry.loader_for(Path::new("file.txt")).is_some());
        // Extension matching ignores case.
        assert!(registry.loader_for(Path::new("file.TXT")).is_some());
    }

    #[test]
    fn test_registry_loader_for_srt() {
        let registry = LoaderRegistry::new();
        assert!(registry.loader_for(Path::new("file.srt")).is_some());
    }

    #[test]
    fn test_registry_no_loader_for_unknown() {
        let registry = LoaderRegistry::new();
        assert!(registry.loader_for(Path::new("file.xyz")).is_none());
    }

    #[test]
    fn test_registry_load_missing_file() {
        let registry = LoaderRegistry::new();
        let result = registry.load(Path::new("/nonexistent/file.txt"));
        assert!(result.is_err());
    }

    #[test]
    fn test_registry_load_unsupported_format() {
        let registry = LoaderRegistry::new();
        let result = registry.load(Path::new("file.mp4"));
        assert!(result.is_err());
    }

    #[test]
    fn test_find_sidecar_none() {
        assert!(
            LoaderRegistry::find_sidecar(Path::new("/tmp/nonexistent_video_12345.mp4")).is_none()
        );
    }

    #[test]
    fn test_registry_custom_loader() {
        struct DummyLoader;
        impl DocumentLoader for DummyLoader {
            fn supported_extensions(&self) -> Vec<&str> {
                vec!["xyz"]
            }
            fn load(&self, path: &Path) -> Result<Document> {
                Ok(Document::new("dummy").with_source(path.to_string_lossy()))
            }
        }

        let mut registry = LoaderRegistry::new();
        registry.register(Box::new(DummyLoader));
        assert!(registry.loader_for(Path::new("test.xyz")).is_some());
    }

    #[test]
    fn test_registry_debug() {
        let registry = LoaderRegistry::new();
        let debug = format!("{registry:?}");
        assert!(debug.contains("LoaderRegistry"));
        assert!(debug.contains("loader_count"));
    }

    #[test]
    fn test_registry_default() {
        let registry = LoaderRegistry::default();
        assert!(!registry.supported_extensions().is_empty());
    }

    #[test]
    fn test_find_sidecar_srt_preferred() {
        let dir = ScratchDir::new("trueno_rag_test_sidecar");
        let video = dir.join("lecture.mp4");
        let srt = dir.join("lecture.srt");
        let vtt = dir.join("lecture.vtt");
        std::fs::write(&video, b"").unwrap();
        std::fs::write(&srt, b"").unwrap();
        std::fs::write(&vtt, b"").unwrap();

        // Both sidecars exist; the `.srt` one must be chosen.
        let found = LoaderRegistry::find_sidecar(&video);
        assert_eq!(found, Some(srt));
    }

    #[test]
    fn test_can_load_no_extension() {
        let loader = TextLoader;
        assert!(!loader.can_load(Path::new("Makefile")));
    }

    #[test]
    fn test_load_real_txt_file() {
        let dir = ScratchDir::new("trueno_rag_test_load_txt");
        let file = dir.join("test.txt");
        std::fs::write(&file, "Hello from test file.").unwrap();

        let registry = LoaderRegistry::new();
        let doc = registry.load(&file).unwrap();
        assert_eq!(doc.content, "Hello from test file.");
        assert!(doc.title.is_some());
    }

    #[test]
    fn test_load_real_srt_file() {
        let dir = ScratchDir::new("trueno_rag_test_load_srt");
        let file = dir.join("test.srt");
        std::fs::write(&file, "1\n00:00:01,000 --> 00:00:04,500\nHello from subtitle.\n").unwrap();

        let registry = LoaderRegistry::new();
        let doc = registry.load(&file).unwrap();
        assert!(doc.content.contains("Hello from subtitle"));
        assert!(doc.metadata.contains_key("subtitle_cues"));
    }
}