trueno_rag/loader/
subtitle.rs1use crate::media::parse_subtitles;
4use crate::{Document, Result};
5use std::collections::HashMap;
6use std::path::Path;
7
8use super::DocumentLoader;
9
/// Stateless [`DocumentLoader`] for subtitle files (`.srt` and `.vtt`).
///
/// The type carries no data, so it is free to copy and can be constructed
/// directly as `SubtitleLoader`.
#[derive(Clone, Copy, Debug)]
pub struct SubtitleLoader;
18
19impl DocumentLoader for SubtitleLoader {
20 fn supported_extensions(&self) -> Vec<&str> {
21 vec!["srt", "vtt"]
22 }
23
24 fn load(&self, path: &Path) -> Result<Document> {
25 let raw = std::fs::read_to_string(path).map_err(crate::Error::Io)?;
26 let track = parse_subtitles(&raw)?;
27
28 let title = path.file_stem().and_then(|s| s.to_str()).unwrap_or("Untitled").to_string();
29
30 let mut metadata = HashMap::new();
31 metadata.insert("duration_secs".into(), serde_json::json!(track.duration_secs()));
32 metadata.insert("format".into(), serde_json::json!(track.format.to_string()));
33 metadata.insert("cue_count".into(), serde_json::json!(track.cues.len()));
34 metadata.insert(
35 "subtitle_cues".into(),
36 serde_json::to_value(&track.cues).map_err(crate::Error::Serialization)?,
37 );
38
39 let mut doc = Document::new(track.to_plain_text())
40 .with_title(title)
41 .with_source(path.to_string_lossy());
42 doc.metadata = metadata;
43 Ok(doc)
44 }
45}
46
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// A scratch directory containing a single subtitle file.
    ///
    /// The directory name is suffixed with the current process id so that two
    /// test processes sharing the system temp directory do not trample each
    /// other. The directory is removed on drop, which also runs while
    /// unwinding from a failed assertion.
    struct Fixture {
        dir: PathBuf,
        file: PathBuf,
    }

    impl Fixture {
        /// Creates `<tmp>/<dir_name>_<pid>/<file_name>` holding `contents`.
        ///
        /// Panics if the directory or file cannot be created, so a broken
        /// test environment is reported at the point of failure.
        fn new(dir_name: &str, file_name: &str, contents: &str) -> Self {
            let dir = std::env::temp_dir().join(format!("{}_{}", dir_name, std::process::id()));
            std::fs::create_dir_all(&dir).expect("failed to create fixture directory");
            let file = dir.join(file_name);
            // Build the guard before writing so the directory is cleaned up
            // even if the write below panics.
            let fixture = Self { dir, file };
            std::fs::write(&fixture.file, contents).expect("failed to write fixture file");
            fixture
        }
    }

    impl Drop for Fixture {
        fn drop(&mut self) {
            // Best-effort cleanup: a leftover temp directory must not turn a
            // passing test into a failing one.
            let _ = std::fs::remove_dir_all(&self.dir);
        }
    }

    #[test]
    fn test_subtitle_loader_extensions() {
        let loader = SubtitleLoader;
        let exts = loader.supported_extensions();
        assert!(exts.contains(&"srt"));
        assert!(exts.contains(&"vtt"));
    }

    #[test]
    fn test_subtitle_loader_can_load() {
        let loader = SubtitleLoader;
        assert!(loader.can_load(Path::new("lecture.srt")));
        assert!(loader.can_load(Path::new("captions.VTT")));
        assert!(!loader.can_load(Path::new("file.txt")));
    }

    #[test]
    fn test_subtitle_loader_load_srt() {
        let fixture = Fixture::new(
            "trueno_rag_test_sub_loader_srt",
            "lecture.srt",
            concat!(
                "1\n",
                "00:00:01,000 --> 00:00:04,500\n",
                "First cue text.\n",
                "\n",
                "2\n",
                "00:00:05,000 --> 00:00:09,200\n",
                "Second cue text.\n",
            ),
        );

        let loader = SubtitleLoader;
        let doc = loader.load(&fixture.file).unwrap();

        assert!(doc.content.contains("First cue text."));
        assert!(doc.content.contains("Second cue text."));
        assert_eq!(doc.title.as_deref(), Some("lecture"));
        assert!(doc.metadata.contains_key("duration_secs"));
        assert!(doc.metadata.contains_key("subtitle_cues"));
        assert_eq!(doc.metadata["cue_count"], serde_json::json!(2));
        assert_eq!(doc.metadata["format"], serde_json::json!("srt"));
    }

    #[test]
    fn test_subtitle_loader_load_vtt() {
        let fixture = Fixture::new(
            "trueno_rag_test_sub_loader_vtt",
            "captions.vtt",
            concat!(
                "WEBVTT\n",
                "\n",
                "00:00:01.000 --> 00:00:04.500\n",
                "VTT cue text.\n",
            ),
        );

        let loader = SubtitleLoader;
        let doc = loader.load(&fixture.file).unwrap();

        assert!(doc.content.contains("VTT cue text"));
        assert_eq!(doc.metadata["format"], serde_json::json!("vtt"));
    }

    #[test]
    fn test_subtitle_loader_metadata_duration() {
        let fixture = Fixture::new(
            "trueno_rag_test_sub_duration",
            "timed.srt",
            concat!(
                "1\n",
                "00:01:00,000 --> 00:02:30,000\n",
                "One minute in.\n",
            ),
        );

        let loader = SubtitleLoader;
        let doc = loader.load(&fixture.file).unwrap();

        let duration = doc.metadata["duration_secs"].as_f64().unwrap();
        assert!((duration - 150.0).abs() < 0.1);
    }

    #[test]
    fn test_subtitle_loader_missing_file() {
        let loader = SubtitleLoader;
        let result = loader.load(Path::new("/nonexistent/file.srt"));
        assert!(result.is_err());
    }

    #[test]
    fn test_subtitle_loader_cues_deserializable() {
        let fixture = Fixture::new(
            "trueno_rag_test_sub_cues_deser",
            "test.srt",
            "1\n00:00:01,000 --> 00:00:04,500\nHello.\n\n2\n00:00:05,000 --> 00:00:09,000\nWorld.\n",
        );

        let loader = SubtitleLoader;
        let doc = loader.load(&fixture.file).unwrap();

        // The stored cues must round-trip back into the typed representation.
        let cues: Vec<crate::media::SubtitleCue> =
            serde_json::from_value(doc.metadata["subtitle_cues"].clone()).unwrap();
        assert_eq!(cues.len(), 2);
        assert_eq!(cues[0].text, "Hello.");
        assert_eq!(cues[1].text, "World.");
    }
}