#[cfg(feature = "content-processing")]
use crate::content_processing::{
    AudioEnergyMetrics, AudioFeatures, ContentExtractionConfig, ContentLocation, DocumentFormat,
    DocumentStructure, ExtractedAudio, ExtractedContent, ExtractedImage, ExtractedVideo,
    FormatHandler, MotionAnalysis, MusicAnalysis, PitchStatistics, ProcessingStats, SpeechAnalysis,
    VideoAnalysis,
};
#[cfg(feature = "content-processing")]
use anyhow::{anyhow, Result};
#[cfg(feature = "content-processing")]
use base64::{engine::general_purpose::STANDARD, Engine as _};
#[cfg(feature = "content-processing")]
use std::collections::HashMap;

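/// Format handler for image files (JPEG, PNG, GIF, WebP, BMP, TIFF, SVG).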
#[cfg(feature = "content-processing")]
pub struct ImageHandler;

#[cfg(feature = "content-processing")]
impl FormatHandler for ImageHandler {
    fn extract_content(
        &self,
        data: &[u8],
        config: &ContentExtractionConfig,
    ) -> Result<ExtractedContent> {
        let mut metadata = HashMap::new();
        let mut images = Vec::new();

        let format = detect_image_format(data)?;
        metadata.insert("format".to_string(), format.clone());

        if config.extract_images || config.generate_image_embeddings {
            let extracted_image = extract_image_features(data, config)?;
            images.push(extracted_image);
        }

        if config.extract_metadata {
            if let Ok(dimensions) = get_image_dimensions(data) {
                metadata.insert("width".to_string(), dimensions.0.to_string());
                metadata.insert("height".to_string(), dimensions.1.to_string());
            }
        }

        let text = if config.extract_text {
            format!("Image content: {} format, {} bytes", format, data.len())
        } else {
            String::new()
        };

        Ok(ExtractedContent {
            format: DocumentFormat::Image,
            text,
            metadata,
            images,
            tables: Vec::new(),
            links: Vec::new(),
            structure: DocumentStructure {
                title: None,
                headings: Vec::new(),
                page_count: 1,
                section_count: 1,
                table_of_contents: Vec::new(),
            },
            chunks: Vec::new(),
            language: None,
            processing_stats: ProcessingStats::default(),
            audio_content: Vec::new(),
            video_content: Vec::new(),
            cross_modal_embeddings: Vec::new(),
        })
    }

    fn can_handle(&self, data: &[u8]) -> bool {
        detect_image_format(data).is_ok()
    }

    fn supported_extensions(&self) -> Vec<&'static str> {
        vec![
            "jpg", "jpeg", "png", "gif", "webp", "bmp", "tiff", "tif", "svg",
        ]
    }
}

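/// Format handler for audio files (MP3, WAV, OGG, FLAC, AAC, M4A, WMA).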
#[cfg(feature = "content-processing")]
pub struct AudioHandler;

#[cfg(feature = "content-processing")]
impl FormatHandler for AudioHandler {
    fn extract_content(
        &self,
        data: &[u8],
        config: &ContentExtractionConfig,
    ) -> Result<ExtractedContent> {
        let mut metadata = HashMap::new();
        let mut audio_content = Vec::new();

        let format = detect_audio_format(data)?;
        metadata.insert("format".to_string(), format.clone());

        if config.extract_audio_features {
            let extracted_audio = extract_audio_features(data, config)?;
            audio_content.push(extracted_audio);
        }

        let text = if config.extract_text {
            format!("Audio content: {} format, {} bytes", format, data.len())
        } else {
            String::new()
        };

        Ok(ExtractedContent {
            format: DocumentFormat::Audio,
            text,
            metadata,
            images: Vec::new(),
            tables: Vec::new(),
            links: Vec::new(),
            structure: DocumentStructure {
                title: None,
                headings: Vec::new(),
                page_count: 1,
                section_count: 1,
                table_of_contents: Vec::new(),
            },
            chunks: Vec::new(),
            language: None,
            processing_stats: ProcessingStats::default(),
            audio_content,
            video_content: Vec::new(),
            cross_modal_embeddings: Vec::new(),
        })
    }

    fn can_handle(&self, data: &[u8]) -> bool {
        detect_audio_format(data).is_ok()
    }

    fn supported_extensions(&self) -> Vec<&'static str> {
        vec!["mp3", "wav", "ogg", "flac", "aac", "m4a", "wma"]
    }
}

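/// Format handler for video files (MP4, AVI, MKV, WebM, MOV, WMV, FLV, M4V).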
#[cfg(feature = "content-processing")]
pub struct VideoHandler;

#[cfg(feature = "content-processing")]
impl FormatHandler for VideoHandler {
    fn extract_content(
        &self,
        data: &[u8],
        config: &ContentExtractionConfig,
    ) -> Result<ExtractedContent> {
        let mut metadata = HashMap::new();
        let mut video_content = Vec::new();

        let format = detect_video_format(data)?;
        metadata.insert("format".to_string(), format.clone());

        if config.extract_video_features {
            let extracted_video = extract_video_features(data, config)?;
            video_content.push(extracted_video);
        }

        let text = if config.extract_text {
            format!("Video content: {} format, {} bytes", format, data.len())
        } else {
            String::new()
        };

        Ok(ExtractedContent {
            format: DocumentFormat::Video,
            text,
            metadata,
            images: Vec::new(),
            tables: Vec::new(),
            links: Vec::new(),
            structure: DocumentStructure {
                title: None,
                headings: Vec::new(),
                page_count: 1,
                section_count: 1,
                table_of_contents: Vec::new(),
            },
            chunks: Vec::new(),
            language: None,
            processing_stats: ProcessingStats::default(),
            audio_content: Vec::new(),
            video_content,
            cross_modal_embeddings: Vec::new(),
        })
    }

    fn can_handle(&self, data: &[u8]) -> bool {
        detect_video_format(data).is_ok()
    }

    fn supported_extensions(&self) -> Vec<&'static str> {
        vec!["mp4", "avi", "mkv", "webm", "mov", "wmv", "flv", "m4v"]
    }
}

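/// Detects the image format from the leading magic bytes.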
#[cfg(feature = "content-processing")]
fn detect_image_format(data: &[u8]) -> Result<String> {
    if data.len() < 8 {
        return Err(anyhow!("Data too short to determine image format"));
    }

    match &data[0..4] {
        [0xFF, 0xD8, 0xFF, _] => Ok("JPEG".to_string()),
        [0x89, 0x50, 0x4E, 0x47] => Ok("PNG".to_string()),
        [0x47, 0x49, 0x46, 0x38] => Ok("GIF".to_string()),
        _ => {
            // The WebP check reads bytes 8..12, so guard the length first.
            if data.len() >= 12 && data.starts_with(b"RIFF") && data[8..12] == *b"WEBP" {
                Ok("WebP".to_string())
            } else if data.starts_with(b"BM") {
                Ok("BMP".to_string())
            } else if data.starts_with(b"II*\0") || data.starts_with(b"MM\0*") {
                Ok("TIFF".to_string())
            } else if data.starts_with(b"<svg") || data.starts_with(b"<?xml") {
                Ok("SVG".to_string())
            } else {
                Err(anyhow!("Unknown image format"))
            }
        }
    }
}

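/// Detects the audio format from the leading magic bytes.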
#[cfg(feature = "content-processing")]
fn detect_audio_format(data: &[u8]) -> Result<String> {
    if data.len() < 12 {
        return Err(anyhow!("Data too short to determine audio format"));
    }

    if data.starts_with(b"ID3") || (data[0] == 0xFF && (data[1] & 0xE0) == 0xE0) {
        Ok("MP3".to_string())
    } else if data.starts_with(b"RIFF") && data[8..12] == *b"WAVE" {
        Ok("WAV".to_string())
    } else if data.starts_with(b"OggS") {
        Ok("OGG".to_string())
    } else if data.starts_with(b"fLaC") {
        Ok("FLAC".to_string())
    } else if data[4..8] == *b"ftyp" {
        Ok("M4A/AAC".to_string())
    } else {
        Err(anyhow!("Unknown audio format"))
    }
}

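/// Detects the video container format from the leading magic bytes.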
#[cfg(feature = "content-processing")]
fn detect_video_format(data: &[u8]) -> Result<String> {
    if data.len() < 12 {
        return Err(anyhow!("Data too short to determine video format"));
    }

    if data[4..8] == *b"ftyp" {
        Ok("MP4".to_string())
    } else if data.starts_with(b"RIFF") && data[8..12] == *b"AVI " {
        Ok("AVI".to_string())
    } else if data.starts_with(&[0x1A, 0x45, 0xDF, 0xA3]) {
        // Matroska and WebM share the same EBML magic number; the DocType
        // field near the start of the EBML header tells them apart.
        if data[..data.len().min(64)].windows(4).any(|w| w == b"webm") {
            Ok("WebM".to_string())
        } else {
            Ok("MKV".to_string())
        }
    } else {
        Err(anyhow!("Unknown video format"))
    }
}

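/// Returns (width, height) for formats with dimension parsing support.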
#[cfg(feature = "content-processing")]
fn get_image_dimensions(data: &[u8]) -> Result<(u32, u32)> {
    match detect_image_format(data)?.as_str() {
        "PNG" => extract_png_dimensions(data),
        "JPEG" => extract_jpeg_dimensions(data),
        _ => Err(anyhow!(
            "Dimension extraction not implemented for this format"
        )),
    }
}

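/// Reads the width and height from the PNG IHDR chunk (big-endian u32 values at byte offsets 16 and 20).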
#[cfg(feature = "content-processing")]
fn extract_png_dimensions(data: &[u8]) -> Result<(u32, u32)> {
    if data.len() < 24 {
        return Err(anyhow!("PNG data too short"));
    }

    let width = u32::from_be_bytes([data[16], data[17], data[18], data[19]]);
    let height = u32::from_be_bytes([data[20], data[21], data[22], data[23]]);

    Ok((width, height))
}

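/// Stub for JPEG dimension extraction; currently returns (0, 0).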
#[cfg(feature = "content-processing")]
fn extract_jpeg_dimensions(_data: &[u8]) -> Result<(u32, u32)> {
    // Placeholder: real JPEG dimension extraction requires scanning the
    // segment markers for an SOF frame header.
    Ok((0, 0))
}

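/// Builds an `ExtractedImage` containing the base64-encoded bytes and any dimensions that could be parsed.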
#[cfg(feature = "content-processing")]
fn extract_image_features(
    data: &[u8],
    _config: &ContentExtractionConfig,
) -> Result<ExtractedImage> {
    let format = detect_image_format(data)?;
    let dimensions = get_image_dimensions(data).unwrap_or((0, 0));

    Ok(ExtractedImage {
        data: STANDARD.encode(data),
        format,
        width: dimensions.0,
        height: dimensions.1,
        alt_text: None,
        caption: None,
        location: ContentLocation {
            page: None,
            section: None,
            char_offset: None,
            line: None,
            column: None,
        },
        visual_features: None,
        embedding: None,
        detected_objects: Vec::new(),
        classification_labels: Vec::new(),
    })
}

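/// Builds an `ExtractedAudio` containing the base64-encoded bytes and placeholder analysis fields.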
#[cfg(feature = "content-processing")]
fn extract_audio_features(
    data: &[u8],
    _config: &ContentExtractionConfig,
) -> Result<ExtractedAudio> {
    let format = detect_audio_format(data)?;

    Ok(ExtractedAudio {
        data: STANDARD.encode(data),
        format,
        // Placeholder values; accurate duration, sample rate, and channel
        // count would require decoding the audio stream.
        duration: 0.0,
        sample_rate: 44100,
        channels: 2,
        audio_features: Some(AudioFeatures {
            mfcc: None,
            spectral_features: None,
            rhythm_features: None,
            harmonic_features: None,
            zero_crossing_rate: 0.0,
            energy_metrics: AudioEnergyMetrics {
                rms_energy: 0.0,
                peak_amplitude: 0.0,
                average_loudness: 0.0,
                dynamic_range: 0.0,
            },
        }),
        embedding: None,
        transcription: None,
        music_analysis: Some(MusicAnalysis {
            tempo: None,
            key: None,
            time_signature: None,
            genre: None,
            valence: None,
            energy: None,
        }),
        speech_analysis: Some(SpeechAnalysis {
            language: None,
            speaker_gender: None,
            emotion: None,
            speech_rate: None,
            pitch_stats: Some(PitchStatistics {
                mean_pitch: 0.0,
                pitch_std: 0.0,
                pitch_range: 0.0,
            }),
        }),
    })
}

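/// Builds an `ExtractedVideo` containing the base64-encoded bytes and placeholder analysis fields.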
#[cfg(feature = "content-processing")]
fn extract_video_features(
    data: &[u8],
    _config: &ContentExtractionConfig,
) -> Result<ExtractedVideo> {
    let format = detect_video_format(data)?;

    Ok(ExtractedVideo {
        data: STANDARD.encode(data),
        format,
        // Placeholder values; accurate duration, frame rate, and resolution
        // would require demuxing the video container.
        duration: 0.0,
        frame_rate: 30.0,
        resolution: (1920, 1080),
        keyframes: Vec::new(),
        embedding: None,
        audio_analysis: None,
        video_analysis: Some(VideoAnalysis {
            scenes: Vec::new(),
            motion_analysis: Some(MotionAnalysis {
                average_motion: 0.0,
                motion_variance: 0.0,
                camera_motion: None,
                object_motion: Vec::new(),
            }),
            activity_level: 0.0,
            color_timeline: Vec::new(),
        }),
    })
}