1use crate::core::KnowledgeEntry;
10use crate::error::{Error, Result};
11use crate::ingest::{
12 AppleVisionOcrBackend, AppleVisionOcrConfig, ImageOcrRequest, OcrBackend, OcrBlockKind,
13 OcrDocument, OcrTargetKind, SourceKind, SourceProvenance, TesseractOcrBackend,
14 TesseractOcrConfig,
15};
16use chrono::{DateTime, Utc};
17use serde::{Deserialize, Serialize};
18use std::collections::HashMap;
19use std::fs;
20use std::path::{Path, PathBuf};
21use std::sync::Arc;
22
/// A single piece of OCR text extracted from a screenshot, ready for indexing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScreenshotTextChunk {
    /// Text of the chunk.
    pub content: String,
    /// Kind of OCR block this chunk was derived from.
    pub block_kind: OcrBlockKind,
    /// OCR confidence for the chunk, when one is available.
    pub confidence: Option<f32>,
    /// Weight assigned to the chunk by the ingester configuration.
    pub weight: f32,
}
35
/// Result of ingesting one screenshot: the OCR output plus derived chunks and metadata.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ScreenshotDocument {
    /// Path of the screenshot file.
    pub path: PathBuf,
    /// Title used when the document is converted into a knowledge entry.
    pub title: String,
    /// OCR document the chunks were built from.
    pub ocr: OcrDocument,
    /// Extra key/value metadata; exported with a `screenshot.` key prefix by
    /// `to_knowledge_entry`.
    pub metadata: HashMap<String, String>,
    /// Text chunks contributing to `indexing_text`.
    pub chunks: Vec<ScreenshotTextChunk>,
}
50
51impl ScreenshotDocument {
52 pub fn indexing_text(&self) -> String {
54 self.chunks
55 .iter()
56 .map(|chunk| chunk.content.trim())
57 .filter(|content| !content.is_empty())
58 .collect::<Vec<_>>()
59 .join("\n")
60 }
61
62 pub fn to_knowledge_entry(&self) -> KnowledgeEntry {
64 let content = self.indexing_text();
65 let mut entry = KnowledgeEntry::new(self.title.clone(), content)
66 .with_category("Screenshot")
67 .with_tags(["screenshot", "ocr"])
68 .with_source(self.path.to_string_lossy().into_owned());
69
70 for (key, value) in self.ocr.provenance.metadata_pairs() {
71 entry = entry.with_metadata(key, value);
72 }
73
74 for (key, value) in &self.metadata {
75 entry = entry.with_metadata(format!("screenshot.{key}"), value.clone());
76 }
77
78 entry
79 .with_metadata("screenshot.chunk_count", self.chunks.len().to_string())
80 .with_metadata(
81 "screenshot.ocr_engine",
82 match self.ocr.engine {
83 crate::ingest::OcrEngine::AppleVision => "apple_vision",
84 crate::ingest::OcrEngine::Tesseract => "tesseract",
85 crate::ingest::OcrEngine::Mock => "mock",
86 crate::ingest::OcrEngine::External => "external",
87 },
88 )
89 }
90}
91
/// Settings that control how a screenshot is OCR'd and split into chunks.
#[derive(Debug, Clone)]
pub struct ScreenshotIngesterConfig {
    /// Language hints copied onto each OCR request.
    pub language_hints: Vec<String>,
    /// Minimum OCR confidence a block must reach; `None` disables the filter.
    pub min_confidence: Option<f32>,
    /// Whether UI-chrome blocks are kept as chunks.
    pub include_ui_chrome: bool,
    /// Weight given to chunks built from UI-chrome blocks.
    pub ui_chrome_weight: f32,
    /// Weight given to every other chunk.
    pub content_weight: f32,
}

impl Default for ScreenshotIngesterConfig {
    /// English language hint, a 0.5 confidence floor, UI chrome included at
    /// weight 0.4, and regular content at weight 1.0.
    fn default() -> Self {
        let language_hints = vec![String::from("en")];
        Self {
            language_hints,
            min_confidence: Some(0.5),
            include_ui_chrome: true,
            ui_chrome_weight: 0.4,
            content_weight: 1.0,
        }
    }
}

impl ScreenshotIngesterConfig {
    /// Returns the configuration with the confidence filter disabled.
    pub fn without_confidence_filter(self) -> Self {
        Self {
            min_confidence: None,
            ..self
        }
    }

    /// Returns the configuration with UI-chrome inclusion set to `include`.
    pub fn with_ui_chrome(self, include: bool) -> Self {
        Self {
            include_ui_chrome: include,
            ..self
        }
    }
}
132
/// Ingests screenshot files by running them through an OCR backend and
/// turning the result into a `ScreenshotDocument`.
#[derive(Clone)]
pub struct ScreenshotIngester {
    // Shared OCR backend used to extract text from the image.
    backend: Arc<dyn OcrBackend>,
    // Filtering and weighting settings applied to the OCR output.
    config: ScreenshotIngesterConfig,
}
139
140impl ScreenshotIngester {
141 pub fn new(backend: Arc<dyn OcrBackend>) -> Self {
143 Self {
144 backend,
145 config: ScreenshotIngesterConfig::default(),
146 }
147 }
148
149 pub fn with_config(backend: Arc<dyn OcrBackend>, config: ScreenshotIngesterConfig) -> Self {
151 Self { backend, config }
152 }
153
154 pub fn with_tesseract() -> Self {
156 Self::new(Arc::new(TesseractOcrBackend::new()))
157 }
158
159 pub fn with_tesseract_config(ocr_config: TesseractOcrConfig) -> Self {
161 Self::new(Arc::new(TesseractOcrBackend::with_config(ocr_config)))
162 }
163
164 pub fn with_tesseract_and_config(
166 ocr_config: TesseractOcrConfig,
167 config: ScreenshotIngesterConfig,
168 ) -> Self {
169 Self::with_config(
170 Arc::new(TesseractOcrBackend::with_config(ocr_config)),
171 config,
172 )
173 }
174
175 pub fn with_apple_vision() -> Self {
177 Self::new(Arc::new(AppleVisionOcrBackend::new()))
178 }
179
180 pub fn with_apple_vision_config(ocr_config: AppleVisionOcrConfig) -> Self {
182 Self::new(Arc::new(AppleVisionOcrBackend::with_config(ocr_config)))
183 }
184
185 pub fn with_apple_vision_and_config(
187 ocr_config: AppleVisionOcrConfig,
188 config: ScreenshotIngesterConfig,
189 ) -> Self {
190 Self::with_config(
191 Arc::new(AppleVisionOcrBackend::with_config(ocr_config)),
192 config,
193 )
194 }
195
196 pub async fn ingest_file(&self, path: &Path) -> Result<ScreenshotDocument> {
198 if !path.exists() {
199 return Err(Error::ingest(format!(
200 "screenshot file does not exist: {}",
201 path.display()
202 )));
203 }
204
205 let metadata = fs::metadata(path)?;
206 if !metadata.is_file() {
207 return Err(Error::ingest(format!(
208 "screenshot path is not a file: {}",
209 path.display()
210 )));
211 }
212
213 let provenance = self.build_provenance(path, &metadata);
214 let mut request =
215 ImageOcrRequest::new(path.to_path_buf(), OcrTargetKind::Screenshot, provenance);
216 request.min_confidence = self.config.min_confidence;
217 request.language_hints = self.config.language_hints.clone();
218
219 let ocr = self.backend.extract(&request).await?;
220 let screenshot_metadata = self.build_screenshot_metadata(path, &metadata);
221 let chunks = self.build_chunks(&ocr);
222 let title = infer_title(path);
223
224 Ok(ScreenshotDocument {
225 path: path.to_path_buf(),
226 title,
227 ocr,
228 metadata: screenshot_metadata,
229 chunks,
230 })
231 }
232
233 pub async fn ingest_as_entry(&self, path: &Path) -> Result<KnowledgeEntry> {
235 let doc = self.ingest_file(path).await?;
236 Ok(doc.to_knowledge_entry())
237 }
238
239 fn build_provenance(&self, path: &Path, fs_meta: &fs::Metadata) -> SourceProvenance {
240 let mut provenance =
241 SourceProvenance::new(SourceKind::Screenshot, path.to_string_lossy().into_owned())
242 .with_metadata("filename", file_name_string(path))
243 .with_metadata("extension", file_extension_string(path))
244 .with_metadata("file_size_bytes", fs_meta.len().to_string());
245
246 if let Some(captured_at) = system_time_to_utc(fs_meta.modified().ok()) {
247 provenance = provenance.with_captured_at(captured_at);
248 }
249
250 provenance
251 }
252
253 fn build_screenshot_metadata(
254 &self,
255 path: &Path,
256 fs_meta: &fs::Metadata,
257 ) -> HashMap<String, String> {
258 let mut out = HashMap::new();
259 out.insert("filename".to_string(), file_name_string(path));
260 out.insert("extension".to_string(), file_extension_string(path));
261 out.insert("file_size_bytes".to_string(), fs_meta.len().to_string());
262 out.insert(
263 "ui_chrome_included".to_string(),
264 self.config.include_ui_chrome.to_string(),
265 );
266 if let Some(min_conf) = self.config.min_confidence {
267 out.insert("min_confidence".to_string(), min_conf.to_string());
268 }
269 out
270 }
271
272 fn build_chunks(&self, ocr: &OcrDocument) -> Vec<ScreenshotTextChunk> {
273 if ocr.blocks.is_empty() {
274 let text = ocr.effective_text();
275 if text.trim().is_empty() {
276 return Vec::new();
277 }
278 return vec![ScreenshotTextChunk {
279 content: text,
280 block_kind: OcrBlockKind::Unknown,
281 confidence: None,
282 weight: self.config.content_weight,
283 }];
284 }
285
286 ocr.blocks
287 .iter()
288 .filter(|block| {
289 self.config.include_ui_chrome || !matches!(block.kind, OcrBlockKind::UiChrome)
290 })
291 .filter(|block| {
292 self.config
293 .min_confidence
294 .is_none_or(|min| block.confidence.unwrap_or(1.0) >= min)
295 })
296 .filter_map(|block| {
297 let content = block.text.trim();
298 if content.is_empty() {
299 return None;
300 }
301
302 let weight = if matches!(block.kind, OcrBlockKind::UiChrome) {
303 self.config.ui_chrome_weight
304 } else {
305 self.config.content_weight
306 };
307
308 Some(ScreenshotTextChunk {
309 content: content.to_string(),
310 block_kind: block.kind,
311 confidence: block.confidence,
312 weight,
313 })
314 })
315 .collect()
316 }
317}
318
319fn infer_title(path: &Path) -> String {
320 path.file_stem()
321 .and_then(|stem| stem.to_str())
322 .map(clean_screenshot_title)
323 .filter(|title| !title.is_empty())
324 .unwrap_or_else(|| "Screenshot".to_string())
325}
326
/// Turns a raw file stem into a display title: underscores become spaces and
/// surrounding whitespace is removed.
fn clean_screenshot_title(raw: &str) -> String {
    let spaced: String = raw
        .chars()
        .map(|ch| if ch == '_' { ' ' } else { ch })
        .collect();
    spaced.trim().to_owned()
}
330
/// Returns the final component of `path` as an owned string.
///
/// Names that are not valid UTF-8 are converted lossily (invalid sequences
/// become U+FFFD) instead of being dropped, matching how full paths are
/// rendered with `to_string_lossy` elsewhere in this file. Returns an empty
/// string when the path has no file name (for example `/` or a path ending
/// in `..`).
fn file_name_string(path: &Path) -> String {
    path.file_name()
        .map(|name| name.to_string_lossy().into_owned())
        .unwrap_or_default()
}
337
/// Returns the extension of `path` (without the leading dot) as an owned string.
///
/// Extensions that are not valid UTF-8 are converted lossily (invalid
/// sequences become U+FFFD) instead of being dropped, matching how full paths
/// are rendered with `to_string_lossy` elsewhere in this file. Returns an
/// empty string when the path has no extension.
fn file_extension_string(path: &Path) -> String {
    path.extension()
        .map(|ext| ext.to_string_lossy().into_owned())
        .unwrap_or_default()
}
344
345fn system_time_to_utc(time: Option<std::time::SystemTime>) -> Option<DateTime<Utc>> {
346 time.map(DateTime::<Utc>::from)
347}
348
// Tests for screenshot ingestion, driven by a fixture OCR backend.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ingest::{FixtureOcrBackend, OcrDocument, OcrEngine, OcrTextBlock};
    use tempfile::TempDir;

    /// Writes a placeholder file called `name` into `temp_dir` and returns its path.
    async fn create_temp_screenshot(temp_dir: &TempDir, name: &str) -> PathBuf {
        let path = temp_dir.path().join(name);
        // Placeholder bytes, not a real PNG; the tests supply OCR results
        // through `FixtureOcrBackend`.
        tokio::fs::write(&path, b"fakepng").await.unwrap();
        path
    }

    /// With the default config, UI chrome is kept at its own weight, the
    /// low-confidence block is dropped, and the entry carries screenshot metadata.
    #[tokio::test]
    async fn ingest_screenshot_builds_chunks_and_entry_metadata() {
        let temp_dir = TempDir::new().unwrap();
        let path = create_temp_screenshot(&temp_dir, "Screenshot_2026-02-22.png").await;

        let fixture_provenance = SourceProvenance::new(SourceKind::Screenshot, "fixture://s1");
        let mut fixture_doc = OcrDocument::new(
            OcrEngine::Mock,
            OcrTargetKind::Screenshot,
            fixture_provenance,
        );
        fixture_doc.blocks = vec![
            OcrTextBlock {
                text: "Browser".into(),
                confidence: Some(0.99),
                bbox: None,
                kind: OcrBlockKind::UiChrome,
            },
            OcrTextBlock {
                text: "Important error message".into(),
                confidence: Some(0.93),
                bbox: None,
                kind: OcrBlockKind::Paragraph,
            },
            // Confidence 0.2 is below the default 0.5 floor, so this block
            // must not become a chunk.
            OcrTextBlock {
                text: "low conf".into(),
                confidence: Some(0.2),
                bbox: None,
                kind: OcrBlockKind::Word,
            },
        ];

        let backend = Arc::new(
            FixtureOcrBackend::new().with_document("Screenshot_2026-02-22.png", fixture_doc),
        );
        let ingester = ScreenshotIngester::new(backend);

        let doc = ingester.ingest_file(&path).await.unwrap();
        // Underscores in the file stem are replaced by spaces.
        assert_eq!(doc.title, "Screenshot 2026-02-22");
        assert_eq!(doc.chunks.len(), 2);
        assert_eq!(doc.chunks[0].weight, ingester.config.ui_chrome_weight);
        assert_eq!(doc.chunks[1].weight, ingester.config.content_weight);
        assert!(doc.indexing_text().contains("Important error message"));

        let entry = doc.to_knowledge_entry();
        assert_eq!(entry.category.as_deref(), Some("Screenshot"));
        // NOTE(review): `source.kind` is presumably emitted by the provenance
        // metadata pairs; confirm against `SourceProvenance`.
        assert_eq!(entry.metadata.get("source.kind"), Some("screenshot"));
        assert_eq!(
            entry.metadata.get("screenshot.ui_chrome_included"),
            Some("true")
        );
        assert_eq!(entry.metadata.get("screenshot.chunk_count"), Some("2"));
    }

    /// Disabling UI chrome removes UI-chrome blocks from the chunks and the indexing text.
    #[tokio::test]
    async fn ingest_screenshot_can_exclude_ui_chrome() {
        let temp_dir = TempDir::new().unwrap();
        let path = create_temp_screenshot(&temp_dir, "shot.png").await;

        let fixture_provenance = SourceProvenance::new(SourceKind::Screenshot, "fixture://s2");
        let mut fixture_doc = OcrDocument::new(
            OcrEngine::Mock,
            OcrTargetKind::Screenshot,
            fixture_provenance,
        );
        fixture_doc.blocks = vec![
            OcrTextBlock {
                text: "Back".into(),
                confidence: Some(0.95),
                bbox: None,
                kind: OcrBlockKind::UiChrome,
            },
            OcrTextBlock {
                text: "Actual content".into(),
                confidence: Some(0.95),
                bbox: None,
                kind: OcrBlockKind::Paragraph,
            },
        ];

        let backend = Arc::new(FixtureOcrBackend::new().with_default_document(fixture_doc));
        let config = ScreenshotIngesterConfig::default().with_ui_chrome(false);
        let ingester = ScreenshotIngester::with_config(backend, config);

        let doc = ingester.ingest_file(&path).await.unwrap();
        assert_eq!(doc.chunks.len(), 1);
        assert_eq!(doc.chunks[0].content, "Actual content");
        assert!(!doc.indexing_text().contains("Back"));
    }

    /// A path that does not exist yields an error mentioning "does not exist".
    #[tokio::test]
    async fn ingest_screenshot_errors_for_missing_file() {
        let backend = Arc::new(FixtureOcrBackend::new());
        let ingester = ScreenshotIngester::new(backend);

        let err = ingester
            .ingest_file(Path::new("/tmp/does-not-exist-screenshot.png"))
            .await
            .unwrap_err();
        assert!(err.to_string().contains("does not exist"));
    }
}