use chrono::{DateTime, Utc};
use rusqlite::{params, Connection};
use sha2::{Digest, Sha256};
use std::path::PathBuf;
use std::process::Command;
use crate::error::{EngramError, Result};
use crate::multimodal::vision::{VisionInput, VisionOptions, VisionProvider};
use crate::storage::queries::create_memory;
use crate::types::{CreateMemoryInput, MemoryTier, MemoryType};
/// Metadata describing a screenshot image file that has been written to disk.
///
/// Within this file, values are produced by `build_result` after the capture
/// command has finished.
#[derive(Debug, Clone)]
pub struct ScreenshotResult {
/// Path of the captured image file.
pub image_path: PathBuf,
/// Image width read from the PNG header; `0` when the header could not be parsed.
pub width: u32,
/// Image height read from the PNG header; `0` when the header could not be parsed.
pub height: u32,
/// Size of the image file in bytes, as reported by the file system metadata.
pub file_size: u64,
/// UTC time at which this result was assembled (taken after the file was read).
pub timestamp: DateTime<Utc>,
/// Hex-encoded SHA-256 digest of the image file contents.
pub file_hash: String,
}
/// Captures screenshots into a fixed output directory.
pub struct ScreenshotCapture {
/// Directory in which generated screenshot files are placed.
pub screenshot_dir: PathBuf,
}
impl ScreenshotCapture {
    /// Creates a capture helper that writes into the default screenshot directory.
    ///
    /// # Errors
    ///
    /// Returns an error when the default directory cannot be determined or
    /// cannot be created.
    pub fn new() -> Result<Self> {
        Self::with_dir(default_screenshot_dir()?)
    }

    /// Creates a capture helper that writes into `screenshot_dir`, creating the
    /// directory (and any missing parents) first.
    ///
    /// # Errors
    ///
    /// Returns [`EngramError::Storage`] when the directory cannot be created.
    pub fn with_dir(screenshot_dir: PathBuf) -> Result<Self> {
        std::fs::create_dir_all(&screenshot_dir).map_err(|e| {
            EngramError::Storage(format!(
                "Failed to create screenshot directory {:?}: {}",
                screenshot_dir, e
            ))
        })?;
        Ok(Self { screenshot_dir })
    }

    /// Captures the full screen by running `screencapture -x <path>` and returns
    /// metadata about the file that was written.
    ///
    /// # Errors
    ///
    /// Returns [`EngramError::Storage`] when the output path is not valid UTF-8,
    /// when the capture command fails, or when the resulting file cannot be read.
    pub fn capture(&self) -> Result<ScreenshotResult> {
        let output_path = self.generate_path("screen");
        let output_arg = Self::path_arg(&output_path)?;
        run_screencapture(&["-x", output_arg], &output_path)?;
        build_result(output_path)
    }

    /// Captures the window belonging to `app_name`.
    ///
    /// The window id is resolved with `find_window_id` and passed to the capture
    /// command via `-l <id>`. When no window id is found, a warning is logged
    /// and a full-screen capture is taken instead.
    ///
    /// # Errors
    ///
    /// Returns [`EngramError::Storage`] when the output path is not valid UTF-8,
    /// when the capture command fails, or when the resulting file cannot be read.
    pub fn capture_window(&self, app_name: &str) -> Result<ScreenshotResult> {
        let output_path = self.generate_path(&sanitize_app_name(app_name));
        let output_arg = Self::path_arg(&output_path)?;
        match find_window_id(app_name) {
            Some(window_id) => {
                let window_id_str = window_id.to_string();
                run_screencapture(&["-x", "-l", &window_id_str, output_arg], &output_path)?;
            }
            None => {
                tracing::warn!(
                    app = app_name,
                    "Window ID not found; falling back to full-screen capture"
                );
                run_screencapture(&["-x", output_arg], &output_path)?;
            }
        }
        build_result(output_path)
    }

    /// Builds `<screenshot_dir>/<prefix>_<UTC timestamp with milliseconds>.png`.
    fn generate_path(&self, prefix: &str) -> PathBuf {
        let timestamp = Utc::now().format("%Y%m%d_%H%M%S_%3f");
        self.screenshot_dir
            .join(format!("{}_{}.png", prefix, timestamp))
    }

    /// Returns `path` as a `&str` suitable for a command-line argument.
    ///
    /// A non-UTF-8 path is reported as an error instead of being replaced by an
    /// empty string, which would make the capture command run without a usable
    /// output path.
    fn path_arg(path: &std::path::Path) -> Result<&str> {
        path.to_str().ok_or_else(|| {
            EngramError::Storage(format!("Screenshot path is not valid UTF-8: {:?}", path))
        })
    }
}
/// Describes a captured screenshot with a vision provider and persists the result.
///
/// The image file is read from disk, sent to `vision` together with a fixed
/// prompt, and the returned description is stored as a permanent note memory
/// tagged `screenshot` and `multimodal`. A matching row is then written to the
/// `media_assets` table.
///
/// Returns the id of the created memory.
///
/// # Errors
///
/// Returns an error when the image file cannot be read, when the vision
/// provider fails, or when either database write fails.
pub async fn describe_and_store(
    screenshot: &ScreenshotResult,
    vision: &dyn VisionProvider,
    conn: &Connection,
) -> Result<i64> {
    let path = &screenshot.image_path;
    let image_bytes = match std::fs::read(path) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(EngramError::Storage(format!(
                "Failed to read screenshot {:?}: {}",
                path, e
            )));
        }
    };

    let prompt =
        "Describe this screenshot in detail. Note any UI elements, text, and visible content.";
    let described = vision
        .describe_image(
            VisionInput {
                image_bytes,
                mime_type: String::from("image/png"),
            },
            VisionOptions {
                prompt: Some(String::from(prompt)),
                max_tokens: None,
            },
        )
        .await?;

    // Human-readable note body: the description followed by file details.
    let content = format!(
        "[Screenshot] {}\n\nFile: {}\nCaptured: {}\nSize: {}×{} px ({} bytes)",
        described.text,
        path.display(),
        screenshot.timestamp.to_rfc3339(),
        screenshot.width,
        screenshot.height,
        screenshot.file_size,
    );

    let stored = create_memory(
        conn,
        &CreateMemoryInput {
            content,
            memory_type: MemoryType::Note,
            tags: ["screenshot", "multimodal"]
                .iter()
                .map(|tag| tag.to_string())
                .collect(),
            tier: MemoryTier::Permanent,
            ..Default::default()
        },
    )?;

    insert_media_asset(
        conn,
        stored.id,
        screenshot,
        &described.text,
        &described.provider,
        &described.model,
    )?;

    Ok(stored.id)
}
/// Returns `<local data dir>/engram/screenshots`.
///
/// # Errors
///
/// Returns [`EngramError::Config`] when `dirs::data_local_dir` yields no directory.
fn default_screenshot_dir() -> Result<PathBuf> {
    match dirs::data_local_dir() {
        Some(mut dir) => {
            dir.push("engram");
            dir.push("screenshots");
            Ok(dir)
        }
        None => Err(EngramError::Config(
            "Cannot determine local data directory".to_string(),
        )),
    }
}
fn run_screencapture(args: &[&str], expected_output: &PathBuf) -> Result<()> {
let status = Command::new("screencapture")
.args(args)
.status()
.map_err(|e| EngramError::Storage(format!("Failed to launch screencapture: {}", e)))?;
if !status.success() {
return Err(EngramError::Storage(format!(
"screencapture exited with status {:?}",
status.code()
)));
}
if !expected_output.exists() {
return Err(EngramError::Storage(format!(
"screencapture did not produce output file: {:?}",
expected_output
)));
}
Ok(())
}
/// Assembles a [`ScreenshotResult`] for the image file at `image_path`.
///
/// The file size comes from file system metadata; the hash and dimensions are
/// derived from the file contents. The timestamp is the current UTC time.
///
/// # Errors
///
/// Returns [`EngramError::Storage`] when the metadata or the file contents
/// cannot be read.
fn build_result(image_path: PathBuf) -> Result<ScreenshotResult> {
    let file_size = match std::fs::metadata(&image_path) {
        Ok(meta) => meta.len(),
        Err(e) => {
            return Err(EngramError::Storage(format!(
                "Cannot read screenshot metadata {:?}: {}",
                image_path, e
            )));
        }
    };

    let bytes = match std::fs::read(&image_path) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(EngramError::Storage(format!(
                "Cannot read screenshot file {:?}: {}",
                image_path, e
            )));
        }
    };

    let (width, height) = parse_png_dimensions(&bytes);
    let file_hash = compute_sha256(&bytes);

    Ok(ScreenshotResult {
        image_path,
        width,
        height,
        file_size,
        timestamp: Utc::now(),
        file_hash,
    })
}
/// Returns the SHA-256 digest of `data` as a lowercase hexadecimal string.
fn compute_sha256(data: &[u8]) -> String {
    use std::fmt::Write as _;

    let mut state = Sha256::new();
    state.update(data);
    let digest = state.finalize();

    let mut hex = String::with_capacity(digest.len() * 2);
    for byte in digest.iter() {
        // Formatting into a `String` cannot fail, so the result is ignored.
        let _ = write!(hex, "{:02x}", byte);
    }
    hex
}
/// Reads `(width, height)` from the start of a PNG byte stream.
///
/// The first eight bytes must equal the PNG signature and at least 24 bytes
/// must be present; width and height are read as big-endian `u32` values at
/// byte offsets 16 and 20. Returns `(0, 0)` when either check fails.
fn parse_png_dimensions(data: &[u8]) -> (u32, u32) {
    const PNG_SIGNATURE: [u8; 8] = [0x89, b'P', b'N', b'G', 0x0D, 0x0A, 0x1A, 0x0A];
    const HEADER_LEN: usize = 24;

    if data.len() < HEADER_LEN || !data.starts_with(&PNG_SIGNATURE) {
        return (0, 0);
    }

    let read_be_u32 = |offset: usize| -> u32 {
        let mut raw = [0u8; 4];
        raw.copy_from_slice(&data[offset..offset + 4]);
        u32::from_be_bytes(raw)
    };

    (read_be_u32(16), read_be_u32(20))
}
/// Turns an application name into a lowercase, dash-separated slug.
///
/// Every run of characters that are not ASCII letters or digits becomes a
/// single `-`; leading and trailing separators are dropped. The result is
/// empty when `app_name` contains no ASCII alphanumeric characters.
fn sanitize_app_name(app_name: &str) -> String {
    let mut slug = String::with_capacity(app_name.len());
    let mut separator_pending = false;

    for ch in app_name.chars() {
        if !ch.is_ascii_alphanumeric() {
            separator_pending = true;
            continue;
        }
        // Emit a separator only between two alphanumeric runs.
        if separator_pending && !slug.is_empty() {
            slug.push('-');
        }
        separator_pending = false;
        slug.push(ch.to_ascii_lowercase());
    }

    slug
}
/// Looks up a window id for `app_name` from the output of `screencapture -L`.
///
/// Returns `None` when the command cannot be run or when no line of its output
/// matches.
fn find_window_id(app_name: &str) -> Option<u32> {
    let output = Command::new("screencapture").args(["-L"]).output().ok()?;
    let listing = String::from_utf8_lossy(&output.stdout);
    match_window_id(&listing, app_name)
}

/// Finds the first line of `listing` whose owner field matches `app_name` and
/// whose id field parses as a `u32`.
///
/// Each line is split on single spaces into `<id> <owner> <rest>`. Matching is
/// case-insensitive and succeeds when either name contains the other. Lines
/// with an empty owner field are skipped, and an empty `app_name` never
/// matches; without these guards the substring test would accept an empty
/// string and select an arbitrary window.
fn match_window_id(listing: &str, app_name: &str) -> Option<u32> {
    let wanted = app_name.to_lowercase();
    if wanted.is_empty() {
        return None;
    }

    listing.lines().find_map(|line| {
        let mut fields = line.splitn(3, ' ');
        let id_field = fields.next()?;
        let owner = fields.next()?.to_lowercase();
        if owner.is_empty() {
            return None;
        }
        if owner.contains(&wanted) || wanted.contains(&owner) {
            // A matching line with an unparsable id is skipped, not fatal.
            id_field.parse::<u32>().ok()
        } else {
            None
        }
    })
}
/// Records a screenshot in the `media_assets` table and returns the row id of
/// the asset.
///
/// The insert uses `INSERT OR IGNORE`, so a conflicting row leaves the table
/// unchanged. In that case `last_insert_rowid()` would still report the id of
/// whatever row this connection inserted last (for example the memory row), so
/// the affected-row count is checked and the existing asset is looked up
/// instead.
///
/// # Errors
///
/// Returns [`EngramError::Database`] when the insert fails, or when the insert
/// was ignored and no existing row with the same file hash can be found.
fn insert_media_asset(
    conn: &Connection,
    memory_id: i64,
    screenshot: &ScreenshotResult,
    description: &str,
    provider: &str,
    model: &str,
) -> Result<i64> {
    // Keep a lossy rendering of a non-UTF-8 path rather than storing an empty string.
    let file_path = screenshot.image_path.to_string_lossy();

    let inserted = conn
        .execute(
            "INSERT OR IGNORE INTO media_assets
             (memory_id, media_type, file_hash, file_path, file_size,
              mime_type, width, height, description, provider, model)
             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)",
            params![
                memory_id,
                "image",
                screenshot.file_hash,
                file_path.as_ref(),
                screenshot.file_size as i64,
                "image/png",
                screenshot.width,
                screenshot.height,
                description,
                provider,
                model,
            ],
        )
        .map_err(EngramError::Database)?;

    if inserted == 0 {
        // NOTE(review): assumes the ignored conflict is a uniqueness constraint
        // on `file_hash` — confirm against the `media_assets` schema.
        return conn
            .query_row(
                "SELECT rowid FROM media_assets WHERE file_hash = ?1 LIMIT 1",
                params![screenshot.file_hash],
                |row| row.get(0),
            )
            .map_err(EngramError::Database);
    }

    Ok(conn.last_insert_rowid())
}
/// Unit tests for the pure helpers and for directory/path handling.
/// None of these tests invoke the external capture command.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Non-alphanumeric runs collapse to single dashes and the result is lowercased.
    #[test]
    fn test_sanitize_app_name_removes_non_alphanumeric() {
        assert_eq!(sanitize_app_name("Google Chrome"), "google-chrome");
        assert_eq!(sanitize_app_name("Safari"), "safari");
        assert_eq!(sanitize_app_name("Xcode 15.0"), "xcode-15-0");
        assert_eq!(sanitize_app_name("VS Code"), "vs-code");
        assert_eq!(sanitize_app_name("---foo---"), "foo");
    }

    /// Hashing the same input twice yields the same 64-character string.
    #[test]
    fn test_compute_sha256_is_deterministic() {
        let data = b"hello world";
        let hash1 = compute_sha256(data);
        let hash2 = compute_sha256(data);
        assert_eq!(hash1, hash2);
        assert_eq!(hash1.len(), 64);
    }

    /// The hash is 64 characters long and uses only lowercase hex digits.
    #[test]
    fn test_compute_sha256_length() {
        let data = vec![0u8; 1024];
        let hash = compute_sha256(&data);
        assert_eq!(hash.len(), 64, "SHA-256 hex string must be 64 chars");
        // `is_ascii_hexdigit` would also accept uppercase, so check the ranges directly.
        assert!(
            hash.chars()
                .all(|c| c.is_ascii_digit() || ('a'..='f').contains(&c)),
            "Hash must be lowercase hex"
        );
    }

    /// Width and height are read from offsets 16 and 20 after a valid signature.
    #[test]
    fn test_parse_png_dimensions_valid_png() {
        let mut data = vec![0u8; 24];
        data[0..8].copy_from_slice(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]);
        data[16..20].copy_from_slice(&1920u32.to_be_bytes());
        data[20..24].copy_from_slice(&1080u32.to_be_bytes());
        let (w, h) = parse_png_dimensions(&data);
        assert_eq!(w, 1920);
        assert_eq!(h, 1080);
    }

    /// Input shorter than 24 bytes yields `(0, 0)`.
    #[test]
    fn test_parse_png_dimensions_too_short() {
        let data = vec![0u8; 10];
        let (w, h) = parse_png_dimensions(&data);
        assert_eq!(w, 0);
        assert_eq!(h, 0);
    }

    /// A non-PNG signature yields `(0, 0)` even when the size fields are populated.
    #[test]
    fn test_parse_png_dimensions_invalid_signature() {
        let mut data = vec![0u8; 24];
        data[0..4].copy_from_slice(&[0xFF, 0xD8, 0xFF, 0xE0]);
        data[16..20].copy_from_slice(&1920u32.to_be_bytes());
        data[20..24].copy_from_slice(&1080u32.to_be_bytes());
        let (w, h) = parse_png_dimensions(&data);
        assert_eq!(w, 0);
        assert_eq!(h, 0);
    }

    /// `with_dir` creates missing nested directories.
    #[test]
    fn test_screenshot_capture_creates_directory() {
        let dir = tempdir().unwrap();
        let nested = dir.path().join("a").join("b").join("screenshots");
        assert!(!nested.exists());
        let _capture = ScreenshotCapture::with_dir(nested.clone()).unwrap();
        assert!(
            nested.exists(),
            "Directory should be created by ScreenshotCapture::with_dir"
        );
    }

    /// The default directory path contains `engram` and ends with `screenshots`.
    #[test]
    fn test_default_screenshot_dir_is_under_engram() {
        let dir = default_screenshot_dir().unwrap();
        let path_str = dir.to_string_lossy();
        assert!(
            path_str.contains("engram"),
            "Default screenshot dir should be under an 'engram' directory, got: {}",
            path_str
        );
        assert!(
            path_str.ends_with("screenshots"),
            "Default screenshot dir should end with 'screenshots', got: {}",
            path_str
        );
    }

    /// The smallest non-zero dimensions are parsed correctly.
    #[test]
    fn test_parse_png_dimensions_1x1_pixel() {
        let mut data = vec![0u8; 24];
        data[0..8].copy_from_slice(&[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A]);
        data[16..20].copy_from_slice(&1u32.to_be_bytes());
        data[20..24].copy_from_slice(&1u32.to_be_bytes());
        let (w, h) = parse_png_dimensions(&data);
        assert_eq!(w, 1);
        assert_eq!(h, 1);
    }

    /// Generated file names start with the prefix and end with `.png`.
    #[test]
    fn test_generate_path_includes_prefix_and_extension() {
        let dir = tempdir().unwrap();
        let capture = ScreenshotCapture::with_dir(dir.path().to_path_buf()).unwrap();
        let path = capture.generate_path("screen");
        let filename = path.file_name().unwrap().to_str().unwrap();
        assert!(
            filename.starts_with("screen_"),
            "filename should start with 'screen_', got: {}",
            filename
        );
        assert!(
            filename.ends_with(".png"),
            "filename should end with '.png', got: {}",
            filename
        );
    }

    /// Different prefixes never produce the same path.
    #[test]
    fn test_generate_path_unique_for_different_prefixes() {
        let dir = tempdir().unwrap();
        let capture = ScreenshotCapture::with_dir(dir.path().to_path_buf()).unwrap();
        let path1 = capture.generate_path("screen");
        let path2 = capture.generate_path("safari");
        assert_ne!(path1, path2);
    }
}