Skip to main content

engram/storage/
image_storage.rs

1//! Image storage backend for memory images.
2//!
3//! This module handles uploading, retrieving, and deleting images from
4//! S3-compatible storage (like Cloudflare R2). Images are stored separately
5//! from the SQLite database.
6
7use base64::{engine::general_purpose::STANDARD as BASE64, Engine};
8use chrono::Utc;
9use rusqlite::{params, Connection};
10use serde::{Deserialize, Serialize};
11use sha2::{Digest, Sha256};
12use std::path::PathBuf;
13
14use crate::error::{EngramError, Result};
15
/// Settings that determine where memory images are stored and served from.
///
/// `local_dir` is always populated; the S3-related fields are optional and
/// are `None` when no S3-compatible backend has been configured.
#[derive(Debug, Clone)]
pub struct ImageStorageConfig {
    /// Directory on the local filesystem used for images (the fallback when
    /// S3 is not configured).
    pub local_dir: PathBuf,
    /// Name of the S3 bucket, if any.
    pub s3_bucket: Option<String>,
    /// Custom S3 endpoint URL, if any (e.g. for R2 or MinIO).
    pub s3_endpoint: Option<String>,
    /// Public domain used when serving images, if any.
    pub public_domain: Option<String>,
}
28
29impl Default for ImageStorageConfig {
30    fn default() -> Self {
31        let local_dir = dirs::data_local_dir()
32            .unwrap_or_else(|| PathBuf::from("."))
33            .join("engram")
34            .join("images");
35        Self {
36            local_dir,
37            s3_bucket: None,
38            s3_endpoint: None,
39            public_domain: None,
40        }
41    }
42}
43
44/// Uploaded image information
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct UploadedImage {
47    /// Storage key/path
48    pub key: String,
49    /// Full URL to access the image
50    pub url: String,
51    /// Original filename if available
52    pub filename: Option<String>,
53    /// Content type (MIME type)
54    pub content_type: String,
55    /// Size in bytes
56    pub size: usize,
57    /// Content hash (SHA256)
58    pub hash: String,
59}
60
61/// Image reference stored in memory metadata
62#[derive(Debug, Clone, Serialize, Deserialize)]
63pub struct ImageRef {
64    /// Storage URL (local:// or r2:// or https://)
65    pub url: String,
66    /// Optional caption
67    pub caption: Option<String>,
68    /// Image index within the memory
69    pub index: i32,
70    /// Content type
71    pub content_type: String,
72    /// Size in bytes
73    pub size: usize,
74}
75
76/// Result of image migration
77#[derive(Debug, Clone, Serialize, Deserialize)]
78pub struct MigrationResult {
79    pub memories_scanned: i64,
80    pub memories_with_images: i64,
81    pub images_migrated: i64,
82    pub images_failed: i64,
83    pub errors: Vec<String>,
84    pub dry_run: bool,
85}
86
87/// Compute SHA256 hash of data
88fn compute_hash(data: &[u8]) -> String {
89    let mut hasher = Sha256::new();
90    hasher.update(data);
91    format!("{:x}", hasher.finalize())
92}
93
/// Map a MIME content type to the file extension used for stored images.
///
/// The comparison ignores ASCII case and any parameters after the media type
/// (for example `IMAGE/PNG` and `image/png; charset=binary` both yield
/// `"png"`), because media types are case-insensitive and may legitimately
/// carry parameters. Unrecognised content types yield `"bin"`.
fn extension_from_content_type(content_type: &str) -> &str {
    const EXTENSION_BY_MIME: [(&str, &str); 7] = [
        ("image/jpeg", "jpg"),
        ("image/png", "png"),
        ("image/gif", "gif"),
        ("image/webp", "webp"),
        ("image/svg+xml", "svg"),
        ("image/bmp", "bmp"),
        ("image/tiff", "tiff"),
    ];

    // Keep only the `type/subtype` part: drop `;param=value` suffixes and
    // surrounding whitespace.
    let media_type = content_type.split(';').next().unwrap_or("").trim();

    EXTENSION_BY_MIME
        .iter()
        .find(|(mime, _)| media_type.eq_ignore_ascii_case(mime))
        .map_or("bin", |&(_, extension)| extension)
}
107
/// Look up the MIME content type for a file extension (given without the
/// leading dot).
///
/// The extension is lowercased before the lookup, so `"PNG"` and `"png"` are
/// treated alike. Extensions that are not known image types map to
/// `"application/octet-stream"`.
fn content_type_from_extension(ext: &str) -> &str {
    const MIME_BY_EXTENSION: [(&str, &str); 9] = [
        ("jpg", "image/jpeg"),
        ("jpeg", "image/jpeg"),
        ("png", "image/png"),
        ("gif", "image/gif"),
        ("webp", "image/webp"),
        ("svg", "image/svg+xml"),
        ("bmp", "image/bmp"),
        ("tiff", "image/tiff"),
        ("tif", "image/tiff"),
    ];

    let lowered = ext.to_lowercase();

    MIME_BY_EXTENSION
        .iter()
        .find(|(known, _)| *known == lowered)
        .map_or("application/octet-stream", |&(_, mime)| mime)
}
121
122/// Parse a data URI into bytes and content type
123pub fn parse_data_uri(data_uri: &str) -> Result<(Vec<u8>, String)> {
124    // Match data URI format: data:mime/type;base64,DATA
125    if !data_uri.starts_with("data:") {
126        return Err(EngramError::InvalidInput("Not a data URI".to_string()));
127    }
128
129    let rest = &data_uri[5..];
130    let (content_type, data) = if let Some(semicolon_pos) = rest.find(';') {
131        let ct = &rest[..semicolon_pos];
132        let after_semicolon = &rest[semicolon_pos + 1..];
133
134        if let Some(stripped) = after_semicolon.strip_prefix("base64,") {
135            (ct.to_string(), stripped)
136        } else {
137            return Err(EngramError::InvalidInput(
138                "Invalid data URI encoding".to_string(),
139            ));
140        }
141    } else {
142        return Err(EngramError::InvalidInput(
143            "Invalid data URI format".to_string(),
144        ));
145    };
146
147    let bytes = BASE64
148        .decode(data)
149        .map_err(|e| EngramError::InvalidInput(format!("Failed to decode base64: {}", e)))?;
150
151    Ok((bytes, content_type))
152}
153
/// Image storage backed by a directory on the local filesystem (used when S3
/// is not configured).
pub struct LocalImageStorage {
    /// Root directory; every storage key is resolved relative to it.
    base_dir: PathBuf,
}
158
159impl LocalImageStorage {
160    pub fn new(base_dir: PathBuf) -> Result<Self> {
161        std::fs::create_dir_all(&base_dir)
162            .map_err(|e| EngramError::Storage(format!("Failed to create image dir: {}", e)))?;
163        Ok(Self { base_dir })
164    }
165
166    /// Generate storage key for an image
167    fn generate_key(
168        &self,
169        memory_id: i64,
170        image_index: i32,
171        hash: &str,
172        extension: &str,
173    ) -> String {
174        let timestamp = Utc::now().timestamp();
175        let short_hash = &hash[..8.min(hash.len())];
176        format!(
177            "images/{}/{}_{}_{}.{}",
178            memory_id, timestamp, image_index, short_hash, extension
179        )
180    }
181
182    /// Upload image from bytes
183    pub fn upload_image(
184        &self,
185        image_data: &[u8],
186        content_type: &str,
187        memory_id: i64,
188        image_index: i32,
189    ) -> Result<UploadedImage> {
190        let hash = compute_hash(image_data);
191        let extension = extension_from_content_type(content_type);
192        let key = self.generate_key(memory_id, image_index, &hash, extension);
193
194        // Create directory structure
195        let full_path = self.base_dir.join(&key);
196        if let Some(parent) = full_path.parent() {
197            std::fs::create_dir_all(parent)
198                .map_err(|e| EngramError::Storage(format!("Failed to create dir: {}", e)))?;
199        }
200
201        // Write file
202        std::fs::write(&full_path, image_data)
203            .map_err(|e| EngramError::Storage(format!("Failed to write image: {}", e)))?;
204
205        let url = format!("local://{}", key);
206
207        Ok(UploadedImage {
208            key,
209            url,
210            filename: None,
211            content_type: content_type.to_string(),
212            size: image_data.len(),
213            hash,
214        })
215    }
216
217    /// Upload image from file path
218    pub fn upload_from_file(
219        &self,
220        file_path: &str,
221        memory_id: i64,
222        image_index: i32,
223    ) -> Result<UploadedImage> {
224        let path = std::path::Path::new(file_path);
225
226        // Read file
227        let image_data = std::fs::read(path)
228            .map_err(|e| EngramError::Storage(format!("Failed to read file: {}", e)))?;
229
230        // Detect content type from extension
231        let extension = path.extension().and_then(|e| e.to_str()).unwrap_or("bin");
232        let content_type = content_type_from_extension(extension);
233
234        let mut result = self.upload_image(&image_data, content_type, memory_id, image_index)?;
235        result.filename = path.file_name().and_then(|n| n.to_str()).map(String::from);
236
237        Ok(result)
238    }
239
240    /// Get full path for a key
241    pub fn get_path(&self, key: &str) -> PathBuf {
242        self.base_dir.join(key)
243    }
244
245    /// Delete an image
246    pub fn delete_image(&self, key: &str) -> Result<bool> {
247        let path = self.get_path(key);
248        if path.exists() {
249            std::fs::remove_file(&path)
250                .map_err(|e| EngramError::Storage(format!("Failed to delete image: {}", e)))?;
251            Ok(true)
252        } else {
253            Ok(false)
254        }
255    }
256
257    /// Delete all images for a memory
258    pub fn delete_memory_images(&self, memory_id: i64) -> Result<i64> {
259        let dir = self.base_dir.join("images").join(memory_id.to_string());
260        if !dir.exists() {
261            return Ok(0);
262        }
263
264        let mut count = 0;
265        for entry in std::fs::read_dir(&dir)
266            .map_err(|e| EngramError::Storage(format!("Failed to read dir: {}", e)))?
267        {
268            let entry =
269                entry.map_err(|e| EngramError::Storage(format!("Failed to read entry: {}", e)))?;
270            if entry.path().is_file() {
271                std::fs::remove_file(entry.path())
272                    .map_err(|e| EngramError::Storage(format!("Failed to delete file: {}", e)))?;
273                count += 1;
274            }
275        }
276
277        // Remove empty directory
278        let _ = std::fs::remove_dir(&dir);
279
280        Ok(count)
281    }
282}
283
284/// Upload an image to storage and link it to a memory
285pub fn upload_image(
286    conn: &Connection,
287    storage: &LocalImageStorage,
288    memory_id: i64,
289    file_path: &str,
290    image_index: i32,
291    caption: Option<&str>,
292) -> Result<ImageRef> {
293    use crate::storage::queries::get_memory;
294
295    // Verify memory exists
296    let memory = get_memory(conn, memory_id)?;
297
298    // Upload the image
299    let uploaded = storage.upload_from_file(file_path, memory_id, image_index)?;
300
301    // Create image reference
302    let image_ref = ImageRef {
303        url: uploaded.url.clone(),
304        caption: caption.map(String::from),
305        index: image_index,
306        content_type: uploaded.content_type,
307        size: uploaded.size,
308    };
309
310    // Update memory metadata with image reference
311    let mut metadata = memory.metadata.clone();
312    let images: Vec<ImageRef> = metadata
313        .get("images")
314        .and_then(|v| serde_json::from_value(v.clone()).ok())
315        .unwrap_or_default();
316
317    let mut images: Vec<ImageRef> = images
318        .into_iter()
319        .filter(|i| i.index != image_index)
320        .collect();
321    images.push(image_ref.clone());
322    images.sort_by_key(|i| i.index);
323
324    metadata.insert("images".to_string(), serde_json::to_value(&images)?);
325    let metadata_json = serde_json::to_string(&metadata)?;
326
327    conn.execute(
328        "UPDATE memories SET metadata = ?, updated_at = ? WHERE id = ?",
329        params![metadata_json, Utc::now().to_rfc3339(), memory_id],
330    )?;
331
332    Ok(image_ref)
333}
334
335/// Migrate base64-encoded images to storage
336pub fn migrate_images(
337    conn: &Connection,
338    storage: &LocalImageStorage,
339    dry_run: bool,
340) -> Result<MigrationResult> {
341    use crate::storage::queries::get_memory;
342
343    let mut result = MigrationResult {
344        memories_scanned: 0,
345        memories_with_images: 0,
346        images_migrated: 0,
347        images_failed: 0,
348        errors: Vec::new(),
349        dry_run,
350    };
351
352    // Find all memories
353    let mut stmt = conn.prepare("SELECT id, metadata FROM memories WHERE valid_to IS NULL")?;
354
355    let memory_ids: Vec<i64> = stmt
356        .query_map([], |row| row.get(0))?
357        .filter_map(|r| r.ok())
358        .collect();
359
360    for memory_id in memory_ids {
361        result.memories_scanned += 1;
362
363        let memory = match get_memory(conn, memory_id) {
364            Ok(m) => m,
365            Err(e) => {
366                result
367                    .errors
368                    .push(format!("Failed to get memory {}: {}", memory_id, e));
369                continue;
370            }
371        };
372
373        // Check for images in metadata
374        let images: Vec<serde_json::Value> = memory
375            .metadata
376            .get("images")
377            .and_then(|v| v.as_array())
378            .cloned()
379            .unwrap_or_default();
380
381        // Also check content for inline data URIs
382        let content_has_data_uri = memory.content.contains("data:image/");
383
384        if images.is_empty() && !content_has_data_uri {
385            continue;
386        }
387
388        result.memories_with_images += 1;
389
390        // Process images in metadata
391        let mut new_images: Vec<ImageRef> = Vec::new();
392        let mut image_index = 0;
393
394        for img in images {
395            let url = img.get("url").and_then(|v| v.as_str()).unwrap_or("");
396
397            // Skip if already migrated (not a data URI)
398            if !url.starts_with("data:") {
399                if let Ok(existing) = serde_json::from_value::<ImageRef>(img.clone()) {
400                    new_images.push(existing);
401                }
402                continue;
403            }
404
405            // Parse and upload data URI
406            match parse_data_uri(url) {
407                Ok((data, content_type)) => {
408                    if dry_run {
409                        result.images_migrated += 1;
410                        // In dry run, keep existing
411                        if let Ok(existing) = serde_json::from_value::<ImageRef>(img.clone()) {
412                            new_images.push(existing);
413                        }
414                    } else {
415                        match storage.upload_image(&data, &content_type, memory_id, image_index) {
416                            Ok(uploaded) => {
417                                let caption = img
418                                    .get("caption")
419                                    .and_then(|v| v.as_str())
420                                    .map(String::from);
421                                new_images.push(ImageRef {
422                                    url: uploaded.url,
423                                    caption,
424                                    index: image_index,
425                                    content_type: uploaded.content_type,
426                                    size: uploaded.size,
427                                });
428                                result.images_migrated += 1;
429                            }
430                            Err(e) => {
431                                result.images_failed += 1;
432                                result.errors.push(format!(
433                                    "Failed to upload image {} for memory {}: {}",
434                                    image_index, memory_id, e
435                                ));
436                                // Keep original on failure
437                                if let Ok(existing) =
438                                    serde_json::from_value::<ImageRef>(img.clone())
439                                {
440                                    new_images.push(existing);
441                                }
442                            }
443                        }
444                    }
445                }
446                Err(e) => {
447                    result.images_failed += 1;
448                    result.errors.push(format!(
449                        "Failed to parse data URI for memory {}: {}",
450                        memory_id, e
451                    ));
452                    // Keep original on failure
453                    if let Ok(existing) = serde_json::from_value::<ImageRef>(img.clone()) {
454                        new_images.push(existing);
455                    }
456                }
457            }
458            image_index += 1;
459        }
460
461        // Update metadata with migrated images (unless dry run)
462        if !dry_run && !new_images.is_empty() {
463            let mut metadata = memory.metadata.clone();
464            metadata.insert("images".to_string(), serde_json::to_value(&new_images)?);
465            let metadata_json = serde_json::to_string(&metadata)?;
466
467            if let Err(e) = conn.execute(
468                "UPDATE memories SET metadata = ?, updated_at = ? WHERE id = ?",
469                params![metadata_json, Utc::now().to_rfc3339(), memory_id],
470            ) {
471                result
472                    .errors
473                    .push(format!("Failed to update memory {}: {}", memory_id, e));
474            }
475        }
476    }
477
478    Ok(result)
479}
480
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// A well-formed base64 PNG data URI yields its MIME type and some bytes.
    #[test]
    fn test_parse_data_uri() {
        let data_uri = "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg==";

        let parsed = parse_data_uri(data_uri);
        let (bytes, content_type) = parsed.unwrap();

        assert_eq!(content_type, "image/png");
        assert!(!bytes.is_empty());
    }

    /// Upload, locate and delete an image in a temporary directory.
    #[test]
    fn test_local_storage() {
        let root = tempdir().unwrap();
        let storage = LocalImageStorage::new(root.path().to_path_buf()).unwrap();

        // Just the 8-byte PNG signature; the storage does not inspect the
        // image content, so this is enough for the round trip.
        let png_data = [0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];

        let uploaded = storage.upload_image(&png_data, "image/png", 1, 0).unwrap();
        assert!(uploaded.url.starts_with("local://"));
        assert_eq!(uploaded.content_type, "image/png");
        assert_eq!(uploaded.size, png_data.len());

        // The file must be present on disk under the returned key.
        let stored_at = storage.get_path(&uploaded.key);
        assert!(stored_at.exists());

        // Deleting reports success and removes the file.
        let deleted = storage.delete_image(&uploaded.key).unwrap();
        assert!(deleted);
        assert!(!stored_at.exists());
    }

    /// Extension lookup is case-insensitive.
    #[test]
    fn test_content_type_detection() {
        let cases = [
            ("jpg", "image/jpeg"),
            ("PNG", "image/png"),
            ("webp", "image/webp"),
        ];
        for (ext, expected) in cases.iter() {
            assert_eq!(content_type_from_extension(ext), *expected);
        }
    }
}