Skip to main content

coding_agent_search/pages/
attachments.rs

1//! Attachment support for pages export.
2//!
3//! Implements opt-in attachment handling for images, PDFs, and code snapshots
4//! with proper encryption, size limits, and lazy loading.
5//!
6//! # Overview
7//!
8//! Attachments are stored in a `blobs/` directory with:
9//! - Each blob named by its SHA-256 hash
10//! - Blobs individually encrypted with unique nonces
11//! - A manifest file mapping hashes to metadata
12//!
13//! # Size Limits
14//!
//! - Per-file maximum: 10 MB (default, configurable)
//! - Total maximum: 100 MB (default, configurable)
17
18use aes_gcm::{
19    Aes256Gcm, Nonce,
20    aead::{Aead, KeyInit, Payload},
21};
22use anyhow::{Context, Result, bail};
23use serde::{Deserialize, Serialize};
24use sha2::{Digest, Sha256};
25use std::collections::HashMap;
26use std::fs::{self, File, OpenOptions};
27use std::io::{BufWriter, Write};
28use std::path::{Path, PathBuf};
29use tracing::{debug, info, warn};
30
/// Default maximum size of a single attachment, in bytes (10 * 1024 * 1024).
///
/// Attachments larger than the configured per-file limit are skipped.
pub const DEFAULT_MAX_FILE_SIZE: usize = 10 * 1024 * 1024;
33
/// Default maximum combined size of all stored attachments, in bytes
/// (100 * 1024 * 1024).
pub const DEFAULT_MAX_TOTAL_SIZE: usize = 100 * 1024 * 1024;
36
/// Default allow-list of MIME types accepted for export.
///
/// Any attachment whose MIME type is not on the configured allow-list is
/// skipped during processing.
pub const DEFAULT_ALLOWED_MIME_TYPES: &[&str] = &[
    // Images
    "image/png",
    "image/jpeg",
    "image/gif",
    "image/webp",
    "image/svg+xml",
    // Documents
    "application/pdf",
    // Text
    "text/plain",
    "text/html",
    "text/css",
    "text/javascript",
    "application/json",
    "application/xml",
];
55
/// Domain-separation label handed to the HKDF helper whenever a nonce is
/// derived for a blob or for the attachment manifest.
///
/// The `-v1` suffix versions the derivation scheme.
const BLOB_NONCE_DOMAIN: &[u8] = b"cass-blob-nonce-v1";
58
/// Configuration for attachment processing.
///
/// Controls whether attachments are exported at all and which size and
/// MIME-type limits are enforced while collecting them.
#[derive(Debug, Clone)]
pub struct AttachmentConfig {
    /// Whether attachment processing is enabled (opt-in; `false` by default)
    pub enabled: bool,
    /// Maximum size per file in bytes; larger attachments are skipped
    pub max_file_size_bytes: usize,
    /// Maximum total size for all stored attachments in bytes
    pub max_total_size_bytes: usize,
    /// Allowed MIME types; matching ignores ASCII case and any `;` parameters
    pub allowed_mime_types: Vec<String>,
}
71
72impl Default for AttachmentConfig {
73    fn default() -> Self {
74        Self {
75            enabled: false, // Disabled by default
76            max_file_size_bytes: DEFAULT_MAX_FILE_SIZE,
77            max_total_size_bytes: DEFAULT_MAX_TOTAL_SIZE,
78            allowed_mime_types: DEFAULT_ALLOWED_MIME_TYPES
79                .iter()
80                .map(|s| s.to_string())
81                .collect(),
82        }
83    }
84}
85
86impl AttachmentConfig {
87    /// Create a new config with attachments enabled
88    pub fn enabled() -> Self {
89        Self {
90            enabled: true,
91            ..Default::default()
92        }
93    }
94
95    /// Set the maximum file size
96    pub fn with_max_file_size(mut self, bytes: usize) -> Self {
97        self.max_file_size_bytes = bytes;
98        self
99    }
100
101    /// Set the maximum total size
102    pub fn with_max_total_size(mut self, bytes: usize) -> Self {
103        self.max_total_size_bytes = bytes;
104        self
105    }
106
107    /// Check if a MIME type is allowed
108    pub fn is_mime_allowed(&self, mime_type: &str) -> bool {
109        let Some(mime_type) = mime_type_essence(mime_type) else {
110            return false;
111        };
112        self.allowed_mime_types
113            .iter()
114            .filter_map(|allowed| mime_type_essence(allowed))
115            .any(|allowed| mime_type == allowed)
116    }
117}
118
/// Reduce a MIME type string to its essence.
///
/// Everything from the first `;` onwards is discarded, surrounding whitespace
/// is trimmed and the remainder is ASCII-lowercased. Returns `None` when
/// nothing is left after trimming.
fn mime_type_essence(mime_type: &str) -> Option<String> {
    let head = match mime_type.split_once(';') {
        Some((before_params, _)) => before_params,
        None => mime_type,
    };
    let trimmed = head.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_ascii_lowercase())
}
126
/// Raw attachment data from a connector, before any filtering, hashing or
/// encryption has been applied.
#[derive(Debug, Clone)]
pub struct AttachmentData {
    /// Original filename
    pub filename: String,
    /// MIME type as reported for this attachment (may carry parameters)
    pub mime_type: String,
    /// Raw (plaintext) data bytes
    pub data: Vec<u8>,
}
137
/// Metadata for a processed attachment entry.
///
/// One entry is recorded per accepted attachment; several entries may share
/// the same `hash` when their contents are identical.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttachmentEntry {
    /// SHA-256 hash of plaintext as lowercase hex (used as blob filename)
    pub hash: String,
    /// Original filename
    pub filename: String,
    /// MIME type
    pub mime_type: String,
    /// Size of the plaintext in bytes
    pub size_bytes: usize,
    /// Associated message ID
    pub message_id: i64,
}
152
/// Manifest containing all attachment metadata.
///
/// Serialized as JSON and stored encrypted next to the blobs.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttachmentManifest {
    /// Version of the manifest format
    pub version: u8,
    /// List of all attachments
    pub entries: Vec<AttachmentEntry>,
    /// Total size of all attachments; manifests written by
    /// `AttachmentProcessor` count each unique blob once
    pub total_size_bytes: usize,
}
163
164impl Default for AttachmentManifest {
165    fn default() -> Self {
166        Self {
167            version: 1,
168            entries: Vec::new(),
169            total_size_bytes: 0,
170        }
171    }
172}
173
/// Attachment processor that collects and encrypts attachments.
///
/// Accepted attachments are buffered in memory until they are written out.
pub struct AttachmentProcessor {
    /// Limits and allow-list applied to every incoming attachment
    config: AttachmentConfig,
    /// One metadata entry per accepted attachment (duplicates included)
    entries: Vec<AttachmentEntry>,
    /// Map of hash -> data for deduplication
    blobs: HashMap<String, Vec<u8>>,
    /// Combined size in bytes of the unique blobs stored in `blobs`
    total_size: usize,
    /// Count of skipped attachments
    skipped_count: usize,
}
184
185impl AttachmentProcessor {
186    /// Create a new attachment processor with the given configuration
187    pub fn new(config: AttachmentConfig) -> Self {
188        Self {
189            config,
190            entries: Vec::new(),
191            blobs: HashMap::new(),
192            total_size: 0,
193            skipped_count: 0,
194        }
195    }
196
197    /// Check if attachment processing is enabled
198    pub fn is_enabled(&self) -> bool {
199        self.config.enabled
200    }
201
202    /// Get the current total size
203    pub fn total_size(&self) -> usize {
204        self.total_size
205    }
206
207    /// Get the number of processed attachments
208    pub fn count(&self) -> usize {
209        self.entries.len()
210    }
211
212    /// Get the number of skipped attachments
213    pub fn skipped_count(&self) -> usize {
214        self.skipped_count
215    }
216
217    /// Process attachments from a message
218    ///
219    /// Returns a list of blob hashes that were successfully processed.
220    /// Attachments that exceed size limits or have disallowed MIME types
221    /// are logged and skipped.
222    pub fn process_attachments(
223        &mut self,
224        message_id: i64,
225        attachments: &[AttachmentData],
226    ) -> Result<Vec<String>> {
227        if !self.config.enabled {
228            return Ok(Vec::new());
229        }
230
231        let mut refs = Vec::new();
232
233        for attachment in attachments {
234            // Check MIME type
235            if !self.config.is_mime_allowed(&attachment.mime_type) {
236                warn!(
237                    filename = %attachment.filename,
238                    mime_type = %attachment.mime_type,
239                    "Skipping attachment with disallowed MIME type"
240                );
241                self.skipped_count += 1;
242                continue;
243            }
244
245            // Check per-file size limit
246            if attachment.data.len() > self.config.max_file_size_bytes {
247                warn!(
248                    filename = %attachment.filename,
249                    size = attachment.data.len(),
250                    limit = self.config.max_file_size_bytes,
251                    "Skipping oversized attachment"
252                );
253                self.skipped_count += 1;
254                continue;
255            }
256
257            // Check total size limit
258            if self.total_size + attachment.data.len() > self.config.max_total_size_bytes {
259                warn!(
260                    filename = %attachment.filename,
261                    current_total = self.total_size,
262                    attachment_size = attachment.data.len(),
263                    limit = self.config.max_total_size_bytes,
264                    "Total attachment limit reached, skipping"
265                );
266                self.skipped_count += 1;
267                continue;
268            }
269
270            // Compute SHA-256 hash
271            let hash = compute_sha256_hex(&attachment.data);
272
273            // Check for deduplication
274            if self.blobs.contains_key(&hash) {
275                debug!(
276                    filename = %attachment.filename,
277                    hash = %hash,
278                    "Attachment already processed (deduplicated)"
279                );
280                // Still add the entry for this message
281                self.entries.push(AttachmentEntry {
282                    hash: hash.clone(),
283                    filename: attachment.filename.clone(),
284                    mime_type: attachment.mime_type.clone(),
285                    size_bytes: attachment.data.len(),
286                    message_id,
287                });
288                refs.push(hash);
289                continue;
290            }
291
292            // Store the blob
293            self.blobs.insert(hash.clone(), attachment.data.clone());
294            self.total_size += attachment.data.len();
295
296            // Create entry
297            self.entries.push(AttachmentEntry {
298                hash: hash.clone(),
299                filename: attachment.filename.clone(),
300                mime_type: attachment.mime_type.clone(),
301                size_bytes: attachment.data.len(),
302                message_id,
303            });
304
305            debug!(
306                filename = %attachment.filename,
307                hash = %hash,
308                size = attachment.data.len(),
309                "Processed attachment"
310            );
311
312            refs.push(hash);
313        }
314
315        Ok(refs)
316    }
317
318    /// Write encrypted blobs to the output directory
319    ///
320    /// Each blob is encrypted with AES-256-GCM using:
321    /// - DEK: Same data encryption key as main database
322    /// - Nonce: Derived from blob hash using HKDF
323    /// - AAD: export_id || hash bytes
324    pub fn write_encrypted_blobs(
325        &self,
326        output_dir: &Path,
327        dek: &[u8; 32],
328        export_id: &[u8; 16],
329    ) -> Result<AttachmentManifest> {
330        if self.blobs.is_empty() {
331            return Ok(AttachmentManifest::default());
332        }
333
334        let blobs_dir = output_dir.join("blobs");
335        ensure_real_output_directory(output_dir, "Attachment output directory")?;
336        ensure_real_output_directory(&blobs_dir, "Attachment blobs directory")?;
337
338        let cipher = Aes256Gcm::new_from_slice(dek).expect("Invalid DEK length");
339
340        for (hash, data) in &self.blobs {
341            let blob_path = blobs_dir.join(format!("{}.bin", hash));
342
343            // Derive nonce from hash
344            let nonce = derive_blob_nonce(hash);
345
346            // Build AAD: export_id || hash_bytes
347            let hash_bytes = hex::decode(hash).context("Invalid hash hex")?;
348            let mut aad = Vec::with_capacity(export_id.len() + hash_bytes.len());
349            aad.extend_from_slice(export_id);
350            aad.extend_from_slice(&hash_bytes);
351
352            // Encrypt
353            let ciphertext = cipher
354                .encrypt(
355                    Nonce::from_slice(&nonce),
356                    Payload {
357                        msg: data.as_slice(),
358                        aad: &aad,
359                    },
360                )
361                .map_err(|e| anyhow::anyhow!("Blob encryption failed: {}", e))?;
362
363            write_ciphertext_file(&blob_path, &ciphertext, "attachment blob")?;
364
365            debug!(hash = %hash, path = %blob_path.display(), "Wrote encrypted blob");
366        }
367
368        // Write encrypted manifest
369        let manifest = AttachmentManifest {
370            version: 1,
371            entries: self.entries.clone(),
372            total_size_bytes: self.total_size,
373        };
374
375        let manifest_json =
376            serde_json::to_vec(&manifest).context("Failed to serialize manifest")?;
377
378        // Use a fixed nonce for the manifest (derived from "manifest" string)
379        let manifest_nonce = derive_blob_nonce("manifest");
380
381        // AAD for manifest: just export_id
382        let manifest_ciphertext = cipher
383            .encrypt(
384                Nonce::from_slice(&manifest_nonce),
385                Payload {
386                    msg: &manifest_json,
387                    aad: export_id,
388                },
389            )
390            .map_err(|e| anyhow::anyhow!("Manifest encryption failed: {}", e))?;
391
392        let manifest_path = blobs_dir.join("manifest.enc");
393        write_ciphertext_file(&manifest_path, &manifest_ciphertext, "attachment manifest")?;
394
395        info!(
396            count = self.entries.len(),
397            unique_blobs = self.blobs.len(),
398            total_size = self.total_size,
399            skipped = self.skipped_count,
400            "Wrote encrypted attachments"
401        );
402
403        Ok(manifest)
404    }
405}
406
407/// Compute SHA-256 hash of data and return as lowercase hex string
408fn compute_sha256_hex(data: &[u8]) -> String {
409    let mut hasher = Sha256::new();
410    hasher.update(data);
411    let result = hasher.finalize();
412    hex::encode(result)
413}
414
415/// Derive a unique 12-byte nonce from a blob identifier using HKDF
416fn derive_blob_nonce(identifier: &str) -> [u8; 12] {
417    crate::encryption::hkdf_extract_expand(identifier.as_bytes(), BLOB_NONCE_DOMAIN, b"nonce", 12)
418        .expect("HKDF expansion should never fail for 12 bytes")
419        .try_into()
420        .expect("HKDF expansion should return the requested nonce length")
421}
422
423/// Decrypt a blob given the DEK, export_id, and hash
424pub fn decrypt_blob(
425    ciphertext: &[u8],
426    dek: &[u8; 32],
427    export_id: &[u8; 16],
428    hash: &str,
429) -> Result<Vec<u8>> {
430    let cipher = Aes256Gcm::new_from_slice(dek).expect("Invalid DEK length");
431
432    // Derive nonce from hash
433    let nonce = derive_blob_nonce(hash);
434
435    // Build AAD
436    let hash_bytes = hex::decode(hash).context("Invalid hash hex")?;
437    let mut aad = Vec::with_capacity(export_id.len() + hash_bytes.len());
438    aad.extend_from_slice(export_id);
439    aad.extend_from_slice(&hash_bytes);
440
441    // Decrypt
442    let plaintext = cipher
443        .decrypt(
444            Nonce::from_slice(&nonce),
445            Payload {
446                msg: ciphertext,
447                aad: &aad,
448            },
449        )
450        .map_err(|_| anyhow::anyhow!("Blob decryption failed"))?;
451
452    Ok(plaintext)
453}
454
455/// Decrypt the attachment manifest
456pub fn decrypt_manifest(
457    ciphertext: &[u8],
458    dek: &[u8; 32],
459    export_id: &[u8; 16],
460) -> Result<AttachmentManifest> {
461    let cipher = Aes256Gcm::new_from_slice(dek).expect("Invalid DEK length");
462
463    // Use fixed nonce for manifest
464    let nonce = derive_blob_nonce("manifest");
465
466    // Decrypt
467    let plaintext = cipher
468        .decrypt(
469            Nonce::from_slice(&nonce),
470            Payload {
471                msg: ciphertext,
472                aad: export_id,
473            },
474        )
475        .map_err(|_| anyhow::anyhow!("Manifest decryption failed"))?;
476
477    let manifest: AttachmentManifest =
478        serde_json::from_slice(&plaintext).context("Failed to deserialize manifest")?;
479
480    Ok(manifest)
481}
482
483pub(crate) fn reencrypt_blobs_into_dir(
484    source_archive_dir: &Path,
485    output_archive_dir: &Path,
486    old_dek: &[u8; 32],
487    old_export_id: &[u8; 16],
488    new_dek: &[u8; 32],
489    new_export_id: &[u8; 16],
490) -> Result<()> {
491    let source_blobs_dir = source_archive_dir.join("blobs");
492    ensure_existing_ancestors_have_no_symlinks(
493        &source_blobs_dir,
494        "Source attachment blobs directory",
495    )?;
496    match fs::symlink_metadata(&source_blobs_dir) {
497        Ok(meta) => {
498            let file_type = meta.file_type();
499            if file_type.is_symlink() {
500                bail!(
501                    "Refusing to re-encrypt attachments from symlinked blobs directory: {}",
502                    source_blobs_dir.display()
503                );
504            }
505            if !file_type.is_dir() {
506                bail!(
507                    "Refusing to re-encrypt attachments from non-directory blobs path: {}",
508                    source_blobs_dir.display()
509                );
510            }
511        }
512        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
513        Err(err) => {
514            return Err(err).with_context(|| {
515                format!(
516                    "Failed to inspect attachment blobs directory {}",
517                    source_blobs_dir.display()
518                )
519            });
520        }
521    }
522
523    let output_blobs_dir = output_archive_dir.join("blobs");
524    ensure_real_output_directory(&output_blobs_dir, "Destination attachment blobs directory")?;
525
526    let manifest_path = source_blobs_dir.join("manifest.enc");
527    ensure_regular_ciphertext_file(&manifest_path, "attachment manifest")?;
528    let manifest_ciphertext =
529        fs::read(&manifest_path).context("Failed to read attachment manifest for rekey")?;
530    let manifest = decrypt_manifest(&manifest_ciphertext, old_dek, old_export_id)
531        .context("Failed to decrypt attachment manifest during key rotation")?;
532
533    let mut plaintext_blobs: HashMap<String, Vec<u8>> = HashMap::new();
534    for entry in &manifest.entries {
535        if plaintext_blobs.contains_key(&entry.hash) {
536            continue;
537        }
538
539        let blob_path = source_blobs_dir.join(format!("{}.bin", entry.hash));
540        ensure_regular_ciphertext_file(&blob_path, &format!("attachment blob {}", entry.hash))?;
541        let ciphertext = fs::read(&blob_path)
542            .with_context(|| format!("Failed to read attachment blob {}", entry.hash))?;
543        let plaintext = decrypt_blob(&ciphertext, old_dek, old_export_id, &entry.hash)
544            .with_context(|| format!("Failed to decrypt attachment blob {}", entry.hash))?;
545        plaintext_blobs.insert(entry.hash.clone(), plaintext);
546    }
547
548    let cipher = Aes256Gcm::new_from_slice(new_dek).expect("Invalid DEK length");
549
550    for (hash, data) in plaintext_blobs {
551        let nonce = derive_blob_nonce(&hash);
552        let hash_bytes = hex::decode(&hash).context("Invalid hash hex")?;
553        let mut aad = Vec::with_capacity(new_export_id.len() + hash_bytes.len());
554        aad.extend_from_slice(new_export_id);
555        aad.extend_from_slice(&hash_bytes);
556
557        let ciphertext = cipher
558            .encrypt(
559                Nonce::from_slice(&nonce),
560                Payload {
561                    msg: data.as_slice(),
562                    aad: &aad,
563                },
564            )
565            .map_err(|e| anyhow::anyhow!("Blob encryption failed during key rotation: {}", e))?;
566
567        write_ciphertext_file(
568            &output_blobs_dir.join(format!("{}.bin", hash)),
569            &ciphertext,
570            "attachment blob",
571        )
572        .with_context(|| format!("Failed to rewrite attachment blob {}", hash))?;
573    }
574
575    let manifest_json =
576        serde_json::to_vec(&manifest).context("Failed to serialize attachment manifest")?;
577    let manifest_nonce = derive_blob_nonce("manifest");
578    let reencrypted_manifest = cipher
579        .encrypt(
580            Nonce::from_slice(&manifest_nonce),
581            Payload {
582                msg: &manifest_json,
583                aad: new_export_id,
584            },
585        )
586        .map_err(|e| anyhow::anyhow!("Manifest encryption failed during key rotation: {}", e))?;
587
588    write_ciphertext_file(
589        &output_blobs_dir.join("manifest.enc"),
590        &reencrypted_manifest,
591        "attachment manifest",
592    )
593    .context("Failed to rewrite attachment manifest during key rotation")?;
594
595    Ok(())
596}
597
598fn ensure_real_output_directory(path: &Path, label: &str) -> Result<()> {
599    ensure_existing_ancestors_have_no_symlinks(path, label)?;
600    fs::create_dir_all(path).with_context(|| format!("Failed to create {label}"))?;
601    ensure_existing_ancestors_have_no_symlinks(path, label)?;
602
603    let metadata =
604        fs::symlink_metadata(path).with_context(|| format!("Failed to inspect {label}"))?;
605    let file_type = metadata.file_type();
606    if file_type.is_symlink() {
607        bail!("{label} must not be a symlink: {}", path.display());
608    }
609    if !file_type.is_dir() {
610        bail!("{label} must be a directory: {}", path.display());
611    }
612    Ok(())
613}
614
615fn ensure_existing_ancestors_have_no_symlinks(path: &Path, label: &str) -> Result<()> {
616    let mut ancestors: Vec<PathBuf> = path
617        .ancestors()
618        .filter(|ancestor| !ancestor.as_os_str().is_empty())
619        .map(Path::to_path_buf)
620        .collect();
621    ancestors.reverse();
622
623    for ancestor in ancestors {
624        match fs::symlink_metadata(&ancestor) {
625            Ok(metadata) => {
626                let file_type = metadata.file_type();
627                if file_type.is_symlink() {
628                    if is_allowed_system_symlink_ancestor(&ancestor) {
629                        continue;
630                    }
631                    bail!("{label} must not contain symlinks: {}", ancestor.display());
632                }
633                if !file_type.is_dir() {
634                    bail!(
635                        "{label} parent path must be a directory: {}",
636                        ancestor.display()
637                    );
638                }
639            }
640            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
641            Err(err) => {
642                return Err(err)
643                    .with_context(|| format!("Failed to inspect {label} {}", ancestor.display()));
644            }
645        }
646    }
647
648    Ok(())
649}
650
/// macOS: `/var` and `/tmp` are the only symlinked ancestors that are
/// tolerated.
#[cfg(target_os = "macos")]
fn is_allowed_system_symlink_ancestor(path: &Path) -> bool {
    ["/var", "/tmp"]
        .iter()
        .any(|allowed| path == Path::new(allowed))
}

/// Other platforms: no symlinked ancestor is tolerated.
#[cfg(not(target_os = "macos"))]
fn is_allowed_system_symlink_ancestor(_path: &Path) -> bool {
    false
}
660
661fn write_ciphertext_file(path: &Path, bytes: &[u8], label: &str) -> Result<()> {
662    ensure_replaceable_regular_file(path, label)?;
663    let (mut pending, file) = PendingCiphertextFile::create(path, label)?;
664    let mut writer = BufWriter::new(file);
665    writer
666        .write_all(bytes)
667        .with_context(|| format!("Failed to write {label} {}", pending.path().display()))?;
668    writer
669        .flush()
670        .with_context(|| format!("Failed to flush {label} {}", pending.path().display()))?;
671    writer
672        .get_ref()
673        .sync_all()
674        .with_context(|| format!("Failed to sync {label} {}", pending.path().display()))?;
675    drop(writer);
676    pending.persist(path, label)
677}
678
679fn ensure_replaceable_regular_file(path: &Path, label: &str) -> Result<()> {
680    match fs::symlink_metadata(path) {
681        Ok(metadata) => {
682            let file_type = metadata.file_type();
683            if file_type.is_symlink() {
684                bail!(
685                    "Refusing to write {label} through symlink: {}",
686                    path.display()
687                );
688            }
689            if !file_type.is_file() {
690                bail!(
691                    "Refusing to replace {label} at non-file path: {}",
692                    path.display()
693                );
694            }
695            Ok(())
696        }
697        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
698        Err(err) => {
699            Err(err).with_context(|| format!("Failed to inspect {label} {}", path.display()))
700        }
701    }
702}
703
/// Guard for a temporary ciphertext file that has not been installed yet.
///
/// Unless `keep` is set, dropping the guard removes the file at `path`.
struct PendingCiphertextFile {
    /// Location of the temporary file
    path: PathBuf,
    /// Set once the file has been installed at its final path; suppresses
    /// the removal on drop
    keep: bool,
}
708
709impl PendingCiphertextFile {
710    fn create(final_path: &Path, label: &str) -> Result<(Self, File)> {
711        let parent = output_parent(final_path);
712        let file_name = final_path
713            .file_name()
714            .ok_or_else(|| anyhow::anyhow!("{label} path must name a file"))?
715            .to_string_lossy();
716
717        for attempt in 0..100u32 {
718            let random: u64 = rand::random();
719            let temp_path = parent.join(format!(
720                ".{file_name}.cass-attachment-tmp.{}.{}.{:016x}",
721                std::process::id(),
722                attempt,
723                random
724            ));
725
726            match OpenOptions::new()
727                .write(true)
728                .create_new(true)
729                .open(&temp_path)
730            {
731                Ok(file) => {
732                    return Ok((
733                        Self {
734                            path: temp_path,
735                            keep: false,
736                        },
737                        file,
738                    ));
739                }
740                Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => continue,
741                Err(err) => {
742                    return Err(err).with_context(|| {
743                        format!("Failed to create temporary {label} {}", temp_path.display())
744                    });
745                }
746            }
747        }
748
749        bail!(
750            "Failed to create a unique temporary {label} next to {} after 100 attempts",
751            final_path.display()
752        );
753    }
754
755    fn path(&self) -> &Path {
756        &self.path
757    }
758
759    fn persist(&mut self, final_path: &Path, label: &str) -> Result<()> {
760        replace_ciphertext_file_from_temp(&self.path, final_path, label)?;
761        self.keep = true;
762        Ok(())
763    }
764}
765
766impl Drop for PendingCiphertextFile {
767    fn drop(&mut self) {
768        if !self.keep {
769            let _ = fs::remove_file(&self.path);
770        }
771    }
772}
773
/// Directory that contains `path`.
///
/// Falls back to `"."` when `path` has no parent or its parent is empty
/// (for example a bare file name).
fn output_parent(path: &Path) -> &Path {
    match path.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => dir,
        _ => Path::new("."),
    }
}
779
780fn replace_ciphertext_file_from_temp(
781    temp_path: &Path,
782    final_path: &Path,
783    label: &str,
784) -> Result<()> {
785    replace_ciphertext_file_from_temp_impl(temp_path, final_path, label)?;
786    sync_parent_directory(final_path)
787}
788
789#[cfg(not(windows))]
790fn replace_ciphertext_file_from_temp_impl(
791    temp_path: &Path,
792    final_path: &Path,
793    label: &str,
794) -> Result<()> {
795    fs::rename(temp_path, final_path).with_context(|| {
796        format!(
797            "Failed to install {label} {} from {}",
798            final_path.display(),
799            temp_path.display()
800        )
801    })
802}
803
804#[cfg(windows)]
805fn replace_ciphertext_file_from_temp_impl(
806    temp_path: &Path,
807    final_path: &Path,
808    label: &str,
809) -> Result<()> {
810    ensure_replaceable_regular_file(final_path, label)?;
811    match fs::rename(temp_path, final_path) {
812        Ok(()) => Ok(()),
813        Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => {
814            fs::copy(temp_path, final_path).with_context(|| {
815                format!(
816                    "Failed to install {label} {} from {}",
817                    final_path.display(),
818                    temp_path.display()
819                )
820            })?;
821            fs::remove_file(temp_path).with_context(|| {
822                format!(
823                    "Failed to remove temporary {label} {} after install",
824                    temp_path.display()
825                )
826            })?;
827            Ok(())
828        }
829        Err(err) => Err(err).with_context(|| {
830            format!(
831                "Failed to install {label} {} from {}",
832                final_path.display(),
833                temp_path.display()
834            )
835        }),
836    }
837}
838
839#[cfg(not(windows))]
840fn sync_parent_directory(path: &Path) -> Result<()> {
841    let Some(parent) = path
842        .parent()
843        .filter(|parent| !parent.as_os_str().is_empty())
844    else {
845        return Ok(());
846    };
847    File::open(parent)
848        .with_context(|| format!("Failed to open parent directory {}", parent.display()))?
849        .sync_all()
850        .with_context(|| format!("Failed to sync parent directory {}", parent.display()))
851}
852
853#[cfg(windows)]
854fn sync_parent_directory(_path: &Path) -> Result<()> {
855    Ok(())
856}
857
858fn ensure_regular_ciphertext_file(path: &Path, label: &str) -> Result<()> {
859    let metadata = fs::symlink_metadata(path)
860        .with_context(|| format!("Failed to inspect {label} at {}", path.display()))?;
861    let file_type = metadata.file_type();
862    if file_type.is_symlink() {
863        bail!("Refusing to read {label} from symlink: {}", path.display());
864    }
865    if !file_type.is_file() {
866        bail!(
867            "Refusing to read {label} from non-file path: {}",
868            path.display()
869        );
870    }
871    Ok(())
872}
873
874#[cfg(test)]
875mod tests {
876    use super::*;
877
878    #[test]
879    fn test_default_config_disabled() {
880        let config = AttachmentConfig::default();
881        assert!(!config.enabled);
882    }
883
884    #[test]
885    fn test_enabled_config() {
886        let config = AttachmentConfig::enabled();
887        assert!(config.enabled);
888        assert_eq!(config.max_file_size_bytes, DEFAULT_MAX_FILE_SIZE);
889        assert_eq!(config.max_total_size_bytes, DEFAULT_MAX_TOTAL_SIZE);
890    }
891
892    #[test]
893    fn test_mime_type_check() {
894        let config = AttachmentConfig::enabled();
895        assert!(config.is_mime_allowed("image/png"));
896        assert!(config.is_mime_allowed("IMAGE/PNG"));
897        assert!(config.is_mime_allowed("text/plain; charset=utf-8"));
898        assert!(config.is_mime_allowed("image/jpeg"));
899        assert!(config.is_mime_allowed("application/pdf"));
900        assert!(config.is_mime_allowed("text/plain"));
901        assert!(!config.is_mime_allowed("application/octet-stream"));
902        assert!(!config.is_mime_allowed("video/mp4"));
903        assert!(!config.is_mime_allowed("image/png-malicious"));
904        assert!(!config.is_mime_allowed("text/html+xml"));
905        assert!(!config.is_mime_allowed(""));
906    }
907
908    #[test]
909    fn test_size_limit_per_file() {
910        let config = AttachmentConfig::enabled().with_max_file_size(1024);
911        let mut processor = AttachmentProcessor::new(config);
912
913        let large_attachment = AttachmentData {
914            filename: "large.txt".to_string(),
915            mime_type: "text/plain".to_string(),
916            data: vec![0u8; 2048], // Over limit
917        };
918
919        let refs = processor
920            .process_attachments(1, &[large_attachment])
921            .unwrap();
922
923        assert!(refs.is_empty()); // Should be skipped
924        assert_eq!(processor.skipped_count(), 1);
925    }
926
927    #[test]
928    fn test_total_size_limit() {
929        let config = AttachmentConfig::enabled()
930            .with_max_file_size(1024)
931            .with_max_total_size(2048);
932        let mut processor = AttachmentProcessor::new(config);
933
934        // Add 3 attachments of ~800 bytes each - should only get 2
935        for i in 0..3 {
936            let attachment = AttachmentData {
937                filename: format!("file{}.txt", i),
938                mime_type: "text/plain".to_string(),
939                data: vec![i as u8; 800],
940            };
941            processor.process_attachments(i as i64, &[attachment]).ok();
942        }
943
944        assert_eq!(processor.count(), 2);
945        assert_eq!(processor.skipped_count(), 1);
946    }
947
948    #[test]
949    fn test_deduplication() {
950        let config = AttachmentConfig::enabled();
951        let mut processor = AttachmentProcessor::new(config);
952
953        let data = vec![1u8, 2, 3, 4, 5];
954
955        // Same data in two attachments
956        let attachment1 = AttachmentData {
957            filename: "file1.txt".to_string(),
958            mime_type: "text/plain".to_string(),
959            data: data.clone(),
960        };
961        let attachment2 = AttachmentData {
962            filename: "file2.txt".to_string(),
963            mime_type: "text/plain".to_string(),
964            data: data.clone(),
965        };
966
967        processor.process_attachments(1, &[attachment1]).unwrap();
968        processor.process_attachments(2, &[attachment2]).unwrap();
969
970        // Two entries but only one unique blob
971        assert_eq!(processor.count(), 2);
972        assert_eq!(processor.blobs.len(), 1);
973        // Size should only count once
974        assert_eq!(processor.total_size(), data.len());
975    }
976
977    #[test]
978    fn test_sha256_hash() {
979        let data = b"hello world";
980        let hash = compute_sha256_hex(data);
981        assert_eq!(
982            hash,
983            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
984        );
985    }
986
987    #[test]
988    fn test_blob_nonce_deterministic() {
989        let nonce1 = derive_blob_nonce("test-hash");
990        let nonce2 = derive_blob_nonce("test-hash");
991        assert_eq!(nonce1, nonce2);
992
993        let nonce3 = derive_blob_nonce("different-hash");
994        assert_ne!(nonce1, nonce3);
995    }
996
997    #[test]
998    fn test_blob_encryption_roundtrip() {
999        let data = b"secret attachment data";
1000        let dek = [0x42u8; 32];
1001        let export_id = [0x01u8; 16];
1002        let hash = compute_sha256_hex(data);
1003
1004        // Encrypt
1005        let cipher = Aes256Gcm::new_from_slice(&dek).unwrap();
1006        let nonce = derive_blob_nonce(&hash);
1007        let hash_bytes = hex::decode(&hash).unwrap();
1008        let mut aad = Vec::new();
1009        aad.extend_from_slice(&export_id);
1010        aad.extend_from_slice(&hash_bytes);
1011
1012        let ciphertext = cipher
1013            .encrypt(
1014                Nonce::from_slice(&nonce),
1015                Payload {
1016                    msg: &data[..],
1017                    aad: &aad,
1018                },
1019            )
1020            .unwrap();
1021
1022        // Decrypt
1023        let plaintext = decrypt_blob(&ciphertext, &dek, &export_id, &hash).unwrap();
1024
1025        assert_eq!(plaintext, data);
1026    }
1027
1028    #[test]
1029    fn test_write_encrypted_blobs() {
1030        use tempfile::TempDir;
1031
1032        let config = AttachmentConfig::enabled();
1033        let mut processor = AttachmentProcessor::new(config);
1034
1035        let attachment = AttachmentData {
1036            filename: "test.txt".to_string(),
1037            mime_type: "text/plain".to_string(),
1038            data: b"test content".to_vec(),
1039        };
1040
1041        processor.process_attachments(1, &[attachment]).unwrap();
1042
1043        let temp_dir = TempDir::new().unwrap();
1044        let dek = [0x42u8; 32];
1045        let export_id = [0x01u8; 16];
1046
1047        let manifest = processor
1048            .write_encrypted_blobs(temp_dir.path(), &dek, &export_id)
1049            .unwrap();
1050
1051        // Check blobs directory exists
1052        let blobs_dir = temp_dir.path().join("blobs");
1053        assert!(blobs_dir.exists());
1054
1055        // Check manifest.enc exists
1056        assert!(blobs_dir.join("manifest.enc").exists());
1057
1058        // Check manifest contents
1059        assert_eq!(manifest.entries.len(), 1);
1060        assert_eq!(manifest.entries[0].filename, "test.txt");
1061
1062        // Check blob file exists
1063        let blob_path = blobs_dir.join(format!("{}.bin", manifest.entries[0].hash));
1064        assert!(blob_path.exists());
1065
1066        // Verify decryption
1067        let ciphertext = std::fs::read(&blob_path).unwrap();
1068        let plaintext =
1069            decrypt_blob(&ciphertext, &dek, &export_id, &manifest.entries[0].hash).unwrap();
1070        assert_eq!(plaintext, b"test content");
1071    }
1072
1073    #[test]
1074    #[cfg(unix)]
1075    fn test_write_encrypted_blobs_rejects_symlinked_blobs_directory() {
1076        use std::os::unix::fs::symlink;
1077        use tempfile::TempDir;
1078
1079        let config = AttachmentConfig::enabled();
1080        let mut processor = AttachmentProcessor::new(config);
1081        let attachment = AttachmentData {
1082            filename: "test.txt".to_string(),
1083            mime_type: "text/plain".to_string(),
1084            data: b"test content".to_vec(),
1085        };
1086        processor.process_attachments(1, &[attachment]).unwrap();
1087
1088        let output_dir = TempDir::new().unwrap();
1089        let outside_dir = TempDir::new().unwrap();
1090        symlink(outside_dir.path(), output_dir.path().join("blobs")).unwrap();
1091
1092        let dek = [0x42u8; 32];
1093        let export_id = [0x01u8; 16];
1094        let err = processor
1095            .write_encrypted_blobs(output_dir.path(), &dek, &export_id)
1096            .unwrap_err();
1097
1098        assert!(
1099            err.to_string().contains("must not contain symlinks")
1100                || err.to_string().contains("must not be a symlink"),
1101            "unexpected error: {err:#}"
1102        );
1103        assert!(
1104            !outside_dir.path().join("manifest.enc").exists(),
1105            "attachment writer must not write through a symlinked blobs directory"
1106        );
1107    }
1108
1109    #[test]
1110    #[cfg(unix)]
1111    fn test_write_encrypted_blobs_rejects_symlinked_blob_file() {
1112        use std::os::unix::fs::symlink;
1113        use tempfile::TempDir;
1114
1115        let config = AttachmentConfig::enabled();
1116        let mut processor = AttachmentProcessor::new(config);
1117        let data = b"test content".to_vec();
1118        let hash = compute_sha256_hex(&data);
1119        let attachment = AttachmentData {
1120            filename: "test.txt".to_string(),
1121            mime_type: "text/plain".to_string(),
1122            data,
1123        };
1124        processor.process_attachments(1, &[attachment]).unwrap();
1125
1126        let output_dir = TempDir::new().unwrap();
1127        let blobs_dir = output_dir.path().join("blobs");
1128        fs::create_dir_all(&blobs_dir).unwrap();
1129        let protected_target = output_dir.path().join("protected.bin");
1130        fs::write(&protected_target, b"do not overwrite").unwrap();
1131        symlink(&protected_target, blobs_dir.join(format!("{hash}.bin"))).unwrap();
1132
1133        let dek = [0x42u8; 32];
1134        let export_id = [0x01u8; 16];
1135        let err = processor
1136            .write_encrypted_blobs(output_dir.path(), &dek, &export_id)
1137            .unwrap_err();
1138
1139        assert!(
1140            err.to_string().contains("through symlink"),
1141            "unexpected error: {err:#}"
1142        );
1143        assert_eq!(
1144            fs::read(&protected_target).unwrap(),
1145            b"do not overwrite",
1146            "attachment writer must not clobber a symlink target"
1147        );
1148    }
1149
1150    #[test]
1151    fn test_manifest_encryption_roundtrip() {
1152        let manifest = AttachmentManifest {
1153            version: 1,
1154            entries: vec![AttachmentEntry {
1155                hash: "abc123".to_string(),
1156                filename: "test.txt".to_string(),
1157                mime_type: "text/plain".to_string(),
1158                size_bytes: 100,
1159                message_id: 1,
1160            }],
1161            total_size_bytes: 100,
1162        };
1163
1164        let dek = [0x42u8; 32];
1165        let export_id = [0x01u8; 16];
1166
1167        // Encrypt
1168        let cipher = Aes256Gcm::new_from_slice(&dek).unwrap();
1169        let nonce = derive_blob_nonce("manifest");
1170        let manifest_json = serde_json::to_vec(&manifest).unwrap();
1171
1172        let ciphertext = cipher
1173            .encrypt(
1174                Nonce::from_slice(&nonce),
1175                Payload {
1176                    msg: &manifest_json,
1177                    aad: &export_id,
1178                },
1179            )
1180            .unwrap();
1181
1182        // Decrypt
1183        let decrypted = decrypt_manifest(&ciphertext, &dek, &export_id).unwrap();
1184
1185        assert_eq!(decrypted.entries.len(), 1);
1186        assert_eq!(decrypted.entries[0].hash, "abc123");
1187    }
1188
1189    #[test]
1190    fn test_reencrypt_existing_blobs_roundtrip() {
1191        use tempfile::TempDir;
1192
1193        let config = AttachmentConfig::enabled();
1194        let mut processor = AttachmentProcessor::new(config);
1195        let attachment = AttachmentData {
1196            filename: "test.txt".to_string(),
1197            mime_type: "text/plain".to_string(),
1198            data: b"test content".to_vec(),
1199        };
1200        processor.process_attachments(1, &[attachment]).unwrap();
1201
1202        let temp_dir = TempDir::new().unwrap();
1203        let old_dek = [0x42u8; 32];
1204        let old_export_id = [0x01u8; 16];
1205        let new_dek = [0x24u8; 32];
1206        let new_export_id = [0x02u8; 16];
1207
1208        let manifest = processor
1209            .write_encrypted_blobs(temp_dir.path(), &old_dek, &old_export_id)
1210            .unwrap();
1211
1212        reencrypt_blobs_into_dir(
1213            temp_dir.path(),
1214            temp_dir.path(),
1215            &old_dek,
1216            &old_export_id,
1217            &new_dek,
1218            &new_export_id,
1219        )
1220        .unwrap();
1221
1222        let blobs_dir = temp_dir.path().join("blobs");
1223        let manifest_ciphertext = fs::read(blobs_dir.join("manifest.enc")).unwrap();
1224        let decrypted_manifest =
1225            decrypt_manifest(&manifest_ciphertext, &new_dek, &new_export_id).unwrap();
1226        assert_eq!(decrypted_manifest.entries.len(), 1);
1227        assert_eq!(decrypted_manifest.entries[0].hash, manifest.entries[0].hash);
1228
1229        let blob_ciphertext =
1230            fs::read(blobs_dir.join(format!("{}.bin", manifest.entries[0].hash))).unwrap();
1231        let blob_plaintext = decrypt_blob(
1232            &blob_ciphertext,
1233            &new_dek,
1234            &new_export_id,
1235            &manifest.entries[0].hash,
1236        )
1237        .unwrap();
1238        assert_eq!(blob_plaintext, b"test content");
1239        assert!(decrypt_manifest(&manifest_ciphertext, &old_dek, &old_export_id).is_err());
1240    }
1241
1242    #[test]
1243    #[cfg(unix)]
1244    fn test_reencrypt_existing_blobs_rejects_symlinked_blobs_directory() {
1245        use std::os::unix::fs::symlink;
1246        use tempfile::TempDir;
1247
1248        let config = AttachmentConfig::enabled();
1249        let mut processor = AttachmentProcessor::new(config);
1250        let attachment = AttachmentData {
1251            filename: "test.txt".to_string(),
1252            mime_type: "text/plain".to_string(),
1253            data: b"test content".to_vec(),
1254        };
1255        processor.process_attachments(1, &[attachment]).unwrap();
1256
1257        let source_archive_dir = TempDir::new().unwrap();
1258        let outside_dir = TempDir::new().unwrap();
1259        let output_archive_dir = TempDir::new().unwrap();
1260        let old_dek = [0x42u8; 32];
1261        let old_export_id = [0x01u8; 16];
1262        let new_dek = [0x24u8; 32];
1263        let new_export_id = [0x02u8; 16];
1264
1265        processor
1266            .write_encrypted_blobs(outside_dir.path(), &old_dek, &old_export_id)
1267            .unwrap();
1268        symlink(
1269            outside_dir.path().join("blobs"),
1270            source_archive_dir.path().join("blobs"),
1271        )
1272        .unwrap();
1273
1274        let err = reencrypt_blobs_into_dir(
1275            source_archive_dir.path(),
1276            output_archive_dir.path(),
1277            &old_dek,
1278            &old_export_id,
1279            &new_dek,
1280            &new_export_id,
1281        )
1282        .unwrap_err();
1283
1284        assert!(
1285            err.to_string().contains("symlink"),
1286            "unexpected error: {err:#}"
1287        );
1288    }
1289
1290    #[test]
1291    #[cfg(unix)]
1292    fn test_reencrypt_existing_blobs_rejects_symlinked_destination_directory() {
1293        use std::os::unix::fs::symlink;
1294        use tempfile::TempDir;
1295
1296        let config = AttachmentConfig::enabled();
1297        let mut processor = AttachmentProcessor::new(config);
1298        let attachment = AttachmentData {
1299            filename: "test.txt".to_string(),
1300            mime_type: "text/plain".to_string(),
1301            data: b"test content".to_vec(),
1302        };
1303        processor.process_attachments(1, &[attachment]).unwrap();
1304
1305        let source_archive_dir = TempDir::new().unwrap();
1306        let output_archive_dir = TempDir::new().unwrap();
1307        let outside_dir = TempDir::new().unwrap();
1308        let old_dek = [0x42u8; 32];
1309        let old_export_id = [0x01u8; 16];
1310        let new_dek = [0x24u8; 32];
1311        let new_export_id = [0x02u8; 16];
1312
1313        processor
1314            .write_encrypted_blobs(source_archive_dir.path(), &old_dek, &old_export_id)
1315            .unwrap();
1316        fs::create_dir_all(outside_dir.path().join("elsewhere")).unwrap();
1317        symlink(
1318            outside_dir.path().join("elsewhere"),
1319            output_archive_dir.path().join("blobs"),
1320        )
1321        .unwrap();
1322
1323        let err = reencrypt_blobs_into_dir(
1324            source_archive_dir.path(),
1325            output_archive_dir.path(),
1326            &old_dek,
1327            &old_export_id,
1328            &new_dek,
1329            &new_export_id,
1330        )
1331        .unwrap_err();
1332
1333        assert!(
1334            err.to_string().contains("symlink"),
1335            "unexpected error: {err:#}"
1336        );
1337    }
1338
1339    #[test]
1340    #[cfg(unix)]
1341    fn test_reencrypt_existing_blobs_rejects_symlinked_destination_archive_dir() {
1342        use std::os::unix::fs::symlink;
1343        use tempfile::TempDir;
1344
1345        let config = AttachmentConfig::enabled();
1346        let mut processor = AttachmentProcessor::new(config);
1347        let attachment = AttachmentData {
1348            filename: "test.txt".to_string(),
1349            mime_type: "text/plain".to_string(),
1350            data: b"test content".to_vec(),
1351        };
1352        processor.process_attachments(1, &[attachment]).unwrap();
1353
1354        let source_archive_dir = TempDir::new().unwrap();
1355        let link_parent = TempDir::new().unwrap();
1356        let outside_dir = TempDir::new().unwrap();
1357        let output_archive_link = link_parent.path().join("archive-link");
1358        let old_dek = [0x42u8; 32];
1359        let old_export_id = [0x01u8; 16];
1360        let new_dek = [0x24u8; 32];
1361        let new_export_id = [0x02u8; 16];
1362
1363        processor
1364            .write_encrypted_blobs(source_archive_dir.path(), &old_dek, &old_export_id)
1365            .unwrap();
1366        symlink(outside_dir.path(), &output_archive_link).unwrap();
1367
1368        let err = reencrypt_blobs_into_dir(
1369            source_archive_dir.path(),
1370            &output_archive_link,
1371            &old_dek,
1372            &old_export_id,
1373            &new_dek,
1374            &new_export_id,
1375        )
1376        .unwrap_err();
1377
1378        assert!(
1379            err.to_string().contains("symlink"),
1380            "unexpected error: {err:#}"
1381        );
1382        assert!(
1383            !outside_dir.path().join("blobs/manifest.enc").exists(),
1384            "key rotation must not write attachments through a symlinked archive directory"
1385        );
1386    }
1387}