Skip to main content

coding_agent_search/pages/
attachments.rs

1//! Attachment support for pages export.
2//!
3//! Implements opt-in attachment handling for images, PDFs, and code snapshots
4//! with proper encryption, size limits, and lazy loading.
5//!
6//! # Overview
7//!
//! Attachments are stored in a `blobs/` directory with:
//! - Each blob stored as `<hash>.bin`, where `<hash>` is the hex SHA-256 of its plaintext
//! - Blobs individually encrypted, each with a nonce derived from its own hash
//! - An encrypted manifest file (`manifest.enc`) listing per-attachment metadata
12//!
13//! # Size Limits
14//!
//! - Per-file maximum: 10 MB (default, configurable)
//! - Total maximum: 100 MB (default, configurable)
17
18use aes_gcm::{
19    Aes256Gcm, Nonce,
20    aead::{Aead, KeyInit, Payload},
21};
22use anyhow::{Context, Result, bail};
23use serde::{Deserialize, Serialize};
24use sha2::{Digest, Sha256};
25use std::collections::HashMap;
26use std::fs::{self, File, OpenOptions};
27use std::io::{BufWriter, Write};
28use std::path::{Path, PathBuf};
29use tracing::{debug, info, warn};
30
/// Default maximum size per attachment, in bytes (10 MiB).
///
/// Used as `max_file_size_bytes` by `AttachmentConfig::default()`.
pub const DEFAULT_MAX_FILE_SIZE: usize = 10 * 1024 * 1024;

/// Default maximum total size for all stored attachments, in bytes (100 MiB).
///
/// Used as `max_total_size_bytes` by `AttachmentConfig::default()`.
pub const DEFAULT_MAX_TOTAL_SIZE: usize = 100 * 1024 * 1024;

/// Default allowed MIME types.
///
/// Each entry is a bare `type/subtype` string without parameters.
pub const DEFAULT_ALLOWED_MIME_TYPES: &[&str] = &[
    // Images
    "image/png",
    "image/jpeg",
    "image/gif",
    "image/webp",
    "image/svg+xml",
    // Documents
    "application/pdf",
    // Text
    "text/plain",
    "text/html",
    "text/css",
    "text/javascript",
    "application/json",
    "application/xml",
];

/// Nonce derivation domain separator for blob encryption.
///
/// Passed to `crate::encryption::hkdf_extract_expand` by `derive_blob_nonce`,
/// which is used on both the encrypt and decrypt paths.
/// NOTE(review): changing this value presumably makes previously written
/// blobs undecryptable — confirm against the HKDF helper before editing.
const BLOB_NONCE_DOMAIN: &[u8] = b"cass-blob-nonce-v1";
58
/// Configuration for attachment processing.
///
/// The `Default` implementation yields a *disabled* configuration with the
/// module's default limits; `AttachmentConfig::enabled()` opts in.
#[derive(Debug, Clone)]
pub struct AttachmentConfig {
    /// Whether attachment processing is enabled
    pub enabled: bool,
    /// Maximum size per file in bytes; larger attachments are skipped
    pub max_file_size_bytes: usize,
    /// Maximum total size for all attachments in bytes
    pub max_total_size_bytes: usize,
    /// Allowed MIME types; matched case-insensitively with any `;parameters` ignored
    pub allowed_mime_types: Vec<String>,
}
71
72impl Default for AttachmentConfig {
73    fn default() -> Self {
74        Self {
75            enabled: false, // Disabled by default
76            max_file_size_bytes: DEFAULT_MAX_FILE_SIZE,
77            max_total_size_bytes: DEFAULT_MAX_TOTAL_SIZE,
78            allowed_mime_types: DEFAULT_ALLOWED_MIME_TYPES
79                .iter()
80                .map(|s| s.to_string())
81                .collect(),
82        }
83    }
84}
85
86impl AttachmentConfig {
87    /// Create a new config with attachments enabled
88    pub fn enabled() -> Self {
89        Self {
90            enabled: true,
91            ..Default::default()
92        }
93    }
94
95    /// Set the maximum file size
96    pub fn with_max_file_size(mut self, bytes: usize) -> Self {
97        self.max_file_size_bytes = bytes;
98        self
99    }
100
101    /// Set the maximum total size
102    pub fn with_max_total_size(mut self, bytes: usize) -> Self {
103        self.max_total_size_bytes = bytes;
104        self
105    }
106
107    /// Check if a MIME type is allowed
108    pub fn is_mime_allowed(&self, mime_type: &str) -> bool {
109        let Some(mime_type) = mime_type_essence(mime_type) else {
110            return false;
111        };
112        self.allowed_mime_types
113            .iter()
114            .filter_map(|allowed| mime_type_essence(allowed))
115            .any(|allowed| mime_type == allowed)
116    }
117}
118
/// Normalizes a MIME type string to its lowercase `type/subtype` essence.
///
/// Everything from the first `;` onward (parameters such as `charset`) is
/// dropped and surrounding whitespace is trimmed. Returns `None` when nothing
/// remains after trimming.
fn mime_type_essence(mime_type: &str) -> Option<String> {
    let (head, _parameters) = mime_type.split_once(';').unwrap_or((mime_type, ""));
    match head.trim() {
        "" => None,
        essence => Some(essence.to_ascii_lowercase()),
    }
}
126
/// Raw attachment data from a connector.
///
/// This is the unencrypted input handed to
/// `AttachmentProcessor::process_attachments`.
#[derive(Debug, Clone)]
pub struct AttachmentData {
    /// Original filename
    pub filename: String,
    /// MIME type as reported by the source; checked against the allow-list
    pub mime_type: String,
    /// Raw (plaintext) data bytes
    pub data: Vec<u8>,
}
137
/// Metadata for a processed attachment entry.
///
/// One entry is recorded per accepted attachment per message, so several
/// entries may share the same `hash` when identical data was deduplicated.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttachmentEntry {
    /// SHA-256 hash of plaintext as lowercase hex (used as blob filename, `<hash>.bin`)
    pub hash: String,
    /// Original filename
    pub filename: String,
    /// MIME type, stored exactly as supplied (not normalized)
    pub mime_type: String,
    /// Size of the plaintext in bytes
    pub size_bytes: usize,
    /// Associated message ID
    pub message_id: i64,
}
152
/// Manifest containing all attachment metadata.
///
/// Serialized as JSON and stored encrypted as `manifest.enc` in the blobs
/// directory.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AttachmentManifest {
    /// Version of the manifest format
    pub version: u8,
    /// List of all attachments (one per message reference, duplicates included)
    pub entries: Vec<AttachmentEntry>,
    /// Total plaintext size of all unique blobs, in bytes
    pub total_size_bytes: usize,
}
163
164impl Default for AttachmentManifest {
165    fn default() -> Self {
166        Self {
167            version: 1,
168            entries: Vec::new(),
169            total_size_bytes: 0,
170        }
171    }
172}
173
/// Attachment processor that collects and encrypts attachments.
///
/// Attachments are accumulated in memory by `process_attachments` and only
/// hit the disk when `write_encrypted_blobs` is called.
pub struct AttachmentProcessor {
    /// Limits and allow-list applied to every incoming attachment
    config: AttachmentConfig,
    /// One metadata entry per accepted attachment (duplicates included)
    entries: Vec<AttachmentEntry>,
    /// Map of hash -> data for deduplication
    blobs: HashMap<String, Vec<u8>>,
    /// Sum of plaintext sizes of the unique blobs held in `blobs`
    total_size: usize,
    /// Count of skipped attachments
    skipped_count: usize,
}
184
185impl AttachmentProcessor {
186    /// Create a new attachment processor with the given configuration
187    pub fn new(config: AttachmentConfig) -> Self {
188        Self {
189            config,
190            entries: Vec::new(),
191            blobs: HashMap::new(),
192            total_size: 0,
193            skipped_count: 0,
194        }
195    }
196
197    /// Check if attachment processing is enabled
198    pub fn is_enabled(&self) -> bool {
199        self.config.enabled
200    }
201
202    /// Get the current total size
203    pub fn total_size(&self) -> usize {
204        self.total_size
205    }
206
207    /// Get the number of processed attachments
208    pub fn count(&self) -> usize {
209        self.entries.len()
210    }
211
212    /// Get the number of skipped attachments
213    pub fn skipped_count(&self) -> usize {
214        self.skipped_count
215    }
216
217    /// Process attachments from a message
218    ///
219    /// Returns a list of blob hashes that were successfully processed.
220    /// Attachments that exceed size limits or have disallowed MIME types
221    /// are logged and skipped.
222    pub fn process_attachments(
223        &mut self,
224        message_id: i64,
225        attachments: &[AttachmentData],
226    ) -> Result<Vec<String>> {
227        if !self.config.enabled {
228            return Ok(Vec::new());
229        }
230
231        let mut refs = Vec::new();
232
233        for attachment in attachments {
234            // Check MIME type
235            if !self.config.is_mime_allowed(&attachment.mime_type) {
236                warn!(
237                    filename = %attachment.filename,
238                    mime_type = %attachment.mime_type,
239                    "Skipping attachment with disallowed MIME type"
240                );
241                self.skipped_count += 1;
242                continue;
243            }
244
245            // Check per-file size limit
246            if attachment.data.len() > self.config.max_file_size_bytes {
247                warn!(
248                    filename = %attachment.filename,
249                    size = attachment.data.len(),
250                    limit = self.config.max_file_size_bytes,
251                    "Skipping oversized attachment"
252                );
253                self.skipped_count += 1;
254                continue;
255            }
256
257            // Check total size limit
258            if self.total_size + attachment.data.len() > self.config.max_total_size_bytes {
259                warn!(
260                    filename = %attachment.filename,
261                    current_total = self.total_size,
262                    attachment_size = attachment.data.len(),
263                    limit = self.config.max_total_size_bytes,
264                    "Total attachment limit reached, skipping"
265                );
266                self.skipped_count += 1;
267                continue;
268            }
269
270            // Compute SHA-256 hash
271            let hash = compute_sha256_hex(&attachment.data);
272
273            // Check for deduplication
274            if self.blobs.contains_key(&hash) {
275                debug!(
276                    filename = %attachment.filename,
277                    hash = %hash,
278                    "Attachment already processed (deduplicated)"
279                );
280                // Still add the entry for this message
281                self.entries.push(AttachmentEntry {
282                    hash: hash.clone(),
283                    filename: attachment.filename.clone(),
284                    mime_type: attachment.mime_type.clone(),
285                    size_bytes: attachment.data.len(),
286                    message_id,
287                });
288                refs.push(hash);
289                continue;
290            }
291
292            // Store the blob
293            self.blobs.insert(hash.clone(), attachment.data.clone());
294            self.total_size += attachment.data.len();
295
296            // Create entry
297            self.entries.push(AttachmentEntry {
298                hash: hash.clone(),
299                filename: attachment.filename.clone(),
300                mime_type: attachment.mime_type.clone(),
301                size_bytes: attachment.data.len(),
302                message_id,
303            });
304
305            debug!(
306                filename = %attachment.filename,
307                hash = %hash,
308                size = attachment.data.len(),
309                "Processed attachment"
310            );
311
312            refs.push(hash);
313        }
314
315        Ok(refs)
316    }
317
318    /// Write encrypted blobs to the output directory
319    ///
320    /// Each blob is encrypted with AES-256-GCM using:
321    /// - DEK: Same data encryption key as main database
322    /// - Nonce: Derived from blob hash using HKDF
323    /// - AAD: export_id || hash bytes
324    pub fn write_encrypted_blobs(
325        &self,
326        output_dir: &Path,
327        dek: &[u8; 32],
328        export_id: &[u8; 16],
329    ) -> Result<AttachmentManifest> {
330        if self.blobs.is_empty() {
331            return Ok(AttachmentManifest::default());
332        }
333
334        let blobs_dir = output_dir.join("blobs");
335        ensure_real_output_directory(output_dir, "Attachment output directory")?;
336        ensure_real_output_directory(&blobs_dir, "Attachment blobs directory")?;
337
338        let cipher = Aes256Gcm::new_from_slice(dek).expect("Invalid DEK length");
339
340        for (hash, data) in &self.blobs {
341            let blob_path = blobs_dir.join(format!("{}.bin", hash));
342
343            // Derive nonce from hash
344            let nonce = derive_blob_nonce(hash);
345
346            // Build AAD: export_id || hash_bytes
347            let hash_bytes = hex::decode(hash).context("Invalid hash hex")?;
348            let mut aad = Vec::with_capacity(export_id.len() + hash_bytes.len());
349            aad.extend_from_slice(export_id);
350            aad.extend_from_slice(&hash_bytes);
351
352            // Encrypt
353            let ciphertext = cipher
354                .encrypt(
355                    Nonce::from_slice(&nonce),
356                    Payload {
357                        msg: data.as_slice(),
358                        aad: &aad,
359                    },
360                )
361                .map_err(|e| anyhow::anyhow!("Blob encryption failed: {}", e))?;
362
363            write_ciphertext_file(&blob_path, &ciphertext, "attachment blob")?;
364
365            debug!(hash = %hash, path = %blob_path.display(), "Wrote encrypted blob");
366        }
367
368        // Write encrypted manifest
369        let manifest = AttachmentManifest {
370            version: 1,
371            entries: self.entries.clone(),
372            total_size_bytes: self.total_size,
373        };
374
375        let manifest_json =
376            serde_json::to_vec(&manifest).context("Failed to serialize manifest")?;
377
378        // Use a fixed nonce for the manifest (derived from "manifest" string)
379        let manifest_nonce = derive_blob_nonce("manifest");
380
381        // AAD for manifest: just export_id
382        let manifest_ciphertext = cipher
383            .encrypt(
384                Nonce::from_slice(&manifest_nonce),
385                Payload {
386                    msg: &manifest_json,
387                    aad: export_id,
388                },
389            )
390            .map_err(|e| anyhow::anyhow!("Manifest encryption failed: {}", e))?;
391
392        let manifest_path = blobs_dir.join("manifest.enc");
393        write_ciphertext_file(&manifest_path, &manifest_ciphertext, "attachment manifest")?;
394
395        info!(
396            count = self.entries.len(),
397            unique_blobs = self.blobs.len(),
398            total_size = self.total_size,
399            skipped = self.skipped_count,
400            "Wrote encrypted attachments"
401        );
402
403        Ok(manifest)
404    }
405}
406
407/// Compute SHA-256 hash of data and return as lowercase hex string
408fn compute_sha256_hex(data: &[u8]) -> String {
409    let mut hasher = Sha256::new();
410    hasher.update(data);
411    let result = hasher.finalize();
412    hex::encode(result)
413}
414
415/// Derive a unique 12-byte nonce from a blob identifier using HKDF
416fn derive_blob_nonce(identifier: &str) -> [u8; 12] {
417    crate::encryption::hkdf_extract_expand(identifier.as_bytes(), BLOB_NONCE_DOMAIN, b"nonce", 12)
418        .expect("HKDF expansion should never fail for 12 bytes")
419        .try_into()
420        .expect("HKDF expansion should return the requested nonce length")
421}
422
423/// Decrypt a blob given the DEK, export_id, and hash
424pub fn decrypt_blob(
425    ciphertext: &[u8],
426    dek: &[u8; 32],
427    export_id: &[u8; 16],
428    hash: &str,
429) -> Result<Vec<u8>> {
430    let cipher = Aes256Gcm::new_from_slice(dek).expect("Invalid DEK length");
431
432    // Derive nonce from hash
433    let nonce = derive_blob_nonce(hash);
434
435    // Build AAD
436    let hash_bytes = hex::decode(hash).context("Invalid hash hex")?;
437    let mut aad = Vec::with_capacity(export_id.len() + hash_bytes.len());
438    aad.extend_from_slice(export_id);
439    aad.extend_from_slice(&hash_bytes);
440
441    // Decrypt
442    let plaintext = cipher
443        .decrypt(
444            Nonce::from_slice(&nonce),
445            Payload {
446                msg: ciphertext,
447                aad: &aad,
448            },
449        )
450        .map_err(|_| anyhow::anyhow!("Blob decryption failed"))?;
451
452    Ok(plaintext)
453}
454
455/// Decrypt the attachment manifest
456pub fn decrypt_manifest(
457    ciphertext: &[u8],
458    dek: &[u8; 32],
459    export_id: &[u8; 16],
460) -> Result<AttachmentManifest> {
461    let cipher = Aes256Gcm::new_from_slice(dek).expect("Invalid DEK length");
462
463    // Use fixed nonce for manifest
464    let nonce = derive_blob_nonce("manifest");
465
466    // Decrypt
467    let plaintext = cipher
468        .decrypt(
469            Nonce::from_slice(&nonce),
470            Payload {
471                msg: ciphertext,
472                aad: export_id,
473            },
474        )
475        .map_err(|_| anyhow::anyhow!("Manifest decryption failed"))?;
476
477    let manifest: AttachmentManifest =
478        serde_json::from_slice(&plaintext).context("Failed to deserialize manifest")?;
479
480    Ok(manifest)
481}
482
483pub(crate) fn reencrypt_blobs_into_dir(
484    source_archive_dir: &Path,
485    output_archive_dir: &Path,
486    old_dek: &[u8; 32],
487    old_export_id: &[u8; 16],
488    new_dek: &[u8; 32],
489    new_export_id: &[u8; 16],
490) -> Result<()> {
491    let source_blobs_dir = source_archive_dir.join("blobs");
492    ensure_existing_ancestors_have_no_symlinks(
493        &source_blobs_dir,
494        "Source attachment blobs directory",
495    )?;
496    match fs::symlink_metadata(&source_blobs_dir) {
497        Ok(meta) => {
498            let file_type = meta.file_type();
499            if file_type.is_symlink() {
500                bail!(
501                    "Refusing to re-encrypt attachments from symlinked blobs directory: {}",
502                    source_blobs_dir.display()
503                );
504            }
505            if !file_type.is_dir() {
506                bail!(
507                    "Refusing to re-encrypt attachments from non-directory blobs path: {}",
508                    source_blobs_dir.display()
509                );
510            }
511        }
512        Err(err) if err.kind() == std::io::ErrorKind::NotFound => return Ok(()),
513        Err(err) => {
514            return Err(err).with_context(|| {
515                format!(
516                    "Failed to inspect attachment blobs directory {}",
517                    source_blobs_dir.display()
518                )
519            });
520        }
521    }
522
523    let output_blobs_dir = output_archive_dir.join("blobs");
524    ensure_real_output_directory(&output_blobs_dir, "Destination attachment blobs directory")?;
525
526    let manifest_path = source_blobs_dir.join("manifest.enc");
527    ensure_regular_ciphertext_file(&manifest_path, "attachment manifest")?;
528    let manifest_ciphertext =
529        fs::read(&manifest_path).context("Failed to read attachment manifest for rekey")?;
530    let manifest = decrypt_manifest(&manifest_ciphertext, old_dek, old_export_id)
531        .context("Failed to decrypt attachment manifest during key rotation")?;
532
533    let mut plaintext_blobs: HashMap<String, Vec<u8>> = HashMap::new();
534    for entry in &manifest.entries {
535        if plaintext_blobs.contains_key(&entry.hash) {
536            continue;
537        }
538
539        let blob_path = source_blobs_dir.join(format!("{}.bin", entry.hash));
540        ensure_regular_ciphertext_file(&blob_path, &format!("attachment blob {}", entry.hash))?;
541        let ciphertext = fs::read(&blob_path)
542            .with_context(|| format!("Failed to read attachment blob {}", entry.hash))?;
543        let plaintext = decrypt_blob(&ciphertext, old_dek, old_export_id, &entry.hash)
544            .with_context(|| format!("Failed to decrypt attachment blob {}", entry.hash))?;
545        plaintext_blobs.insert(entry.hash.clone(), plaintext);
546    }
547
548    let cipher = Aes256Gcm::new_from_slice(new_dek).expect("Invalid DEK length");
549
550    for (hash, data) in plaintext_blobs {
551        let nonce = derive_blob_nonce(&hash);
552        let hash_bytes = hex::decode(&hash).context("Invalid hash hex")?;
553        let mut aad = Vec::with_capacity(new_export_id.len() + hash_bytes.len());
554        aad.extend_from_slice(new_export_id);
555        aad.extend_from_slice(&hash_bytes);
556
557        let ciphertext = cipher
558            .encrypt(
559                Nonce::from_slice(&nonce),
560                Payload {
561                    msg: data.as_slice(),
562                    aad: &aad,
563                },
564            )
565            .map_err(|e| anyhow::anyhow!("Blob encryption failed during key rotation: {}", e))?;
566
567        write_ciphertext_file(
568            &output_blobs_dir.join(format!("{}.bin", hash)),
569            &ciphertext,
570            "attachment blob",
571        )
572        .with_context(|| format!("Failed to rewrite attachment blob {}", hash))?;
573    }
574
575    let manifest_json =
576        serde_json::to_vec(&manifest).context("Failed to serialize attachment manifest")?;
577    let manifest_nonce = derive_blob_nonce("manifest");
578    let reencrypted_manifest = cipher
579        .encrypt(
580            Nonce::from_slice(&manifest_nonce),
581            Payload {
582                msg: &manifest_json,
583                aad: new_export_id,
584            },
585        )
586        .map_err(|e| anyhow::anyhow!("Manifest encryption failed during key rotation: {}", e))?;
587
588    write_ciphertext_file(
589        &output_blobs_dir.join("manifest.enc"),
590        &reencrypted_manifest,
591        "attachment manifest",
592    )
593    .context("Failed to rewrite attachment manifest during key rotation")?;
594
595    Ok(())
596}
597
598fn ensure_real_output_directory(path: &Path, label: &str) -> Result<()> {
599    ensure_existing_ancestors_have_no_symlinks(path, label)?;
600    fs::create_dir_all(path).with_context(|| format!("Failed to create {label}"))?;
601    ensure_existing_ancestors_have_no_symlinks(path, label)?;
602
603    let metadata =
604        fs::symlink_metadata(path).with_context(|| format!("Failed to inspect {label}"))?;
605    let file_type = metadata.file_type();
606    if file_type.is_symlink() {
607        bail!("{label} must not be a symlink: {}", path.display());
608    }
609    if !file_type.is_dir() {
610        bail!("{label} must be a directory: {}", path.display());
611    }
612    Ok(())
613}
614
615fn ensure_existing_ancestors_have_no_symlinks(path: &Path, label: &str) -> Result<()> {
616    let mut ancestors: Vec<PathBuf> = path
617        .ancestors()
618        .filter(|ancestor| !ancestor.as_os_str().is_empty())
619        .map(Path::to_path_buf)
620        .collect();
621    ancestors.reverse();
622
623    for ancestor in ancestors {
624        match fs::symlink_metadata(&ancestor) {
625            Ok(metadata) => {
626                let file_type = metadata.file_type();
627                if file_type.is_symlink() {
628                    bail!("{label} must not contain symlinks: {}", ancestor.display());
629                }
630                if !file_type.is_dir() {
631                    bail!(
632                        "{label} parent path must be a directory: {}",
633                        ancestor.display()
634                    );
635                }
636            }
637            Err(err) if err.kind() == std::io::ErrorKind::NotFound => {}
638            Err(err) => {
639                return Err(err)
640                    .with_context(|| format!("Failed to inspect {label} {}", ancestor.display()));
641            }
642        }
643    }
644
645    Ok(())
646}
647
648fn write_ciphertext_file(path: &Path, bytes: &[u8], label: &str) -> Result<()> {
649    ensure_replaceable_regular_file(path, label)?;
650    let (mut pending, file) = PendingCiphertextFile::create(path, label)?;
651    let mut writer = BufWriter::new(file);
652    writer
653        .write_all(bytes)
654        .with_context(|| format!("Failed to write {label} {}", pending.path().display()))?;
655    writer
656        .flush()
657        .with_context(|| format!("Failed to flush {label} {}", pending.path().display()))?;
658    writer
659        .get_ref()
660        .sync_all()
661        .with_context(|| format!("Failed to sync {label} {}", pending.path().display()))?;
662    drop(writer);
663    pending.persist(path, label)
664}
665
666fn ensure_replaceable_regular_file(path: &Path, label: &str) -> Result<()> {
667    match fs::symlink_metadata(path) {
668        Ok(metadata) => {
669            let file_type = metadata.file_type();
670            if file_type.is_symlink() {
671                bail!(
672                    "Refusing to write {label} through symlink: {}",
673                    path.display()
674                );
675            }
676            if !file_type.is_file() {
677                bail!(
678                    "Refusing to replace {label} at non-file path: {}",
679                    path.display()
680                );
681            }
682            Ok(())
683        }
684        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Ok(()),
685        Err(err) => {
686            Err(err).with_context(|| format!("Failed to inspect {label} {}", path.display()))
687        }
688    }
689}
690
/// Guard for a temporary ciphertext file that has not been installed yet.
///
/// Unless `keep` has been set (by a successful `persist`), dropping the guard
/// removes the file at `path`.
struct PendingCiphertextFile {
    /// Location of the temporary file
    path: PathBuf,
    /// When `true`, `Drop` leaves `path` alone
    keep: bool,
}
695
696impl PendingCiphertextFile {
697    fn create(final_path: &Path, label: &str) -> Result<(Self, File)> {
698        let parent = output_parent(final_path);
699        let file_name = final_path
700            .file_name()
701            .ok_or_else(|| anyhow::anyhow!("{label} path must name a file"))?
702            .to_string_lossy();
703
704        for attempt in 0..100u32 {
705            let random: u64 = rand::random();
706            let temp_path = parent.join(format!(
707                ".{file_name}.cass-attachment-tmp.{}.{}.{:016x}",
708                std::process::id(),
709                attempt,
710                random
711            ));
712
713            match OpenOptions::new()
714                .write(true)
715                .create_new(true)
716                .open(&temp_path)
717            {
718                Ok(file) => {
719                    return Ok((
720                        Self {
721                            path: temp_path,
722                            keep: false,
723                        },
724                        file,
725                    ));
726                }
727                Err(err) if err.kind() == std::io::ErrorKind::AlreadyExists => continue,
728                Err(err) => {
729                    return Err(err).with_context(|| {
730                        format!("Failed to create temporary {label} {}", temp_path.display())
731                    });
732                }
733            }
734        }
735
736        bail!(
737            "Failed to create a unique temporary {label} next to {} after 100 attempts",
738            final_path.display()
739        );
740    }
741
742    fn path(&self) -> &Path {
743        &self.path
744    }
745
746    fn persist(&mut self, final_path: &Path, label: &str) -> Result<()> {
747        replace_ciphertext_file_from_temp(&self.path, final_path, label)?;
748        self.keep = true;
749        Ok(())
750    }
751}
752
753impl Drop for PendingCiphertextFile {
754    fn drop(&mut self) {
755        if !self.keep {
756            let _ = fs::remove_file(&self.path);
757        }
758    }
759}
760
/// Returns the directory that contains `path`, falling back to `"."` when
/// `path` has no parent or its parent is the empty path (a bare file name).
fn output_parent(path: &Path) -> &Path {
    match path.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => dir,
        _ => Path::new("."),
    }
}
766
767fn replace_ciphertext_file_from_temp(
768    temp_path: &Path,
769    final_path: &Path,
770    label: &str,
771) -> Result<()> {
772    replace_ciphertext_file_from_temp_impl(temp_path, final_path, label)?;
773    sync_parent_directory(final_path)
774}
775
776#[cfg(not(windows))]
777fn replace_ciphertext_file_from_temp_impl(
778    temp_path: &Path,
779    final_path: &Path,
780    label: &str,
781) -> Result<()> {
782    fs::rename(temp_path, final_path).with_context(|| {
783        format!(
784            "Failed to install {label} {} from {}",
785            final_path.display(),
786            temp_path.display()
787        )
788    })
789}
790
/// Windows installation step.
///
/// Re-checks that `final_path` is replaceable, then tries a rename. If the
/// rename reports `AlreadyExists`, falls back to copying the temp file over
/// the destination and deleting the temp file afterwards.
#[cfg(windows)]
fn replace_ciphertext_file_from_temp_impl(
    temp_path: &Path,
    final_path: &Path,
    label: &str,
) -> Result<()> {
    // Shared message for both the rename and the copy fallback failing.
    let install_failed = || {
        format!(
            "Failed to install {label} {} from {}",
            final_path.display(),
            temp_path.display()
        )
    };

    ensure_replaceable_regular_file(final_path, label)?;

    let rename_err = match fs::rename(temp_path, final_path) {
        Ok(()) => return Ok(()),
        Err(err) => err,
    };
    if rename_err.kind() != std::io::ErrorKind::AlreadyExists {
        return Err(rename_err).with_context(install_failed);
    }

    // Fallback: overwrite by copy, then drop the temp file.
    fs::copy(temp_path, final_path).with_context(install_failed)?;
    fs::remove_file(temp_path).with_context(|| {
        format!(
            "Failed to remove temporary {label} {} after install",
            temp_path.display()
        )
    })
}
825
826#[cfg(not(windows))]
827fn sync_parent_directory(path: &Path) -> Result<()> {
828    let Some(parent) = path
829        .parent()
830        .filter(|parent| !parent.as_os_str().is_empty())
831    else {
832        return Ok(());
833    };
834    File::open(parent)
835        .with_context(|| format!("Failed to open parent directory {}", parent.display()))?
836        .sync_all()
837        .with_context(|| format!("Failed to sync parent directory {}", parent.display()))
838}
839
/// Windows counterpart of the parent-directory sync: a no-op that always
/// succeeds, so callers can invoke it unconditionally on every platform.
#[cfg(windows)]
fn sync_parent_directory(_path: &Path) -> Result<()> {
    Ok(())
}
844
845fn ensure_regular_ciphertext_file(path: &Path, label: &str) -> Result<()> {
846    let metadata = fs::symlink_metadata(path)
847        .with_context(|| format!("Failed to inspect {label} at {}", path.display()))?;
848    let file_type = metadata.file_type();
849    if file_type.is_symlink() {
850        bail!("Refusing to read {label} from symlink: {}", path.display());
851    }
852    if !file_type.is_file() {
853        bail!(
854            "Refusing to read {label} from non-file path: {}",
855            path.display()
856        );
857    }
858    Ok(())
859}
860
861#[cfg(test)]
862mod tests {
863    use super::*;
864
865    #[test]
866    fn test_default_config_disabled() {
867        let config = AttachmentConfig::default();
868        assert!(!config.enabled);
869    }
870
871    #[test]
872    fn test_enabled_config() {
873        let config = AttachmentConfig::enabled();
874        assert!(config.enabled);
875        assert_eq!(config.max_file_size_bytes, DEFAULT_MAX_FILE_SIZE);
876        assert_eq!(config.max_total_size_bytes, DEFAULT_MAX_TOTAL_SIZE);
877    }
878
879    #[test]
880    fn test_mime_type_check() {
881        let config = AttachmentConfig::enabled();
882        assert!(config.is_mime_allowed("image/png"));
883        assert!(config.is_mime_allowed("IMAGE/PNG"));
884        assert!(config.is_mime_allowed("text/plain; charset=utf-8"));
885        assert!(config.is_mime_allowed("image/jpeg"));
886        assert!(config.is_mime_allowed("application/pdf"));
887        assert!(config.is_mime_allowed("text/plain"));
888        assert!(!config.is_mime_allowed("application/octet-stream"));
889        assert!(!config.is_mime_allowed("video/mp4"));
890        assert!(!config.is_mime_allowed("image/png-malicious"));
891        assert!(!config.is_mime_allowed("text/html+xml"));
892        assert!(!config.is_mime_allowed(""));
893    }
894
895    #[test]
896    fn test_size_limit_per_file() {
897        let config = AttachmentConfig::enabled().with_max_file_size(1024);
898        let mut processor = AttachmentProcessor::new(config);
899
900        let large_attachment = AttachmentData {
901            filename: "large.txt".to_string(),
902            mime_type: "text/plain".to_string(),
903            data: vec![0u8; 2048], // Over limit
904        };
905
906        let refs = processor
907            .process_attachments(1, &[large_attachment])
908            .unwrap();
909
910        assert!(refs.is_empty()); // Should be skipped
911        assert_eq!(processor.skipped_count(), 1);
912    }
913
914    #[test]
915    fn test_total_size_limit() {
916        let config = AttachmentConfig::enabled()
917            .with_max_file_size(1024)
918            .with_max_total_size(2048);
919        let mut processor = AttachmentProcessor::new(config);
920
921        // Add 3 attachments of ~800 bytes each - should only get 2
922        for i in 0..3 {
923            let attachment = AttachmentData {
924                filename: format!("file{}.txt", i),
925                mime_type: "text/plain".to_string(),
926                data: vec![i as u8; 800],
927            };
928            processor.process_attachments(i as i64, &[attachment]).ok();
929        }
930
931        assert_eq!(processor.count(), 2);
932        assert_eq!(processor.skipped_count(), 1);
933    }
934
935    #[test]
936    fn test_deduplication() {
937        let config = AttachmentConfig::enabled();
938        let mut processor = AttachmentProcessor::new(config);
939
940        let data = vec![1u8, 2, 3, 4, 5];
941
942        // Same data in two attachments
943        let attachment1 = AttachmentData {
944            filename: "file1.txt".to_string(),
945            mime_type: "text/plain".to_string(),
946            data: data.clone(),
947        };
948        let attachment2 = AttachmentData {
949            filename: "file2.txt".to_string(),
950            mime_type: "text/plain".to_string(),
951            data: data.clone(),
952        };
953
954        processor.process_attachments(1, &[attachment1]).unwrap();
955        processor.process_attachments(2, &[attachment2]).unwrap();
956
957        // Two entries but only one unique blob
958        assert_eq!(processor.count(), 2);
959        assert_eq!(processor.blobs.len(), 1);
960        // Size should only count once
961        assert_eq!(processor.total_size(), data.len());
962    }
963
964    #[test]
965    fn test_sha256_hash() {
966        let data = b"hello world";
967        let hash = compute_sha256_hex(data);
968        assert_eq!(
969            hash,
970            "b94d27b9934d3e08a52e52d7da7dabfac484efe37a5380ee9088f7ace2efcde9"
971        );
972    }
973
974    #[test]
975    fn test_blob_nonce_deterministic() {
976        let nonce1 = derive_blob_nonce("test-hash");
977        let nonce2 = derive_blob_nonce("test-hash");
978        assert_eq!(nonce1, nonce2);
979
980        let nonce3 = derive_blob_nonce("different-hash");
981        assert_ne!(nonce1, nonce3);
982    }
983
984    #[test]
985    fn test_blob_encryption_roundtrip() {
986        let data = b"secret attachment data";
987        let dek = [0x42u8; 32];
988        let export_id = [0x01u8; 16];
989        let hash = compute_sha256_hex(data);
990
991        // Encrypt
992        let cipher = Aes256Gcm::new_from_slice(&dek).unwrap();
993        let nonce = derive_blob_nonce(&hash);
994        let hash_bytes = hex::decode(&hash).unwrap();
995        let mut aad = Vec::new();
996        aad.extend_from_slice(&export_id);
997        aad.extend_from_slice(&hash_bytes);
998
999        let ciphertext = cipher
1000            .encrypt(
1001                Nonce::from_slice(&nonce),
1002                Payload {
1003                    msg: &data[..],
1004                    aad: &aad,
1005                },
1006            )
1007            .unwrap();
1008
1009        // Decrypt
1010        let plaintext = decrypt_blob(&ciphertext, &dek, &export_id, &hash).unwrap();
1011
1012        assert_eq!(plaintext, data);
1013    }
1014
1015    #[test]
1016    fn test_write_encrypted_blobs() {
1017        use tempfile::TempDir;
1018
1019        let config = AttachmentConfig::enabled();
1020        let mut processor = AttachmentProcessor::new(config);
1021
1022        let attachment = AttachmentData {
1023            filename: "test.txt".to_string(),
1024            mime_type: "text/plain".to_string(),
1025            data: b"test content".to_vec(),
1026        };
1027
1028        processor.process_attachments(1, &[attachment]).unwrap();
1029
1030        let temp_dir = TempDir::new().unwrap();
1031        let dek = [0x42u8; 32];
1032        let export_id = [0x01u8; 16];
1033
1034        let manifest = processor
1035            .write_encrypted_blobs(temp_dir.path(), &dek, &export_id)
1036            .unwrap();
1037
1038        // Check blobs directory exists
1039        let blobs_dir = temp_dir.path().join("blobs");
1040        assert!(blobs_dir.exists());
1041
1042        // Check manifest.enc exists
1043        assert!(blobs_dir.join("manifest.enc").exists());
1044
1045        // Check manifest contents
1046        assert_eq!(manifest.entries.len(), 1);
1047        assert_eq!(manifest.entries[0].filename, "test.txt");
1048
1049        // Check blob file exists
1050        let blob_path = blobs_dir.join(format!("{}.bin", manifest.entries[0].hash));
1051        assert!(blob_path.exists());
1052
1053        // Verify decryption
1054        let ciphertext = std::fs::read(&blob_path).unwrap();
1055        let plaintext =
1056            decrypt_blob(&ciphertext, &dek, &export_id, &manifest.entries[0].hash).unwrap();
1057        assert_eq!(plaintext, b"test content");
1058    }
1059
1060    #[test]
1061    #[cfg(unix)]
1062    fn test_write_encrypted_blobs_rejects_symlinked_blobs_directory() {
1063        use std::os::unix::fs::symlink;
1064        use tempfile::TempDir;
1065
1066        let config = AttachmentConfig::enabled();
1067        let mut processor = AttachmentProcessor::new(config);
1068        let attachment = AttachmentData {
1069            filename: "test.txt".to_string(),
1070            mime_type: "text/plain".to_string(),
1071            data: b"test content".to_vec(),
1072        };
1073        processor.process_attachments(1, &[attachment]).unwrap();
1074
1075        let output_dir = TempDir::new().unwrap();
1076        let outside_dir = TempDir::new().unwrap();
1077        symlink(outside_dir.path(), output_dir.path().join("blobs")).unwrap();
1078
1079        let dek = [0x42u8; 32];
1080        let export_id = [0x01u8; 16];
1081        let err = processor
1082            .write_encrypted_blobs(output_dir.path(), &dek, &export_id)
1083            .unwrap_err();
1084
1085        assert!(
1086            err.to_string().contains("must not contain symlinks")
1087                || err.to_string().contains("must not be a symlink"),
1088            "unexpected error: {err:#}"
1089        );
1090        assert!(
1091            !outside_dir.path().join("manifest.enc").exists(),
1092            "attachment writer must not write through a symlinked blobs directory"
1093        );
1094    }
1095
1096    #[test]
1097    #[cfg(unix)]
1098    fn test_write_encrypted_blobs_rejects_symlinked_blob_file() {
1099        use std::os::unix::fs::symlink;
1100        use tempfile::TempDir;
1101
1102        let config = AttachmentConfig::enabled();
1103        let mut processor = AttachmentProcessor::new(config);
1104        let data = b"test content".to_vec();
1105        let hash = compute_sha256_hex(&data);
1106        let attachment = AttachmentData {
1107            filename: "test.txt".to_string(),
1108            mime_type: "text/plain".to_string(),
1109            data,
1110        };
1111        processor.process_attachments(1, &[attachment]).unwrap();
1112
1113        let output_dir = TempDir::new().unwrap();
1114        let blobs_dir = output_dir.path().join("blobs");
1115        fs::create_dir_all(&blobs_dir).unwrap();
1116        let protected_target = output_dir.path().join("protected.bin");
1117        fs::write(&protected_target, b"do not overwrite").unwrap();
1118        symlink(&protected_target, blobs_dir.join(format!("{hash}.bin"))).unwrap();
1119
1120        let dek = [0x42u8; 32];
1121        let export_id = [0x01u8; 16];
1122        let err = processor
1123            .write_encrypted_blobs(output_dir.path(), &dek, &export_id)
1124            .unwrap_err();
1125
1126        assert!(
1127            err.to_string().contains("through symlink"),
1128            "unexpected error: {err:#}"
1129        );
1130        assert_eq!(
1131            fs::read(&protected_target).unwrap(),
1132            b"do not overwrite",
1133            "attachment writer must not clobber a symlink target"
1134        );
1135    }
1136
1137    #[test]
1138    fn test_manifest_encryption_roundtrip() {
1139        let manifest = AttachmentManifest {
1140            version: 1,
1141            entries: vec![AttachmentEntry {
1142                hash: "abc123".to_string(),
1143                filename: "test.txt".to_string(),
1144                mime_type: "text/plain".to_string(),
1145                size_bytes: 100,
1146                message_id: 1,
1147            }],
1148            total_size_bytes: 100,
1149        };
1150
1151        let dek = [0x42u8; 32];
1152        let export_id = [0x01u8; 16];
1153
1154        // Encrypt
1155        let cipher = Aes256Gcm::new_from_slice(&dek).unwrap();
1156        let nonce = derive_blob_nonce("manifest");
1157        let manifest_json = serde_json::to_vec(&manifest).unwrap();
1158
1159        let ciphertext = cipher
1160            .encrypt(
1161                Nonce::from_slice(&nonce),
1162                Payload {
1163                    msg: &manifest_json,
1164                    aad: &export_id,
1165                },
1166            )
1167            .unwrap();
1168
1169        // Decrypt
1170        let decrypted = decrypt_manifest(&ciphertext, &dek, &export_id).unwrap();
1171
1172        assert_eq!(decrypted.entries.len(), 1);
1173        assert_eq!(decrypted.entries[0].hash, "abc123");
1174    }
1175
1176    #[test]
1177    fn test_reencrypt_existing_blobs_roundtrip() {
1178        use tempfile::TempDir;
1179
1180        let config = AttachmentConfig::enabled();
1181        let mut processor = AttachmentProcessor::new(config);
1182        let attachment = AttachmentData {
1183            filename: "test.txt".to_string(),
1184            mime_type: "text/plain".to_string(),
1185            data: b"test content".to_vec(),
1186        };
1187        processor.process_attachments(1, &[attachment]).unwrap();
1188
1189        let temp_dir = TempDir::new().unwrap();
1190        let old_dek = [0x42u8; 32];
1191        let old_export_id = [0x01u8; 16];
1192        let new_dek = [0x24u8; 32];
1193        let new_export_id = [0x02u8; 16];
1194
1195        let manifest = processor
1196            .write_encrypted_blobs(temp_dir.path(), &old_dek, &old_export_id)
1197            .unwrap();
1198
1199        reencrypt_blobs_into_dir(
1200            temp_dir.path(),
1201            temp_dir.path(),
1202            &old_dek,
1203            &old_export_id,
1204            &new_dek,
1205            &new_export_id,
1206        )
1207        .unwrap();
1208
1209        let blobs_dir = temp_dir.path().join("blobs");
1210        let manifest_ciphertext = fs::read(blobs_dir.join("manifest.enc")).unwrap();
1211        let decrypted_manifest =
1212            decrypt_manifest(&manifest_ciphertext, &new_dek, &new_export_id).unwrap();
1213        assert_eq!(decrypted_manifest.entries.len(), 1);
1214        assert_eq!(decrypted_manifest.entries[0].hash, manifest.entries[0].hash);
1215
1216        let blob_ciphertext =
1217            fs::read(blobs_dir.join(format!("{}.bin", manifest.entries[0].hash))).unwrap();
1218        let blob_plaintext = decrypt_blob(
1219            &blob_ciphertext,
1220            &new_dek,
1221            &new_export_id,
1222            &manifest.entries[0].hash,
1223        )
1224        .unwrap();
1225        assert_eq!(blob_plaintext, b"test content");
1226        assert!(decrypt_manifest(&manifest_ciphertext, &old_dek, &old_export_id).is_err());
1227    }
1228
1229    #[test]
1230    #[cfg(unix)]
1231    fn test_reencrypt_existing_blobs_rejects_symlinked_blobs_directory() {
1232        use std::os::unix::fs::symlink;
1233        use tempfile::TempDir;
1234
1235        let config = AttachmentConfig::enabled();
1236        let mut processor = AttachmentProcessor::new(config);
1237        let attachment = AttachmentData {
1238            filename: "test.txt".to_string(),
1239            mime_type: "text/plain".to_string(),
1240            data: b"test content".to_vec(),
1241        };
1242        processor.process_attachments(1, &[attachment]).unwrap();
1243
1244        let source_archive_dir = TempDir::new().unwrap();
1245        let outside_dir = TempDir::new().unwrap();
1246        let output_archive_dir = TempDir::new().unwrap();
1247        let old_dek = [0x42u8; 32];
1248        let old_export_id = [0x01u8; 16];
1249        let new_dek = [0x24u8; 32];
1250        let new_export_id = [0x02u8; 16];
1251
1252        processor
1253            .write_encrypted_blobs(outside_dir.path(), &old_dek, &old_export_id)
1254            .unwrap();
1255        symlink(
1256            outside_dir.path().join("blobs"),
1257            source_archive_dir.path().join("blobs"),
1258        )
1259        .unwrap();
1260
1261        let err = reencrypt_blobs_into_dir(
1262            source_archive_dir.path(),
1263            output_archive_dir.path(),
1264            &old_dek,
1265            &old_export_id,
1266            &new_dek,
1267            &new_export_id,
1268        )
1269        .unwrap_err();
1270
1271        assert!(
1272            err.to_string().contains("symlink"),
1273            "unexpected error: {err:#}"
1274        );
1275    }
1276
1277    #[test]
1278    #[cfg(unix)]
1279    fn test_reencrypt_existing_blobs_rejects_symlinked_destination_directory() {
1280        use std::os::unix::fs::symlink;
1281        use tempfile::TempDir;
1282
1283        let config = AttachmentConfig::enabled();
1284        let mut processor = AttachmentProcessor::new(config);
1285        let attachment = AttachmentData {
1286            filename: "test.txt".to_string(),
1287            mime_type: "text/plain".to_string(),
1288            data: b"test content".to_vec(),
1289        };
1290        processor.process_attachments(1, &[attachment]).unwrap();
1291
1292        let source_archive_dir = TempDir::new().unwrap();
1293        let output_archive_dir = TempDir::new().unwrap();
1294        let outside_dir = TempDir::new().unwrap();
1295        let old_dek = [0x42u8; 32];
1296        let old_export_id = [0x01u8; 16];
1297        let new_dek = [0x24u8; 32];
1298        let new_export_id = [0x02u8; 16];
1299
1300        processor
1301            .write_encrypted_blobs(source_archive_dir.path(), &old_dek, &old_export_id)
1302            .unwrap();
1303        fs::create_dir_all(outside_dir.path().join("elsewhere")).unwrap();
1304        symlink(
1305            outside_dir.path().join("elsewhere"),
1306            output_archive_dir.path().join("blobs"),
1307        )
1308        .unwrap();
1309
1310        let err = reencrypt_blobs_into_dir(
1311            source_archive_dir.path(),
1312            output_archive_dir.path(),
1313            &old_dek,
1314            &old_export_id,
1315            &new_dek,
1316            &new_export_id,
1317        )
1318        .unwrap_err();
1319
1320        assert!(
1321            err.to_string().contains("symlink"),
1322            "unexpected error: {err:#}"
1323        );
1324    }
1325
1326    #[test]
1327    #[cfg(unix)]
1328    fn test_reencrypt_existing_blobs_rejects_symlinked_destination_archive_dir() {
1329        use std::os::unix::fs::symlink;
1330        use tempfile::TempDir;
1331
1332        let config = AttachmentConfig::enabled();
1333        let mut processor = AttachmentProcessor::new(config);
1334        let attachment = AttachmentData {
1335            filename: "test.txt".to_string(),
1336            mime_type: "text/plain".to_string(),
1337            data: b"test content".to_vec(),
1338        };
1339        processor.process_attachments(1, &[attachment]).unwrap();
1340
1341        let source_archive_dir = TempDir::new().unwrap();
1342        let link_parent = TempDir::new().unwrap();
1343        let outside_dir = TempDir::new().unwrap();
1344        let output_archive_link = link_parent.path().join("archive-link");
1345        let old_dek = [0x42u8; 32];
1346        let old_export_id = [0x01u8; 16];
1347        let new_dek = [0x24u8; 32];
1348        let new_export_id = [0x02u8; 16];
1349
1350        processor
1351            .write_encrypted_blobs(source_archive_dir.path(), &old_dek, &old_export_id)
1352            .unwrap();
1353        symlink(outside_dir.path(), &output_archive_link).unwrap();
1354
1355        let err = reencrypt_blobs_into_dir(
1356            source_archive_dir.path(),
1357            &output_archive_link,
1358            &old_dek,
1359            &old_export_id,
1360            &new_dek,
1361            &new_export_id,
1362        )
1363        .unwrap_err();
1364
1365        assert!(
1366            err.to_string().contains("symlink"),
1367            "unexpected error: {err:#}"
1368        );
1369        assert!(
1370            !outside_dir.path().join("blobs/manifest.enc").exists(),
1371            "key rotation must not write attachments through a symlinked archive directory"
1372        );
1373    }
1374}