Skip to main content

lash_core/attachments/
file_store.rs

1use std::collections::HashMap;
2use std::fs;
3use std::path::{Path, PathBuf};
4use std::sync::Mutex;
5
6use lash_sansio::{AttachmentCreateMeta, AttachmentId, AttachmentMeta, AttachmentRef};
7
8use super::{
9    AttachmentStore, AttachmentStoreError, AttachmentStorePersistence, StoredAttachment, content_id,
10};
11
12pub struct FileAttachmentStore {
13    root: PathBuf,
14    meta: Mutex<HashMap<AttachmentId, AttachmentMeta>>,
15}
16
17impl FileAttachmentStore {
18    pub fn new(root: impl Into<PathBuf>) -> Self {
19        Self {
20            root: root.into(),
21            meta: Mutex::new(HashMap::new()),
22        }
23    }
24
25    pub fn root(&self) -> &Path {
26        &self.root
27    }
28
29    /// Lock the in-memory metadata cache, recovering from a poisoned lock
30    /// rather than panicking. The cache is a best-effort fast path backed by
31    /// the on-disk `.json` sidecars, so a prior panic while it was held must
32    /// not permanently brick the store — `get`/`put` simply fall back to disk.
33    fn meta_cache(&self) -> std::sync::MutexGuard<'_, HashMap<AttachmentId, AttachmentMeta>> {
34        self.meta
35            .lock()
36            .unwrap_or_else(|poisoned| poisoned.into_inner())
37    }
38
39    fn path_for_id(&self, id: &AttachmentId) -> PathBuf {
40        let id = id.as_str();
41        let prefix = id.get(..2).unwrap_or(id);
42        self.root.join("sha256").join(prefix).join(id)
43    }
44
45    fn meta_path_for_id(&self, id: &AttachmentId) -> PathBuf {
46        self.path_for_id(id).with_extension("json")
47    }
48}
49
50/// Write `bytes` to `final_path` crash-atomically: stage into a sibling
51/// `<final>.tmp`, flush it, then `rename` into place. A `rename` within the
52/// same directory is atomic on POSIX, so a reader (or a crash) ever sees either
53/// the old contents or the complete new contents — never a half-written file.
54/// The temp file is removed on any failure so a crashed write leaves no
55/// `.tmp` litter behind.
56fn write_atomic(final_path: &Path, bytes: &[u8]) -> Result<(), AttachmentStoreError> {
57    let mut tmp_os = final_path.as_os_str().to_os_string();
58    tmp_os.push(".tmp");
59    let tmp_path = PathBuf::from(tmp_os);
60
61    let io_err = |path: &Path, source: std::io::Error| AttachmentStoreError::Io {
62        path: path.to_path_buf(),
63        source,
64    };
65
66    let write_result = (|| {
67        let mut file = fs::File::create(&tmp_path).map_err(|source| io_err(&tmp_path, source))?;
68        std::io::Write::write_all(&mut file, bytes).map_err(|source| io_err(&tmp_path, source))?;
69        // Best-effort durability for the staged bytes before the rename.
70        file.sync_all()
71            .map_err(|source| io_err(&tmp_path, source))?;
72        fs::rename(&tmp_path, final_path).map_err(|source| io_err(final_path, source))
73    })();
74
75    if write_result.is_err() {
76        // Never leave a partial temp file behind.
77        let _ = fs::remove_file(&tmp_path);
78    }
79    write_result
80}
81
82#[async_trait::async_trait]
83impl AttachmentStore for FileAttachmentStore {
84    fn persistence(&self) -> AttachmentStorePersistence {
85        AttachmentStorePersistence::Durable
86    }
87
88    async fn put(
89        &self,
90        bytes: Vec<u8>,
91        meta: AttachmentCreateMeta,
92    ) -> Result<AttachmentRef, AttachmentStoreError> {
93        let meta = AttachmentMeta::new(
94            content_id(&bytes),
95            meta.media_type,
96            bytes.len() as u64,
97            meta.width,
98            meta.height,
99            meta.label,
100        );
101        let path = self.path_for_id(&meta.id);
102        if let Some(parent) = path.parent() {
103            fs::create_dir_all(parent).map_err(|source| AttachmentStoreError::Io {
104                path: parent.to_path_buf(),
105                source,
106            })?;
107        }
108        if !path.exists() {
109            write_atomic(&path, &bytes)?;
110        }
111        let meta_path = self.meta_path_for_id(&meta.id);
112        let meta_bytes = serde_json::to_vec_pretty(&meta).expect("attachment metadata serializes");
113        write_atomic(&meta_path, &meta_bytes)?;
114        let reference = meta.as_ref();
115        self.meta_cache().insert(reference.id.clone(), meta);
116        Ok(reference)
117    }
118
119    async fn get(&self, id: &AttachmentId) -> Result<StoredAttachment, AttachmentStoreError> {
120        let path = self.path_for_id(id);
121        let bytes = fs::read(&path).map_err(|source| {
122            if source.kind() == std::io::ErrorKind::NotFound {
123                AttachmentStoreError::NotFound(id.clone())
124            } else {
125                AttachmentStoreError::Io {
126                    path: path.clone(),
127                    source,
128                }
129            }
130        })?;
131        let meta = if let Some(meta) = self.meta_cache().get(id).cloned() {
132            meta
133        } else {
134            let meta_path = self.meta_path_for_id(id);
135            let meta_bytes = fs::read(&meta_path).map_err(|source| {
136                if source.kind() == std::io::ErrorKind::NotFound {
137                    AttachmentStoreError::MissingMeta(id.clone())
138                } else {
139                    AttachmentStoreError::Io {
140                        path: meta_path.clone(),
141                        source,
142                    }
143                }
144            })?;
145            serde_json::from_slice(&meta_bytes).map_err(|source| {
146                AttachmentStoreError::MetadataDecode {
147                    id: id.clone(),
148                    source,
149                }
150            })?
151        };
152        Ok(StoredAttachment { meta, bytes })
153    }
154}
155
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{ImageMediaType, MediaType};

    /// Creation metadata shared by the tests: a PNG image labelled "pixel"
    /// with both optional numeric arguments set to 1.
    fn meta() -> AttachmentCreateMeta {
        let media_type = MediaType::Image(ImageMediaType::Png);
        let label = "pixel".to_string();
        AttachmentCreateMeta::new(media_type, Some(1), Some(1), Some(label))
    }

    // What goes in through `put` comes back out through `get`, bytes and
    // metadata alike.
    #[tokio::test]
    async fn file_store_round_trips_bytes_and_metadata() {
        let payload = vec![1, 2, 3];
        let temp = tempfile::tempdir().expect("tempdir");
        let store = FileAttachmentStore::new(temp.path());

        let reference = store.put(payload.clone(), meta()).await.expect("put");
        let stored = store.get(&reference.id).await.expect("get");

        assert_eq!(stored.bytes, payload);
        assert_eq!(stored.meta.id, reference.id);
        assert_eq!(stored.meta.byte_len, 3);
    }

    // `put` must write crash-atomically (stage into a temp file, then rename
    // it into place). After a successful put the content directory must hold
    // no `.tmp` files — evidence that staging files are renamed away rather
    // than left next to the final files.
    #[tokio::test]
    async fn file_store_writes_atomically_without_temp_litter() {
        let payload = vec![9, 8, 7, 6];
        let temp = tempfile::tempdir().expect("tempdir");
        let store = FileAttachmentStore::new(temp.path());
        let reference = store.put(payload.clone(), meta()).await.expect("put");

        let final_path = store.path_for_id(&reference.id);
        let meta_path = store.meta_path_for_id(&reference.id);
        assert!(final_path.exists(), "content file must be in place");
        assert!(meta_path.exists(), "metadata file must be in place");

        // Collect every entry in the shard directory whose extension is `tmp`.
        let dir = final_path.parent().expect("content dir");
        let tmp_files: Vec<PathBuf> = fs::read_dir(dir)
            .expect("read content dir")
            .map(|entry| entry.expect("dir entry").path())
            .filter(|path| path.extension().and_then(|ext| ext.to_str()) == Some("tmp"))
            .collect();
        assert!(
            tmp_files.is_empty(),
            "atomic write must not leave .tmp files behind: {tmp_files:?}"
        );

        // The bytes round-trip in full (no truncation from a partial write).
        let stored = store.get(&reference.id).await.expect("get");
        assert_eq!(stored.bytes, payload);
    }

    // A stale `<final>.tmp` left behind by a crashed prior write must not
    // block a subsequent successful put of the same content.
    #[tokio::test]
    async fn file_store_overwrites_stale_temp_file() {
        let temp = tempfile::tempdir().expect("tempdir");
        let store = FileAttachmentStore::new(temp.path());

        // Work out where the payload for these bytes will land, then plant a
        // bogus staging file right beside it before anything is stored.
        let digest = content_id(&[1, 1, 1]);
        let id = AttachmentId::new(digest.to_string());
        let final_path = store.path_for_id(&id);
        let mut stale_name = final_path.as_os_str().to_owned();
        stale_name.push(".tmp");
        let stale_path = PathBuf::from(stale_name);

        let parent = final_path.parent().expect("parent");
        fs::create_dir_all(parent).expect("mkdir");
        fs::write(stale_path, b"stale partial write").expect("seed stale tmp");

        let reference = store
            .put(vec![1, 1, 1], meta())
            .await
            .expect("put over stale tmp");
        let stored = store.get(&reference.id).await.expect("get");
        assert_eq!(stored.bytes, vec![1, 1, 1]);
    }

    // Runs the backend-agnostic `AttachmentStore` conformance suite against
    // the file-backed implementation, expecting durable persistence.
    #[tokio::test]
    async fn file_attachment_store_satisfies_conformance() {
        use std::sync::Arc;

        use crate::testing::conformance::ReopenableAttachmentStore;

        // Every factory call gets a fresh root directory. The `TempDir`
        // handles are parked here so the directories are not deleted while
        // the suite is still using the stores built on them.
        let dirs: Arc<Mutex<Vec<tempfile::TempDir>>> = Arc::new(Mutex::new(Vec::new()));
        let make = || {
            let dir = tempfile::tempdir().expect("tempdir");
            // Two independent store instances over the same directory.
            let open: Arc<dyn AttachmentStore> = Arc::new(FileAttachmentStore::new(dir.path()));
            let reopen: Arc<dyn AttachmentStore> = Arc::new(FileAttachmentStore::new(dir.path()));
            dirs.lock().expect("dirs lock").push(dir);
            ReopenableAttachmentStore { open, reopen }
        };
        crate::testing::conformance::attachment_store_reopenable(
            make,
            AttachmentStorePersistence::Durable,
        )
        .await;
    }
}