Skip to main content

oxillama_server/
files_store.rs

//! Disk-backed persistent store for the Files API.
//!
//! Directory layout:
//!
//! ```text
//! <root>/
//!   <file_id>/
//!     meta.json   — OxiFile metadata (atomic write)
//!     data.bin    — raw bytes (atomic write)
//! ```
//!
//! All writes use `tempfile::NamedTempFile::persist` so readers never observe
//! a partial file.  The store is safe across server restarts.
14
15use std::fs;
16use std::io::Write as _;
17use std::path::{Path, PathBuf};
18use std::time::SystemTime;
19
20use serde::{Deserialize, Serialize};
21use tempfile::NamedTempFile;
22
23use crate::error::ServerError;
24
25/// Result type for file store operations.
26pub type FilesStoreResult<T> = Result<T, ServerError>;
27
/// The purpose of a file uploaded to the Files API.
///
/// Serialized through serde with `snake_case` variant names, i.e. the JSON
/// values are `"assistants"`, `"batch"` and `"fine_tune"`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum FilePurpose {
    /// File is used as an attachment for the Assistants API.
    Assistants,
    /// File is used for a Batch API job.
    Batch,
    /// File is used for fine-tuning (serialized as `"fine_tune"`).
    FineTune,
}
39
40impl FilePurpose {
41    /// Parse a purpose string from a form field value.
42    ///
43    /// Returns `None` if the string does not match a known purpose.
44    pub fn from_purpose_str(s: &str) -> Option<Self> {
45        match s {
46            "assistants" => Some(Self::Assistants),
47            "batch" => Some(Self::Batch),
48            "fine-tune" | "fine_tune" => Some(Self::FineTune),
49            _ => None,
50        }
51    }
52}
53
/// Metadata for a single uploaded file.
///
/// This is the record that is serialized to `meta.json` inside the file's
/// directory and returned to callers of the store.
///
/// The `id` is always prefixed with `"file-"`.  The `status` field mirrors the
/// OpenAI API (always `"uploaded"` for freshly-created files; deletion removes
/// the entry from disk entirely).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct OxiFile {
    /// Stable identifier (`file-<uuid>`); also the name of the entry's
    /// directory under the store root.
    pub id: String,
    /// Always `"file"` — OpenAI object type discriminator.
    pub object: String,
    /// Original filename supplied by the uploader.  Stored as metadata only;
    /// it is never used to build a path on disk.
    pub filename: String,
    /// Upload purpose.
    pub purpose: FilePurpose,
    /// Size of the raw content in bytes (the length of the uploaded data).
    pub bytes: usize,
    /// Unix timestamp (seconds) when the file was created.  Set to `0` if the
    /// system clock reports a time before the Unix epoch.
    pub created_at: u64,
    /// Processing status — always `"uploaded"` immediately after upload.
    pub status: String,
}
76
/// Maximum upload size: 512 MiB.
///
/// This is the limit applied by `FilesStore::create`.  The limit is
/// inclusive: a file of exactly this many bytes is accepted, anything larger
/// is rejected.
pub const MAX_FILE_BYTES: usize = 512 * 1024 * 1024;
79
/// Disk-backed store for uploaded files.
///
/// All methods are synchronous and intended to be called from within a
/// `tokio::task::spawn_blocking` context (see the route handlers).
///
/// The store holds no state besides its root path, so cloning it is cheap and
/// every clone operates on the same on-disk data.
#[derive(Debug, Clone)]
pub struct FilesStore {
    /// Root directory that contains one sub-directory per file.
    root: PathBuf,
}
88
89impl FilesStore {
90    /// Open (or create) the files store root directory.
91    ///
92    /// Creates the directory tree if it does not exist.
93    pub fn new(root: PathBuf) -> FilesStoreResult<Self> {
94        fs::create_dir_all(&root).map_err(|e| ServerError::IoError {
95            context: format!("create files store root {}", root.display()),
96            source: e,
97        })?;
98        Ok(Self { root })
99    }
100
101    /// Upload a new file and persist it atomically.
102    ///
103    /// Returns the `OxiFile` metadata record.  Fails with `FileTooLarge` if
104    /// `data.len()` exceeds `MAX_FILE_BYTES`.
105    pub fn create(
106        &self,
107        filename: &str,
108        purpose: FilePurpose,
109        data: &[u8],
110    ) -> FilesStoreResult<OxiFile> {
111        self.create_with_limit(filename, purpose, data, MAX_FILE_BYTES)
112    }
113
114    /// Like `create` but with a caller-supplied byte limit.
115    ///
116    /// Useful in tests where the 512 MiB default is impractical.
117    pub fn create_with_limit(
118        &self,
119        filename: &str,
120        purpose: FilePurpose,
121        data: &[u8],
122        limit: usize,
123    ) -> FilesStoreResult<OxiFile> {
124        if data.len() > limit {
125            return Err(ServerError::FileTooLarge(format!(
126                "file '{}' is {} bytes; limit is {} bytes",
127                filename,
128                data.len(),
129                limit
130            )));
131        }
132
133        let file_id = format!("file-{}", uuid::Uuid::new_v4().as_simple());
134        let file_dir = self.file_dir(&file_id);
135        fs::create_dir_all(&file_dir).map_err(|e| ServerError::IoError {
136            context: format!("create file directory {}", file_dir.display()),
137            source: e,
138        })?;
139
140        // Write raw bytes atomically.
141        self.write_bytes_atomic(&file_dir, "data.bin", data)?;
142
143        let created_at = SystemTime::now()
144            .duration_since(SystemTime::UNIX_EPOCH)
145            .map(|d| d.as_secs())
146            .unwrap_or(0);
147
148        let meta = OxiFile {
149            id: file_id.clone(),
150            object: "file".to_string(),
151            filename: filename.to_string(),
152            purpose,
153            bytes: data.len(),
154            created_at,
155            status: "uploaded".to_string(),
156        };
157
158        // Write metadata atomically.
159        self.write_json_atomic(&file_dir, "meta.json", &meta)?;
160
161        Ok(meta)
162    }
163
164    /// Retrieve metadata for a single file by ID.
165    ///
166    /// Returns `FileNotFound` if no entry with this ID exists.
167    pub fn get(&self, file_id: &str) -> FilesStoreResult<OxiFile> {
168        let path = self.file_dir(file_id).join("meta.json");
169        let content = fs::read_to_string(&path)
170            .map_err(|_| ServerError::FileNotFound(file_id.to_string()))?;
171        serde_json::from_str(&content).map_err(ServerError::Serialization)
172    }
173
174    /// List all files stored in the root directory.
175    ///
176    /// Returns them in an unspecified order.  Entries whose `meta.json` cannot
177    /// be parsed are silently skipped so a single corrupt entry does not block
178    /// the listing.
179    pub fn list(&self) -> FilesStoreResult<Vec<OxiFile>> {
180        let mut files = Vec::new();
181        for entry in fs::read_dir(&self.root).map_err(|e| ServerError::IoError {
182            context: "list files directory".to_string(),
183            source: e,
184        })? {
185            let entry = entry.map_err(|e| ServerError::IoError {
186                context: "read files directory entry".to_string(),
187                source: e,
188            })?;
189            if !entry.file_type().map(|t| t.is_dir()).unwrap_or(false) {
190                continue;
191            }
192            let meta_path = entry.path().join("meta.json");
193            if !meta_path.exists() {
194                continue;
195            }
196            if let Ok(content) = fs::read_to_string(&meta_path) {
197                if let Ok(meta) = serde_json::from_str::<OxiFile>(&content) {
198                    files.push(meta);
199                }
200            }
201        }
202        // Sort by creation time for deterministic ordering.
203        files.sort_by_key(|f| f.created_at);
204        Ok(files)
205    }
206
207    /// Read the raw bytes for a file.
208    ///
209    /// Returns `FileNotFound` if the file does not exist.
210    pub fn get_content(&self, file_id: &str) -> FilesStoreResult<Vec<u8>> {
211        let dir = self.file_dir(file_id);
212        // Check that the meta exists first for a clean error message.
213        if !dir.join("meta.json").exists() {
214            return Err(ServerError::FileNotFound(file_id.to_string()));
215        }
216        let data_path = dir.join("data.bin");
217        fs::read(&data_path).map_err(|e| ServerError::IoError {
218            context: format!("read file content for {file_id}"),
219            source: e,
220        })
221    }
222
223    /// Delete a file and all associated data.
224    ///
225    /// Returns `FileNotFound` if no such file exists.
226    pub fn delete(&self, file_id: &str) -> FilesStoreResult<()> {
227        let dir = self.file_dir(file_id);
228        if !dir.join("meta.json").exists() {
229            return Err(ServerError::FileNotFound(file_id.to_string()));
230        }
231        fs::remove_dir_all(&dir).map_err(|e| ServerError::IoError {
232            context: format!("delete file directory for {file_id}"),
233            source: e,
234        })?;
235        Ok(())
236    }
237
238    // ── Path helpers ──────────────────────────────────────────────────────────
239
240    fn file_dir(&self, file_id: &str) -> PathBuf {
241        self.root.join(file_id)
242    }
243
244    // ── Private helpers ───────────────────────────────────────────────────────
245
246    /// Write arbitrary bytes atomically via temp file + rename.
247    fn write_bytes_atomic(&self, dir: &Path, filename: &str, data: &[u8]) -> FilesStoreResult<()> {
248        let mut tmp = NamedTempFile::new_in(dir).map_err(|e| ServerError::IoError {
249            context: format!("create temp file in {}", dir.display()),
250            source: e,
251        })?;
252        tmp.write_all(data).map_err(|e| ServerError::IoError {
253            context: "write bytes to temp file".to_string(),
254            source: e,
255        })?;
256        tmp.flush().map_err(|e| ServerError::IoError {
257            context: "flush bytes temp file".to_string(),
258            source: e,
259        })?;
260        let target = dir.join(filename);
261        tmp.persist(&target).map_err(|e| ServerError::IoError {
262            context: format!("persist atomic write to {}", target.display()),
263            source: e.error,
264        })?;
265        Ok(())
266    }
267
268    /// Serialize `value` to JSON and write atomically via temp file + rename.
269    fn write_json_atomic<T: serde::Serialize>(
270        &self,
271        dir: &Path,
272        filename: &str,
273        value: &T,
274    ) -> FilesStoreResult<()> {
275        let json = serde_json::to_string_pretty(value).map_err(ServerError::Serialization)?;
276        let mut tmp = NamedTempFile::new_in(dir).map_err(|e| ServerError::IoError {
277            context: format!("create json temp file in {}", dir.display()),
278            source: e,
279        })?;
280        tmp.write_all(json.as_bytes())
281            .map_err(|e| ServerError::IoError {
282                context: "write json to temp file".to_string(),
283                source: e,
284            })?;
285        tmp.flush().map_err(|e| ServerError::IoError {
286            context: "flush json temp file".to_string(),
287            source: e,
288        })?;
289        let target = dir.join(filename);
290        tmp.persist(&target).map_err(|e| ServerError::IoError {
291            context: format!("persist atomic json write to {}", target.display()),
292            source: e.error,
293        })?;
294        Ok(())
295    }
296}
297
298// ── Tests ─────────────────────────────────────────────────────────────────────
299
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Create a uniquely named temporary directory whose name starts with
    /// `prefix`.
    ///
    /// The directory and everything inside it is removed when the returned
    /// guard is dropped, so tests no longer leave data behind in the system
    /// temp directory.
    fn make_temp_root(prefix: &str) -> TempDir {
        tempfile::Builder::new()
            .prefix(prefix)
            .tempdir()
            .expect("temporary directory should be creatable")
    }

    /// Create a store inside a fresh temporary directory.
    ///
    /// The returned guard must be kept alive for as long as the store is
    /// used; bind it to a named variable such as `_guard`, not to `_`.
    fn make_store(tag: &str) -> (TempDir, FilesStore) {
        let guard = make_temp_root(&format!("oxillama_files_store_test_{tag}_"));
        // Root the store in a not-yet-existing sub-directory so that
        // `FilesStore::new` still has to create it.
        let store = FilesStore::new(guard.path().join("store"))
            .expect("FilesStore::new should succeed");
        (guard, store)
    }

    /// `create` returns a file id that starts with `"file-"`.
    #[test]
    fn files_create_returns_id() {
        let (_guard, store) = make_store("create_id");
        let data = b"hello world";
        let meta = store
            .create("hello.txt", FilePurpose::Assistants, data)
            .expect("create should succeed");
        assert!(
            meta.id.starts_with("file-"),
            "id should start with file-: {}",
            meta.id
        );
        assert_eq!(meta.filename, "hello.txt");
        assert_eq!(meta.bytes, data.len());
        assert_eq!(meta.status, "uploaded");
        assert_eq!(meta.purpose, FilePurpose::Assistants);
    }

    /// After `create`, `list` includes the new file.
    #[test]
    fn files_list_returns_uploaded() {
        let (_guard, store) = make_store("list_uploaded");
        let data = b"some content";
        let meta = store
            .create("report.jsonl", FilePurpose::Batch, data)
            .expect("create");
        let list = store.list().expect("list");
        assert!(
            list.iter().any(|f| f.id == meta.id),
            "list should contain the created file"
        );
    }

    /// `get_content` returns bytes identical to what was uploaded.
    #[test]
    fn files_content_returns_bytes() {
        let (_guard, store) = make_store("content_bytes");
        let data = b"the quick brown fox";
        let meta = store
            .create("fox.txt", FilePurpose::Assistants, data)
            .expect("create");
        let content = store.get_content(&meta.id).expect("get_content");
        assert_eq!(content.as_slice(), data);
    }

    /// After `delete`, `get` returns `FileNotFound`.
    #[test]
    fn files_delete_removes_persisted_state() {
        let (_guard, store) = make_store("delete_state");
        let data = b"temporary";
        let meta = store
            .create("tmp.txt", FilePurpose::FineTune, data)
            .expect("create");
        store.delete(&meta.id).expect("delete should succeed");
        let err = store
            .get(&meta.id)
            .expect_err("get should fail after delete");
        assert!(
            matches!(err, ServerError::FileNotFound(_)),
            "expected FileNotFound, got: {err}"
        );
    }

    /// Uploading a file larger than the configured limit returns `FileTooLarge`.
    #[test]
    fn files_too_large_checked() {
        let (_guard, store) = make_store("too_large");
        // Use a tiny limit (16 bytes) so the test does not allocate gigabytes.
        let data = vec![0u8; 32];
        let err = store
            .create_with_limit("big.bin", FilePurpose::Assistants, &data, 16)
            .expect_err("should fail with too-large data");
        assert!(
            matches!(err, ServerError::FileTooLarge(_)),
            "expected FileTooLarge, got: {err}"
        );
    }

    /// Deleting a non-existent file returns `FileNotFound`.
    #[test]
    fn files_delete_nonexistent_returns_not_found() {
        let (_guard, store) = make_store("delete_notfound");
        let err = store
            .delete("file-doesnotexist")
            .expect_err("delete of nonexistent should fail");
        assert!(matches!(err, ServerError::FileNotFound(_)));
    }

    /// `list` on an empty store returns an empty vec.
    #[test]
    fn files_list_empty_store() {
        let (_guard, store) = make_store("list_empty");
        let list = store.list().expect("list on empty store");
        assert!(list.is_empty());
    }

    /// Persistence: metadata is readable after drop + re-open of store.
    #[test]
    fn files_persist_across_store_drop_and_recreate() {
        let guard = make_temp_root("oxillama_files_persist_");
        let dir = guard.path().join("store");
        let file_id = {
            let store = FilesStore::new(dir.clone()).expect("create store");
            let meta = store
                .create("data.bin", FilePurpose::Assistants, b"persisted bytes")
                .expect("create");
            meta.id
        };

        // Drop and reopen.
        let store2 = FilesStore::new(dir).expect("reopen store");
        let meta = store2.get(&file_id).expect("get after reopen");
        assert_eq!(meta.id, file_id);
        assert_eq!(meta.filename, "data.bin");
        let content = store2.get_content(&file_id).expect("content after reopen");
        assert_eq!(content.as_slice(), b"persisted bytes");
    }
}