// batuta/serve/banco/storage.rs
use serde::{Deserialize, Serialize};
7use std::collections::HashMap;
8use std::path::PathBuf;
9use std::sync::{Arc, RwLock};
10
/// Metadata record describing a single uploaded file.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FileInfo {
    // Unique identifier, formatted as `file-{epoch_secs}-{seq}` (see `FileStore::store`).
    pub id: String,
    // Original filename as supplied by the uploader.
    pub name: String,
    // Content length in bytes.
    pub size_bytes: u64,
    // MIME type guessed from the filename extension (see `detect_content_type`).
    pub content_type: String,
    // Upload time, whole seconds since the Unix epoch.
    pub uploaded_at: u64,
    // Hex digest of the content; doubles as the on-disk content filename.
    pub content_hash: String,
}
21
22impl FileInfo {
24 fn detect_content_type(name: &str) -> String {
25 match name.rsplit('.').next().map(str::to_lowercase).as_deref() {
26 Some("pdf") => "application/pdf",
27 Some("csv") => "text/csv",
28 Some("json") => "application/json",
29 Some("jsonl") => "application/jsonl",
30 Some("txt") => "text/plain",
31 Some("docx") => {
32 "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
33 }
34 _ => "application/octet-stream",
35 }
36 .to_string()
37 }
38}
39
/// Thread-safe file store: metadata and content held in memory, optionally
/// mirrored to `<data_dir>/uploads` on disk.
pub struct FileStore {
    // File id -> metadata record.
    files: RwLock<HashMap<String, FileInfo>>,
    // File id -> raw bytes. In-memory cache only; on-disk content (when
    // `data_dir` is set) is keyed by content hash instead of id.
    content: RwLock<HashMap<String, Vec<u8>>>,
    // When set, content and metadata are persisted under `<dir>/uploads`.
    data_dir: Option<PathBuf>,
    // Monotonic sequence number used to build unique file ids.
    counter: std::sync::atomic::AtomicU64,
}
48
49impl FileStore {
50 #[must_use]
52 pub fn in_memory() -> Arc<Self> {
53 Arc::new(Self {
54 files: RwLock::new(HashMap::new()),
55 content: RwLock::new(HashMap::new()),
56 data_dir: None,
57 counter: std::sync::atomic::AtomicU64::new(0),
58 })
59 }
60
61 #[must_use]
63 pub fn with_data_dir(dir: PathBuf) -> Arc<Self> {
64 let uploads_dir = dir.join("uploads");
65 let _ = std::fs::create_dir_all(&uploads_dir);
66
67 let mut files = HashMap::new();
69 let mut max_seq = 0u64;
70 if let Ok(entries) = std::fs::read_dir(&uploads_dir) {
71 for entry in entries.flatten() {
72 let path = entry.path();
73 if path.extension().and_then(|e| e.to_str()) == Some("json") {
74 if let Ok(data) = std::fs::read_to_string(&path) {
75 if let Ok(info) = serde_json::from_str::<FileInfo>(&data) {
76 if let Some(seq_str) = info.id.rsplit('-').next() {
78 if let Ok(seq) = seq_str.parse::<u64>() {
79 max_seq = max_seq.max(seq + 1);
80 }
81 }
82 files.insert(info.id.clone(), info);
83 }
84 }
85 }
86 }
87 }
88
89 let loaded = files.len();
90 if loaded > 0 {
91 eprintln!("[banco] Loaded {loaded} files from {}", uploads_dir.display());
92 }
93
94 Arc::new(Self {
95 files: RwLock::new(files),
96 content: RwLock::new(HashMap::new()),
97 data_dir: Some(dir),
98 counter: std::sync::atomic::AtomicU64::new(max_seq),
99 })
100 }
101
102 pub fn store(&self, name: &str, data: &[u8]) -> FileInfo {
104 let seq = self.counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst);
105 let id = format!("file-{}-{seq}", epoch_secs());
106 let content_hash = sha256_hex(data);
107
108 let info = FileInfo {
109 id: id.clone(),
110 name: name.to_string(),
111 size_bytes: data.len() as u64,
112 content_type: FileInfo::detect_content_type(name),
113 uploaded_at: epoch_secs(),
114 content_hash: content_hash.clone(),
115 };
116
117 if let Some(ref dir) = self.data_dir {
119 let path = dir.join("uploads").join(&content_hash);
120 let _ = std::fs::write(path, data);
121 let meta_path = dir.join("uploads").join(format!("{content_hash}.meta.json"));
123 let _ =
124 std::fs::write(meta_path, serde_json::to_string_pretty(&info).unwrap_or_default());
125 }
126
127 if let Ok(mut store) = self.files.write() {
128 store.insert(id.clone(), info.clone());
129 }
130
131 if let Ok(mut cache) = self.content.write() {
133 cache.insert(id, data.to_vec());
134 }
135
136 info
137 }
138
139 #[must_use]
141 pub fn list(&self) -> Vec<FileInfo> {
142 let store = self.files.read().unwrap_or_else(|e| e.into_inner());
143 let mut files: Vec<FileInfo> = store.values().cloned().collect();
144 files.sort_by(|a, b| b.uploaded_at.cmp(&a.uploaded_at));
145 files
146 }
147
148 #[must_use]
150 pub fn get(&self, id: &str) -> Option<FileInfo> {
151 self.files.read().unwrap_or_else(|e| e.into_inner()).get(id).cloned()
152 }
153
154 #[must_use]
156 pub fn read_content(&self, id: &str) -> Option<Vec<u8>> {
157 if let Ok(cache) = self.content.read() {
159 if let Some(data) = cache.get(id) {
160 return Some(data.clone());
161 }
162 }
163 let info = self.get(id)?;
165 if let Some(ref dir) = self.data_dir {
166 let path = dir.join("uploads").join(&info.content_hash);
167 std::fs::read(path).ok()
168 } else {
169 None
170 }
171 }
172
173 pub fn delete(&self, id: &str) -> Result<(), StorageError> {
175 let info = {
176 let mut store = self.files.write().map_err(|_| StorageError::LockPoisoned)?;
177 store.remove(id).ok_or(StorageError::NotFound(id.to_string()))?
178 };
179
180 if let Ok(mut cache) = self.content.write() {
182 cache.remove(id);
183 }
184 if let Some(ref dir) = self.data_dir {
185 let _ = std::fs::remove_file(dir.join("uploads").join(&info.content_hash));
186 let _ = std::fs::remove_file(
187 dir.join("uploads").join(format!("{}.meta.json", info.content_hash)),
188 );
189 }
190
191 Ok(())
192 }
193
194 #[must_use]
196 pub fn len(&self) -> usize {
197 self.files.read().map(|s| s.len()).unwrap_or(0)
198 }
199
    /// Returns `true` when the store contains no files.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }
205}
206
/// Errors returned by `FileStore` operations.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum StorageError {
    /// No file with the given id exists in the store.
    NotFound(String),
    /// An internal `RwLock` was poisoned by a panicking writer.
    LockPoisoned,
}

impl std::fmt::Display for StorageError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::NotFound(id) => write!(f, "File not found: {id}"),
            Self::LockPoisoned => f.write_str("Internal lock error"),
        }
    }
}

impl std::error::Error for StorageError {}
224
/// Current wall-clock time as whole seconds since the Unix epoch;
/// returns 0 if the system clock reports a time before the epoch.
fn epoch_secs() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};
    SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_secs())
        .unwrap_or(0)
}
228
/// Produces a deterministic 32-hex-character digest of `data`, used as the
/// content-addressed storage key.
///
/// NOTE(review): despite the name, this is NOT SHA-256 — it is two 64-bit
/// FNV-1a-style hashes with different seeds, concatenated. Adequate as a
/// dedup/filename key for trusted input, but not collision-resistant against
/// an adversary; renaming it (or swapping in real SHA-256) would change the
/// on-disk layout, so it is left as-is and flagged here.
fn sha256_hex(data: &[u8]) -> String {
    let seeds: (u64, u64) = (0xcbf2_9ce4_8422_2325, 0x6c62_272e_07bb_0142);
    let (a, b) = data.iter().fold(seeds, |(a, b), &byte| {
        (
            (a ^ u64::from(byte)).wrapping_mul(0x0100_0000_01b3),
            (b ^ u64::from(byte)).wrapping_mul(0x0000_0100_0000_01b3),
        )
    });
    format!("{a:016x}{b:016x}")
}