// huddle_core/files/mod.rs

//! File transfer: chunking, reassembly, hash verification, cache, save.
//!
//! Cache layout:
//!   <data_dir>/files/cache/<file_id>          // verified, complete
//!   <data_dir>/files/cache/<file_id>.part     // staging file, renamed into place once fully written
//!
//! File-IDs are the SHA-256 hash of the wire bytes (plaintext for
//! non-encrypted offers, ciphertext for encrypted offers — the
//! encryption layer is a separate concern). Receivers verify that each
//! completed transfer's bytes match the announced file_id before
//! exposing the file to the caller.
12
pub mod encryption;

use std::collections::HashMap;
use std::fs;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::sync::{Mutex, MutexGuard};

use sha2::{Digest, Sha256};

use crate::error::{HuddleError, Result};
23
/// Bytes per chunk on the wire. Picked to fit comfortably under
/// gossipsub's per-message budget even after base64 expansion (64 KiB
/// raw → 87,384 bytes, roughly 85 KiB, of base64).
pub const CHUNK_SIZE: usize = 64 * 1024;
28
/// Hard cap (1 MiB) on a single offer for Phase 2, applied to the wire
/// bytes on both the sending and the receiving side. Larger files defer
/// to a dedicated libp2p stream protocol (see plan.md Phase 3 notes).
pub const MAX_FILE_SIZE: u64 = 1024 * 1024;
32
/// What `prepare_outgoing` hands back: enough to drive a sequence of
/// FileOffer + N FileChunk gossipsub messages.
#[derive(Debug, Clone)]
pub struct OutgoingPlan {
    /// Hex SHA-256 of the bytes being sent; receivers verify against it.
    pub file_id: String,
    /// Display name announced to receivers.
    pub name: String,
    /// Best-effort MIME type; `None` when it could not be guessed.
    pub mime: Option<String>,
    /// Total length of the bytes being sent.
    pub size_bytes: u64,
    /// Payload split into wire-sized pieces, in send order. Never empty:
    /// an empty file is represented by a single empty chunk.
    pub chunks: Vec<Vec<u8>>,
}
43
/// What `accept_chunk` returns on the chunk that completes the transfer.
#[derive(Debug, Clone)]
pub struct CompletedFile {
    /// Hex SHA-256 the cached bytes were verified against.
    pub file_id: String,
    /// Location of the verified file inside the cache directory.
    pub cache_path: PathBuf,
    /// Length of the verified file in bytes.
    pub size_bytes: u64,
}
51
52struct IncomingTransfer {
53    expected_total: u32,
54    chunks: HashMap<u32, Vec<u8>>,
55    bytes_received: u64,
56}
57
58pub struct FileManager {
59    cache_dir: PathBuf,
60    incoming: Mutex<HashMap<String, IncomingTransfer>>,
61}
62
63impl FileManager {
64    /// `data_dir` is huddle's per-user data directory; the cache lives
65    /// underneath at `<data_dir>/files/cache`.
66    pub fn new(data_dir: &Path) -> Result<Self> {
67        let cache_dir = data_dir.join("files").join("cache");
68        fs::create_dir_all(&cache_dir)?;
69        Ok(Self {
70            cache_dir,
71            incoming: Mutex::new(HashMap::new()),
72        })
73    }
74
75    pub fn cache_dir(&self) -> &Path {
76        &self.cache_dir
77    }
78
79    pub fn cache_path(&self, file_id: &str) -> PathBuf {
80        self.cache_dir.join(file_id)
81    }
82
83    /// Read a previously-completed transfer's bytes from cache.
84    pub fn read_cache(&self, file_id: &str) -> Result<Vec<u8>> {
85        let path = self.cache_path(file_id);
86        Ok(fs::read(&path)?)
87    }
88
89    /// Build a transfer plan from an on-disk file.
90    pub fn prepare_outgoing_from_path(&self, path: &Path) -> Result<OutgoingPlan> {
91        let bytes = fs::read(path)?;
92        let name = path
93            .file_name()
94            .map(|n| n.to_string_lossy().to_string())
95            .unwrap_or_else(|| "untitled".into());
96        let mime = guess_mime(&name);
97        self.prepare_outgoing_from_bytes(&name, mime, bytes)
98    }
99
100    /// Build a transfer plan from an in-memory blob (useful for the
101    /// encrypted path, where the caller pre-encrypts a file).
102    pub fn prepare_outgoing_from_bytes(
103        &self,
104        name: &str,
105        mime: Option<String>,
106        bytes: Vec<u8>,
107    ) -> Result<OutgoingPlan> {
108        let size = bytes.len() as u64;
109        if size > MAX_FILE_SIZE {
110            return Err(HuddleError::Other(format!(
111                "file is {} bytes — Phase 2 cap is {} (~1 MiB)",
112                size, MAX_FILE_SIZE
113            )));
114        }
115        let file_id = sha256_hex(&bytes);
116        let chunks: Vec<Vec<u8>> = bytes.chunks(CHUNK_SIZE).map(|c| c.to_vec()).collect();
117        let chunks = if chunks.is_empty() {
118            vec![Vec::new()]
119        } else {
120            chunks
121        };
122
123        // Stash the outgoing file into our own cache too — that way the
124        // sender's UI can show the same "ready" card and re-save it
125        // later without a round-trip.
126        let cache_path = self.cache_path(&file_id);
127        if !cache_path.exists() {
128            fs::write(&cache_path, &bytes)?;
129        }
130
131        Ok(OutgoingPlan {
132            file_id,
133            name: name.to_string(),
134            mime,
135            size_bytes: size,
136            chunks,
137        })
138    }
139
140    /// Accept one chunk of an incoming transfer. Returns `Some` only on
141    /// the last chunk that completes the file (after SHA-256 verification).
142    pub fn accept_chunk(
143        &self,
144        file_id: &str,
145        chunk_index: u32,
146        total_chunks: u32,
147        data: Vec<u8>,
148        expected_size: u64,
149    ) -> Result<Option<CompletedFile>> {
150        if expected_size > MAX_FILE_SIZE {
151            return Err(HuddleError::Other(format!(
152                "incoming size {} exceeds Phase 2 cap",
153                expected_size
154            )));
155        }
156        // Fast-skip if already complete.
157        let cache_path = self.cache_path(file_id);
158        if cache_path.exists() {
159            let bytes = fs::read(&cache_path)?;
160            if sha256_hex(&bytes) == file_id {
161                return Ok(Some(CompletedFile {
162                    file_id: file_id.into(),
163                    cache_path,
164                    size_bytes: bytes.len() as u64,
165                }));
166            }
167        }
168
169        let mut map = self.incoming.lock().unwrap();
170        let entry = map.entry(file_id.to_string()).or_insert(IncomingTransfer {
171            expected_total: total_chunks,
172            chunks: HashMap::new(),
173            bytes_received: 0,
174        });
175        if entry.expected_total != total_chunks {
176            return Err(HuddleError::Other(
177                "chunk total disagrees with prior chunks".into(),
178            ));
179        }
180        if !entry.chunks.contains_key(&chunk_index) {
181            entry.bytes_received += data.len() as u64;
182            entry.chunks.insert(chunk_index, data);
183        }
184
185        if entry.chunks.len() as u32 != entry.expected_total {
186            return Ok(None);
187        }
188
189        // All chunks arrived — assemble and verify.
190        let total = entry.expected_total;
191        let mut assembled: Vec<u8> = Vec::with_capacity(entry.bytes_received as usize);
192        for idx in 0..total {
193            let part = entry
194                .chunks
195                .get(&idx)
196                .ok_or_else(|| HuddleError::Other(format!("missing chunk {idx}")))?;
197            assembled.extend_from_slice(part);
198        }
199        map.remove(file_id);
200        drop(map);
201
202        let computed = sha256_hex(&assembled);
203        if computed != file_id {
204            return Err(HuddleError::Other(format!(
205                "hash mismatch — expected {}, got {}",
206                file_id, computed
207            )));
208        }
209        // Write to a `.part` then atomically rename — never expose a
210        // partial file under the final name.
211        let part = self.cache_dir.join(format!("{}.part", file_id));
212        fs::write(&part, &assembled)?;
213        fs::rename(&part, &cache_path)?;
214
215        Ok(Some(CompletedFile {
216            file_id: file_id.into(),
217            cache_path,
218            size_bytes: assembled.len() as u64,
219        }))
220    }
221
222    /// Drop any partial state for an incoming transfer.
223    pub fn cancel_incoming(&self, file_id: &str) {
224        self.incoming.lock().unwrap().remove(file_id);
225    }
226
227    /// Bytes received so far for an in-progress transfer.
228    pub fn progress(&self, file_id: &str) -> Option<(u64, u64)> {
229        let map = self.incoming.lock().unwrap();
230        let e = map.get(file_id)?;
231        Some((e.bytes_received, 0)) // total not known until offer arrives
232    }
233
234    /// Copy `bytes` into the platform's Downloads folder under
235    /// `target_name` (with `-N` suffix on collision). Returns the
236    /// absolute path of the saved file.
237    pub fn write_to_downloads(&self, target_name: &str, bytes: &[u8]) -> Result<PathBuf> {
238        let dir = dirs::download_dir()
239            .or_else(dirs::home_dir)
240            .ok_or_else(|| HuddleError::Other("no Downloads / home directory".into()))?;
241        fs::create_dir_all(&dir)?;
242        let sanitized = sanitize_filename(target_name);
243        let path = pick_non_colliding(&dir, &sanitized);
244        fs::write(&path, bytes)?;
245        Ok(path)
246    }
247}
248
249fn sha256_hex(bytes: &[u8]) -> String {
250    let hash = Sha256::digest(bytes);
251    hex::encode(hash)
252}
253
/// Turn a peer-supplied name into a single safe path component.
///
/// Every character other than alphanumerics, `.`, `-`, `_` and space
/// becomes `_` (so path separators and control characters cannot
/// survive), then leading/trailing spaces and dots are stripped. An
/// empty result becomes `"untitled"`. Names whose base is a Windows
/// device name (`CON`, `NUL`, `COM1`, ...) get a `_` prefix so saving
/// them cannot address a device.
fn sanitize_filename(name: &str) -> String {
    const WINDOWS_DEVICE_NAMES: [&str; 22] = [
        "CON", "PRN", "AUX", "NUL", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7",
        "COM8", "COM9", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
    ];

    let cleaned: String = name
        .chars()
        .map(|c| {
            if c.is_alphanumeric() || matches!(c, '.' | '-' | '_' | ' ') {
                c
            } else {
                '_'
            }
        })
        .collect();
    let trimmed = cleaned.trim_matches(|c: char| c == ' ' || c == '.');
    if trimmed.is_empty() {
        return "untitled".into();
    }

    // Windows resolves device names regardless of extension
    // ("NUL.txt" is still the null device), so test the part before
    // the first dot.
    let base = trimmed
        .split('.')
        .next()
        .unwrap_or(trimmed)
        .trim_end_matches(' ');
    if WINDOWS_DEVICE_NAMES
        .iter()
        .any(|device| device.eq_ignore_ascii_case(base))
    {
        format!("_{trimmed}")
    } else {
        trimmed.to_string()
    }
}
272
/// Choose a path under `dir` for `name` that does not currently exist.
///
/// Returns `dir/name` when it is free; otherwise tries `stem-1.ext`
/// through `stem-999.ext`, and as a last resort `stem-collision.ext`
/// (which is not checked and may exist). The check is only a snapshot:
/// callers that must not overwrite should still create the file
/// exclusively.
fn pick_non_colliding(dir: &Path, name: &str) -> PathBuf {
    let base = dir.join(name);
    if !base.exists() {
        return base;
    }
    // Split off the extension so the counter lands before it
    // ("a.txt" → "a-1.txt"). A name whose only dot is the leading one
    // (".env") is all stem, otherwise the result would be "-1.env".
    let (stem, ext) = match name.rsplit_once('.') {
        Some((stem, ext)) if !stem.is_empty() => (stem, format!(".{ext}")),
        _ => (name, String::new()),
    };
    (1..1000)
        .map(|n| dir.join(format!("{stem}-{n}{ext}")))
        .find(|candidate| !candidate.exists())
        .unwrap_or_else(|| dir.join(format!("{stem}-collision{ext}")))
}
290
/// Best-effort MIME guess from a filename. Returns None for unknown
/// extensions — receivers should not depend on this being present.
///
/// Only the text after the last `.` is considered, case-insensitively;
/// a name without any `.` has no extension and yields `None`.
pub fn guess_mime(name: &str) -> Option<String> {
    // `rsplit_once` (unlike `rsplit(..).next()`) yields nothing when
    // there is no dot, so a file literally named "zip" is not mistaken
    // for an archive.
    let (_, ext) = name.rsplit_once('.')?;
    let mime = match ext.to_ascii_lowercase().as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "bmp" => "image/bmp",
        "pdf" => "application/pdf",
        "mp4" => "video/mp4",
        "webm" => "video/webm",
        "mov" => "video/quicktime",
        "mp3" => "audio/mpeg",
        "wav" => "audio/wav",
        "ogg" => "audio/ogg",
        "txt" => "text/plain",
        "md" => "text/markdown",
        "json" => "application/json",
        "zip" => "application/zip",
        "tar" => "application/x-tar",
        "gz" => "application/gzip",
        "rs" => "text/x-rust",
        "py" => "text/x-python",
        _ => return None,
    };
    Some(mime.to_string())
}
321
#[cfg(test)]
mod tests {
    use super::*;

    /// A manager rooted in a fresh temp dir. The `TempDir` is returned
    /// so the directory outlives the manager for the whole test.
    fn fresh_manager() -> (FileManager, tempfile::TempDir) {
        let dir = tempfile::tempdir().expect("tempdir");
        let m = FileManager::new(dir.path()).expect("new");
        (m, dir)
    }

    #[test]
    fn sanitize_strips_slashes_and_control_chars() {
        // Leading `..` is stripped (no hidden traversal); inner is fine
        // because slashes are already replaced with `_`.
        assert_eq!(sanitize_filename("../../etc/passwd"), "_.._etc_passwd");
        assert_eq!(sanitize_filename("file/with\\path"), "file_with_path");
        assert_eq!(sanitize_filename("a\nb\tc.txt"), "a_b_c.txt");
        assert_eq!(sanitize_filename(""), "untitled");
        assert_eq!(sanitize_filename("..."), "untitled");
    }

    #[test]
    fn collision_picks_dash_suffix() {
        let tmp = tempfile::tempdir().unwrap();
        let p = tmp.path();
        let first = pick_non_colliding(p, "a.txt");
        std::fs::write(&first, b"x").unwrap();
        let second = pick_non_colliding(p, "a.txt");
        assert_eq!(second.file_name().unwrap().to_str().unwrap(), "a-1.txt");
        std::fs::write(&second, b"x").unwrap();
        let third = pick_non_colliding(p, "a.txt");
        assert_eq!(third.file_name().unwrap().to_str().unwrap(), "a-2.txt");
    }

    #[test]
    fn mime_lookup() {
        assert_eq!(guess_mime("photo.png").as_deref(), Some("image/png"));
        assert_eq!(guess_mime("PHOTO.JPG").as_deref(), Some("image/jpeg"));
        assert_eq!(guess_mime("notes.md").as_deref(), Some("text/markdown"));
        assert!(guess_mime("unknown.xyz").is_none());
    }

    #[test]
    fn outgoing_plan_round_trip_with_chunking() {
        let (mgr, _t) = fresh_manager();
        let bytes: Vec<u8> = (0..200_000u32).map(|i| (i & 0xFF) as u8).collect();
        let plan = mgr
            .prepare_outgoing_from_bytes("file.bin", None, bytes.clone())
            .unwrap();
        let expected_chunks = (bytes.len() + CHUNK_SIZE - 1) / CHUNK_SIZE;
        assert_eq!(plan.chunks.len(), expected_chunks);
        assert_eq!(plan.size_bytes, bytes.len() as u64);

        // Reassemble via accept_chunk into a fresh manager — should hit
        // hash-verification path and produce a cache file.
        let (mgr2, _t2) = fresh_manager();
        let total = plan.chunks.len() as u32;
        let mut completion: Option<CompletedFile> = None;
        for (i, chunk) in plan.chunks.iter().enumerate() {
            let c = mgr2
                .accept_chunk(&plan.file_id, i as u32, total, chunk.clone(), plan.size_bytes)
                .unwrap();
            if c.is_some() {
                completion = c;
            }
        }
        let done = completion.expect("completion on last chunk");
        assert_eq!(done.file_id, plan.file_id);
        assert_eq!(done.size_bytes, bytes.len() as u64);
        assert!(done.cache_path.exists());
        let back = std::fs::read(&done.cache_path).unwrap();
        assert_eq!(back, bytes);
    }

    #[test]
    fn duplicate_chunks_are_ignored_no_double_count() {
        let (mgr, _t) = fresh_manager();
        let plan = mgr
            .prepare_outgoing_from_bytes("x.bin", None, vec![7u8; 200_000])
            .unwrap();
        let total = plan.chunks.len() as u32;
        let (mgr2, _t2) = fresh_manager();
        // Send chunk 0 twice — should not corrupt accounting.
        for _ in 0..2 {
            let r = mgr2
                .accept_chunk(
                    &plan.file_id,
                    0,
                    total,
                    plan.chunks[0].clone(),
                    plan.size_bytes,
                )
                .unwrap();
            assert!(r.is_none(), "first chunk alone must not complete");
        }
        // Send remaining chunks.
        for i in 1..total {
            let r = mgr2
                .accept_chunk(
                    &plan.file_id,
                    i,
                    total,
                    plan.chunks[i as usize].clone(),
                    plan.size_bytes,
                )
                .unwrap();
            if i + 1 == total {
                assert!(r.is_some(), "completion should fire on last chunk");
            }
        }
    }

    #[test]
    fn hash_mismatch_is_rejected() {
        let (mgr, _t) = fresh_manager();
        let bytes = vec![1u8; 100];
        let plan = mgr
            .prepare_outgoing_from_bytes("x.bin", None, bytes)
            .unwrap();
        // 100 bytes is far below CHUNK_SIZE, so the only chunk is also
        // the completing one and the mismatch surfaces immediately.
        let total = plan.chunks.len() as u32;
        assert_eq!(total, 1);

        // Tamper with chunk 0.
        let (mgr2, _t2) = fresh_manager();
        let mut bad = plan.chunks[0].clone();
        bad[0] = bad[0].wrapping_add(1);
        let result = mgr2.accept_chunk(&plan.file_id, 0, total, bad, plan.size_bytes);
        assert!(result.is_err(), "expected hash mismatch error");
        assert!(
            !mgr2.cache_path(&plan.file_id).exists(),
            "tampered bytes must not reach the cache"
        );
    }

    #[test]
    fn write_to_downloads_collision_suffixes() {
        let tmp = tempfile::tempdir().unwrap();
        let dl = tmp.path().to_path_buf();
        // Manually call sanitize / pick to avoid touching real ~/Downloads.
        let a = pick_non_colliding(&dl, "doc.txt");
        std::fs::write(&a, b"a").unwrap();
        let b = pick_non_colliding(&dl, "doc.txt");
        assert!(b.file_name().unwrap().to_str().unwrap().contains("doc-1"));
    }
}
467}