Skip to main content

huddle_core/files/
mod.rs

//! File transfer: chunking, reassembly, hash verification, cache, save.
//!
//! Cache layout:
//!   <data_dir>/files/cache/<file_id>          // verified, complete
//!   <data_dir>/files/cache/<file_id>.part     // staging file, renamed into place once fully written
//!
//! In-progress transfers are reassembled in memory; nothing is written to
//! the cache until every chunk has arrived and the hash has been checked.
//!
//! File-IDs are the hex-encoded SHA-256 hash of the wire bytes (plaintext
//! for non-encrypted offers, ciphertext for encrypted offers — the
//! encryption layer is a separate concern). Receivers verify that each
//! completed transfer's bytes match the announced file_id before
//! exposing the file to the caller.
12
13pub mod encryption;
14
15use std::collections::HashMap;
16use std::fs;
17use std::path::{Path, PathBuf};
18use std::sync::Mutex;
19
20use sha2::{Digest, Sha256};
21
22use crate::error::{HuddleError, Result};
23
/// Bytes of raw file data per chunk on the wire. A `FileChunk` is
/// base64-encoded inside a JSON envelope, so the raw chunk must leave room
/// for ~33% base64 expansion (4 output bytes per 3 input bytes) plus the
/// envelope and still fit under gossipsub's transmit limit. 40 KiB raw →
/// ~55 KiB on the wire, comfortably under even the 64 KiB gossipsub default
/// (and well under the 256 KiB ceiling huddle sets explicitly — see
/// `network::start_network_with`).
pub const CHUNK_SIZE: usize = 40 * 1024;
31
/// Hard cap (1 MiB) on a single offer for Phase 2, checked both when
/// preparing an outgoing file and against the size announced for an
/// incoming one. Larger files defer to a dedicated libp2p stream protocol
/// (see plan.md Phase 3 notes).
pub const MAX_FILE_SIZE: u64 = 1024 * 1024;
35
/// What `prepare_outgoing` hands back: enough to drive a sequence of
/// FileOffer + N FileChunk gossipsub messages.
#[derive(Debug, Clone)]
pub struct OutgoingPlan {
    /// Hex-encoded SHA-256 of the bytes being sent.
    pub file_id: String,
    /// File name to announce to receivers.
    pub name: String,
    /// Best-effort MIME type; `None` when it could not be determined.
    pub mime: Option<String>,
    /// Total length in bytes of the data split across `chunks`.
    pub size_bytes: u64,
    /// The data split into pieces of at most `CHUNK_SIZE` bytes, in order.
    /// Always holds at least one chunk — an empty file yields one empty chunk.
    pub chunks: Vec<Vec<u8>>,
}
46
/// What `accept_chunk` returns on the chunk that completes the transfer.
#[derive(Debug, Clone)]
pub struct CompletedFile {
    /// Identifier of the transfer; the file's hex SHA-256 matched this value.
    pub file_id: String,
    /// Location of the verified file inside the cache directory.
    pub cache_path: PathBuf,
    /// Length in bytes of the verified file.
    pub size_bytes: u64,
}
54
/// In-memory reassembly state for one transfer that has not completed yet.
struct IncomingTransfer {
    /// Chunk count reported by the first chunk seen for this transfer;
    /// later chunks must report the same total.
    expected_total: u32,
    /// Announced total file size. Seeded from the caller's best guess
    /// (the offer's `size_bytes`, or `MAX_FILE_SIZE` when chunks arrive
    /// before the offer) and corrected by `set_expected_size` once the
    /// offer lands. Drives the progress bar's denominator.
    expected_size: u64,
    /// Chunk payloads received so far, keyed by chunk index.
    chunks: HashMap<u32, Vec<u8>>,
    /// Sum of the payload lengths stored in `chunks`; a repeated chunk
    /// index is not counted twice.
    bytes_received: u64,
}
65
/// Owns the on-disk file cache and the in-memory state of transfers that
/// are still being reassembled.
pub struct FileManager {
    /// Directory holding cached files, one per file ID.
    cache_dir: PathBuf,
    /// Partially received transfers, keyed by file ID.
    incoming: Mutex<HashMap<String, IncomingTransfer>>,
}
70
71impl FileManager {
72    /// `data_dir` is huddle's per-user data directory; the cache lives
73    /// underneath at `<data_dir>/files/cache`.
74    pub fn new(data_dir: &Path) -> Result<Self> {
75        let cache_dir = data_dir.join("files").join("cache");
76        fs::create_dir_all(&cache_dir)?;
77        Ok(Self {
78            cache_dir,
79            incoming: Mutex::new(HashMap::new()),
80        })
81    }
82
83    pub fn cache_dir(&self) -> &Path {
84        &self.cache_dir
85    }
86
87    pub fn cache_path(&self, file_id: &str) -> PathBuf {
88        self.cache_dir.join(file_id)
89    }
90
91    /// Read a previously-completed transfer's bytes from cache.
92    pub fn read_cache(&self, file_id: &str) -> Result<Vec<u8>> {
93        let path = self.cache_path(file_id);
94        Ok(fs::read(&path)?)
95    }
96
97    /// Build a transfer plan from an on-disk file.
98    pub fn prepare_outgoing_from_path(&self, path: &Path) -> Result<OutgoingPlan> {
99        let bytes = fs::read(path)?;
100        let name = path
101            .file_name()
102            .map(|n| n.to_string_lossy().to_string())
103            .unwrap_or_else(|| "untitled".into());
104        let mime = guess_mime(&name);
105        self.prepare_outgoing_from_bytes(&name, mime, bytes)
106    }
107
108    /// Build a transfer plan from an in-memory blob (useful for the
109    /// encrypted path, where the caller pre-encrypts a file).
110    pub fn prepare_outgoing_from_bytes(
111        &self,
112        name: &str,
113        mime: Option<String>,
114        bytes: Vec<u8>,
115    ) -> Result<OutgoingPlan> {
116        let size = bytes.len() as u64;
117        if size > MAX_FILE_SIZE {
118            return Err(HuddleError::Other(format!(
119                "file is {} bytes — Phase 2 cap is {} (~1 MiB)",
120                size, MAX_FILE_SIZE
121            )));
122        }
123        let file_id = sha256_hex(&bytes);
124        let chunks: Vec<Vec<u8>> = bytes.chunks(CHUNK_SIZE).map(|c| c.to_vec()).collect();
125        let chunks = if chunks.is_empty() {
126            vec![Vec::new()]
127        } else {
128            chunks
129        };
130
131        // Stash the outgoing file into our own cache too — that way the
132        // sender's UI can show the same "ready" card and re-save it
133        // later without a round-trip.
134        let cache_path = self.cache_path(&file_id);
135        if !cache_path.exists() {
136            fs::write(&cache_path, &bytes)?;
137        }
138
139        Ok(OutgoingPlan {
140            file_id,
141            name: name.to_string(),
142            mime,
143            size_bytes: size,
144            chunks,
145        })
146    }
147
148    /// Accept one chunk of an incoming transfer. Returns `Some` only on
149    /// the last chunk that completes the file (after SHA-256 verification).
150    pub fn accept_chunk(
151        &self,
152        file_id: &str,
153        chunk_index: u32,
154        total_chunks: u32,
155        data: Vec<u8>,
156        expected_size: u64,
157    ) -> Result<Option<CompletedFile>> {
158        if expected_size > MAX_FILE_SIZE {
159            return Err(HuddleError::Other(format!(
160                "incoming size {} exceeds Phase 2 cap",
161                expected_size
162            )));
163        }
164        // Fast-skip if already complete.
165        let cache_path = self.cache_path(file_id);
166        if cache_path.exists() {
167            let bytes = fs::read(&cache_path)?;
168            if sha256_hex(&bytes) == file_id {
169                return Ok(Some(CompletedFile {
170                    file_id: file_id.into(),
171                    cache_path,
172                    size_bytes: bytes.len() as u64,
173                }));
174            }
175        }
176
177        let mut map = self.incoming.lock().unwrap();
178        let entry = map.entry(file_id.to_string()).or_insert(IncomingTransfer {
179            expected_total: total_chunks,
180            expected_size,
181            chunks: HashMap::new(),
182            bytes_received: 0,
183        });
184        if entry.expected_total != total_chunks {
185            return Err(HuddleError::Other(
186                "chunk total disagrees with prior chunks".into(),
187            ));
188        }
189        if !entry.chunks.contains_key(&chunk_index) {
190            entry.bytes_received += data.len() as u64;
191            entry.chunks.insert(chunk_index, data);
192        }
193
194        if entry.chunks.len() as u32 != entry.expected_total {
195            return Ok(None);
196        }
197
198        // All chunks arrived — assemble and verify.
199        let total = entry.expected_total;
200        let mut assembled: Vec<u8> = Vec::with_capacity(entry.bytes_received as usize);
201        for idx in 0..total {
202            let part = entry
203                .chunks
204                .get(&idx)
205                .ok_or_else(|| HuddleError::Other(format!("missing chunk {idx}")))?;
206            assembled.extend_from_slice(part);
207        }
208        map.remove(file_id);
209        drop(map);
210
211        let computed = sha256_hex(&assembled);
212        if computed != file_id {
213            return Err(HuddleError::Other(format!(
214                "hash mismatch — expected {}, got {}",
215                file_id, computed
216            )));
217        }
218        // Write to a `.part` then atomically rename — never expose a
219        // partial file under the final name.
220        let part = self.cache_dir.join(format!("{}.part", file_id));
221        fs::write(&part, &assembled)?;
222        fs::rename(&part, &cache_path)?;
223
224        Ok(Some(CompletedFile {
225            file_id: file_id.into(),
226            cache_path,
227            size_bytes: assembled.len() as u64,
228        }))
229    }
230
231    /// Drop any partial state for an incoming transfer.
232    pub fn cancel_incoming(&self, file_id: &str) {
233        self.incoming.lock().unwrap().remove(file_id);
234    }
235
236    /// Record the authoritative total size for an in-progress transfer —
237    /// called when a FileOffer arrives after chunks have already started,
238    /// so the progress denominator stops being a guess. No-op when there
239    /// is no active transfer for `file_id`.
240    pub fn set_expected_size(&self, file_id: &str, size: u64) {
241        if let Some(e) = self.incoming.lock().unwrap().get_mut(file_id) {
242            e.expected_size = size;
243        }
244    }
245
246    /// Bytes received so far and the expected total, for an in-progress
247    /// transfer.
248    pub fn progress(&self, file_id: &str) -> Option<(u64, u64)> {
249        let map = self.incoming.lock().unwrap();
250        let e = map.get(file_id)?;
251        Some((e.bytes_received, e.expected_size))
252    }
253
254    /// Copy `bytes` into the platform's Downloads folder under
255    /// `target_name` (with `-N` suffix on collision). Returns the
256    /// absolute path of the saved file.
257    pub fn write_to_downloads(&self, target_name: &str, bytes: &[u8]) -> Result<PathBuf> {
258        let dir = dirs::download_dir()
259            .or_else(dirs::home_dir)
260            .ok_or_else(|| HuddleError::Other("no Downloads / home directory".into()))?;
261        fs::create_dir_all(&dir)?;
262        let sanitized = sanitize_filename(target_name);
263        let path = pick_non_colliding(&dir, &sanitized);
264        fs::write(&path, bytes)?;
265        Ok(path)
266    }
267}
268
269fn sha256_hex(bytes: &[u8]) -> String {
270    let hash = Sha256::digest(bytes);
271    hex::encode(hash)
272}
273
/// Turn an arbitrary name into a single safe path component.
///
/// Every character that is not alphanumeric, `.`, `-`, `_` or a space is
/// replaced with `_`; leading and trailing spaces and dots are then
/// stripped. If nothing is left, the result is `"untitled"`.
fn sanitize_filename(name: &str) -> String {
    let mut cleaned = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch.is_alphanumeric() || ch == '.' || ch == '-' || ch == '_' || ch == ' ';
        cleaned.push(if keep { ch } else { '_' });
    }

    match cleaned.trim_matches(&[' ', '.'][..]) {
        "" => String::from("untitled"),
        kept => kept.to_owned(),
    }
}
292
/// Choose a path inside `dir` for `name` that does not currently exist.
///
/// Returns `dir/name` when it is free. Otherwise tries `stem-1.ext`
/// through `stem-999.ext`, splitting at the last `.` in `name` (a name
/// without a dot gets the suffix appended), and finally falls back to
/// `stem-collision.ext` without checking whether that exists.
fn pick_non_colliding(dir: &Path, name: &str) -> PathBuf {
    let plain = dir.join(name);
    if !plain.exists() {
        return plain;
    }

    // `ext` keeps its leading dot, or is empty when there is none.
    let (stem, ext) = match name.rfind('.') {
        Some(dot) => name.split_at(dot),
        None => (name, ""),
    };

    (1..1000)
        .map(|n| dir.join(format!("{stem}-{n}{ext}")))
        .find(|candidate| !candidate.exists())
        .unwrap_or_else(|| dir.join(format!("{stem}-collision{ext}")))
}
310
/// Best-effort MIME guess from a filename. Returns None for unknown
/// extensions — receivers should not depend on this being present.
///
/// The extension is whatever follows the last `.` in `name`, compared
/// case-insensitively. A name with no `.` has no extension and yields
/// `None`, so a file literally called `zip` is not reported as an archive.
pub fn guess_mime(name: &str) -> Option<String> {
    let (_, ext) = name.rsplit_once('.')?;
    let mime = match ext.to_lowercase().as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "bmp" => "image/bmp",
        "pdf" => "application/pdf",
        "mp4" => "video/mp4",
        "webm" => "video/webm",
        "mov" => "video/quicktime",
        "mp3" => "audio/mpeg",
        "wav" => "audio/wav",
        "ogg" => "audio/ogg",
        "txt" => "text/plain",
        "md" => "text/markdown",
        "json" => "application/json",
        "zip" => "application/zip",
        "tar" => "application/x-tar",
        "gz" => "application/gzip",
        "rs" => "text/x-rust",
        "py" => "text/x-python",
        _ => return None,
    };
    Some(mime.into())
}
341
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a manager over a throwaway data directory. The `TempDir` guard
    /// is returned so the directory outlives the test body.
    fn fresh_manager() -> (FileManager, tempfile::TempDir) {
        let dir = tempfile::tempdir().expect("tempdir");
        let m = FileManager::new(dir.path()).expect("new");
        (m, dir)
    }

    #[test]
    fn sanitize_strips_slashes_and_control_chars() {
        // Leading `..` is stripped (no hidden traversal); inner is fine
        // because slashes are already replaced with `_`.
        assert_eq!(sanitize_filename("../../etc/passwd"), "_.._etc_passwd");
        assert_eq!(sanitize_filename("file/with\\path"), "file_with_path");
        assert_eq!(sanitize_filename("tab\there"), "tab_here");
        assert_eq!(sanitize_filename(""), "untitled");
        assert_eq!(sanitize_filename("..."), "untitled");
    }

    #[test]
    fn collision_picks_dash_suffix() {
        let tmp = tempfile::tempdir().unwrap();
        let p = tmp.path();
        let first = pick_non_colliding(p, "a.txt");
        assert_eq!(first.file_name().unwrap().to_str().unwrap(), "a.txt");
        std::fs::write(&first, b"x").unwrap();
        let second = pick_non_colliding(p, "a.txt");
        assert_eq!(second.file_name().unwrap().to_str().unwrap(), "a-1.txt");
        std::fs::write(&second, b"x").unwrap();
        let third = pick_non_colliding(p, "a.txt");
        assert_eq!(third.file_name().unwrap().to_str().unwrap(), "a-2.txt");
    }

    #[test]
    fn mime_lookup() {
        assert_eq!(guess_mime("photo.png").as_deref(), Some("image/png"));
        assert_eq!(guess_mime("notes.md").as_deref(), Some("text/markdown"));
        assert!(guess_mime("unknown.xyz").is_none());
    }

    #[test]
    fn outgoing_plan_round_trip_with_chunking() {
        let (mgr, _t) = fresh_manager();
        let bytes: Vec<u8> = (0..200_000u32).map(|i| (i & 0xFF) as u8).collect();
        let plan = mgr
            .prepare_outgoing_from_bytes("file.bin", None, bytes.clone())
            .unwrap();
        let expected_chunks = (bytes.len() + CHUNK_SIZE - 1) / CHUNK_SIZE;
        assert_eq!(plan.chunks.len(), expected_chunks);
        assert_eq!(plan.size_bytes, bytes.len() as u64);
        // The sender keeps its own verified copy in the cache.
        assert_eq!(mgr.read_cache(&plan.file_id).unwrap(), bytes);

        // Reassemble via accept_chunk into a fresh manager — should hit
        // hash-verification path and produce a cache file.
        let (mgr2, _t2) = fresh_manager();
        let total = plan.chunks.len() as u32;
        let mut completion: Option<CompletedFile> = None;
        for (i, chunk) in plan.chunks.iter().enumerate() {
            let c = mgr2
                .accept_chunk(&plan.file_id, i as u32, total, chunk.clone(), plan.size_bytes)
                .unwrap();
            let is_last = i as u32 + 1 == total;
            assert_eq!(c.is_some(), is_last, "completion must fire only on the last chunk");
            if c.is_some() {
                completion = c;
            }
        }
        let done = completion.expect("completion on last chunk");
        assert_eq!(done.file_id, plan.file_id);
        assert_eq!(done.size_bytes, bytes.len() as u64);
        assert!(done.cache_path.exists());
        let back = std::fs::read(&done.cache_path).unwrap();
        assert_eq!(back, bytes);
        // Completed transfers leave no in-progress state behind.
        assert!(mgr2.progress(&plan.file_id).is_none());
    }

    #[test]
    fn duplicate_chunks_are_ignored_no_double_count() {
        let (mgr, _t) = fresh_manager();
        let plan = mgr
            .prepare_outgoing_from_bytes("x.bin", None, vec![7u8; 200_000])
            .unwrap();
        let total = plan.chunks.len() as u32;
        assert!(total > 1, "test needs a multi-chunk transfer");
        let (mgr2, _t2) = fresh_manager();
        // Send chunk 0 twice — should not corrupt accounting.
        for _ in 0..2 {
            let r = mgr2
                .accept_chunk(
                    &plan.file_id,
                    0,
                    total,
                    plan.chunks[0].clone(),
                    plan.size_bytes,
                )
                .unwrap();
            assert!(r.is_none());
        }
        assert_eq!(
            mgr2.progress(&plan.file_id),
            Some((plan.chunks[0].len() as u64, plan.size_bytes)),
            "duplicate chunk must not be counted twice"
        );
        // Send remaining chunks.
        for i in 1..total {
            let r = mgr2
                .accept_chunk(
                    &plan.file_id,
                    i,
                    total,
                    plan.chunks[i as usize].clone(),
                    plan.size_bytes,
                )
                .unwrap();
            assert_eq!(
                r.is_some(),
                i + 1 == total,
                "completion should fire on the last chunk only"
            );
        }
    }

    #[test]
    fn hash_mismatch_is_rejected() {
        let (mgr, _t) = fresh_manager();
        let bytes = vec![1u8; 100];
        let plan = mgr
            .prepare_outgoing_from_bytes("x.bin", None, bytes)
            .unwrap();
        // 100 bytes fits in one chunk, so completion (and therefore hash
        // verification) is attempted on the only chunk.
        let total = plan.chunks.len() as u32;
        assert_eq!(total, 1);
        // Tamper with chunk 0.
        let (mgr2, _t2) = fresh_manager();
        let mut bad = plan.chunks[0].clone();
        bad[0] = bad[0].wrapping_add(1);
        let result = mgr2.accept_chunk(&plan.file_id, 0, total, bad, plan.size_bytes);
        assert!(result.is_err(), "expected hash mismatch error");
        // Nothing unverified may reach the cache, and the failed transfer's
        // state is discarded.
        assert!(!mgr2.cache_path(&plan.file_id).exists());
        assert!(mgr2.progress(&plan.file_id).is_none());
    }

    #[test]
    fn write_to_downloads_collision_suffixes() {
        let tmp = tempfile::tempdir().unwrap();
        let dl = tmp.path().to_path_buf();
        // Manually call sanitize / pick to avoid touching real ~/Downloads.
        let a = pick_non_colliding(&dl, &sanitize_filename("doc.txt"));
        std::fs::write(&a, b"a").unwrap();
        let b = pick_non_colliding(&dl, &sanitize_filename("doc.txt"));
        assert_eq!(b.file_name().unwrap().to_str().unwrap(), "doc-1.txt");
    }
}