Skip to main content

huddle_core/files/
mod.rs

1//! File transfer: chunking, reassembly, hash verification, cache, save.
2//!
3//! Cache layout:
4//!   <data_dir>/files/cache/<file_id>          // verified, complete
5//!   <data_dir>/files/cache/<file_id>.part     // in-progress reassembly
6//!
7//! File-IDs are the SHA-256 hash of the wire bytes (plaintext for
8//! non-encrypted offers, ciphertext for encrypted offers — the
9//! encryption layer is a separate concern). Receivers verify each
10//! completed transfer's bytes match the announced file_id before
11//! exposing the file to the caller.
12
13pub mod encryption;
14
15use std::collections::HashMap;
16use std::fs;
17use std::path::{Path, PathBuf};
18use std::sync::Mutex;
19
20use sha2::{Digest, Sha256};
21
22use crate::error::{HuddleError, Result};
23
/// Raw payload bytes carried by a single `FileChunk`.
///
/// Chunks travel base64-encoded inside a JSON envelope, so the raw size
/// has to absorb base64's roughly one-third growth plus the envelope and
/// still fit within gossipsub's transmit limit. 40 KiB of raw data is
/// about 55 KiB on the wire: below the 64 KiB gossipsub default and far
/// below the 256 KiB ceiling huddle configures explicitly (see
/// `network::start_network_with`).
pub const CHUNK_SIZE: usize = 40 * 1024;

/// Largest file a single Phase 2 offer may carry (1 MiB). Anything
/// bigger is deferred to a dedicated libp2p stream protocol (see the
/// Phase 3 notes in plan.md).
pub const MAX_FILE_SIZE: u64 = 1 << 20;
35
/// Result of the `prepare_outgoing_*` methods: everything needed to emit
/// one FileOffer followed by N FileChunk gossipsub messages.
#[derive(Clone, Debug)]
pub struct OutgoingPlan {
    /// Hex-encoded SHA-256 of the bytes being sent.
    pub file_id: String,
    /// File name to announce to receivers.
    pub name: String,
    /// MIME type, when one is known.
    pub mime: Option<String>,
    /// Total length of the payload in bytes.
    pub size_bytes: u64,
    /// The payload split into `CHUNK_SIZE` pieces, in send order. Always
    /// holds at least one entry; an empty payload yields one empty chunk.
    pub chunks: Vec<Vec<u8>>,
}
46
/// Handed back by `accept_chunk` once a transfer is fully received and
/// verified.
#[derive(Clone, Debug)]
pub struct CompletedFile {
    /// Identifier of the finished transfer.
    pub file_id: String,
    /// Where the verified bytes live inside the cache directory.
    pub cache_path: PathBuf,
    /// Length of the verified file in bytes.
    pub size_bytes: u64,
}
54
/// In-memory reassembly state for one transfer that is still arriving.
struct IncomingTransfer {
    /// Chunk count announced by the first chunk seen; every later chunk
    /// must announce the same total.
    expected_total: u32,
    /// Announced total file size. Starts as the caller's best guess (the
    /// offer's `size_bytes`, or `MAX_FILE_SIZE` when chunks beat the
    /// offer) and is corrected by `set_expected_size` when the offer
    /// arrives. Used as the progress denominator and as the running
    /// byte ceiling while chunks are accepted.
    expected_size: u64,
    /// Chunk payloads received so far, keyed by chunk index.
    chunks: HashMap<u32, Vec<u8>>,
    /// Sum of the lengths of the distinct chunks stored in `chunks`.
    bytes_received: u64,
}
65
66pub struct FileManager {
67    cache_dir: PathBuf,
68    incoming: Mutex<HashMap<String, IncomingTransfer>>,
69}
70
71impl FileManager {
72    /// `data_dir` is huddle's per-user data directory; the cache lives
73    /// underneath at `<data_dir>/files/cache`.
74    pub fn new(data_dir: &Path) -> Result<Self> {
75        let cache_dir = data_dir.join("files").join("cache");
76        fs::create_dir_all(&cache_dir)?;
77        Ok(Self {
78            cache_dir,
79            incoming: Mutex::new(HashMap::new()),
80        })
81    }
82
83    pub fn cache_dir(&self) -> &Path {
84        &self.cache_dir
85    }
86
87    pub fn cache_path(&self, file_id: &str) -> PathBuf {
88        self.cache_dir.join(file_id)
89    }
90
91    /// Read a previously-completed transfer's bytes from cache.
92    pub fn read_cache(&self, file_id: &str) -> Result<Vec<u8>> {
93        let path = self.cache_path(file_id);
94        Ok(fs::read(&path)?)
95    }
96
97    /// Build a transfer plan from an on-disk file.
98    pub fn prepare_outgoing_from_path(&self, path: &Path) -> Result<OutgoingPlan> {
99        let bytes = fs::read(path)?;
100        let name = path
101            .file_name()
102            .map(|n| n.to_string_lossy().to_string())
103            .unwrap_or_else(|| "untitled".into());
104        let mime = guess_mime(&name);
105        self.prepare_outgoing_from_bytes(&name, mime, bytes)
106    }
107
108    /// Build a transfer plan from an in-memory blob (useful for the
109    /// encrypted path, where the caller pre-encrypts a file).
110    pub fn prepare_outgoing_from_bytes(
111        &self,
112        name: &str,
113        mime: Option<String>,
114        bytes: Vec<u8>,
115    ) -> Result<OutgoingPlan> {
116        let size = bytes.len() as u64;
117        if size > MAX_FILE_SIZE {
118            return Err(HuddleError::Other(format!(
119                "file is {} bytes — Phase 2 cap is {} (~1 MiB)",
120                size, MAX_FILE_SIZE
121            )));
122        }
123        let file_id = sha256_hex(&bytes);
124        let chunks: Vec<Vec<u8>> = bytes.chunks(CHUNK_SIZE).map(|c| c.to_vec()).collect();
125        let chunks = if chunks.is_empty() {
126            vec![Vec::new()]
127        } else {
128            chunks
129        };
130
131        // Stash the outgoing file into our own cache too — that way the
132        // sender's UI can show the same "ready" card and re-save it
133        // later without a round-trip.
134        let cache_path = self.cache_path(&file_id);
135        if !cache_path.exists() {
136            fs::write(&cache_path, &bytes)?;
137        }
138
139        Ok(OutgoingPlan {
140            file_id,
141            name: name.to_string(),
142            mime,
143            size_bytes: size,
144            chunks,
145        })
146    }
147
148    /// Accept one chunk of an incoming transfer. Returns `Some` only on
149    /// the last chunk that completes the file (after SHA-256 verification).
150    pub fn accept_chunk(
151        &self,
152        file_id: &str,
153        chunk_index: u32,
154        total_chunks: u32,
155        data: Vec<u8>,
156        expected_size: u64,
157    ) -> Result<Option<CompletedFile>> {
158        if expected_size > MAX_FILE_SIZE {
159            return Err(HuddleError::Other(format!(
160                "incoming size {} exceeds Phase 2 cap",
161                expected_size
162            )));
163        }
164        // huddle 0.7.11: pre-0.7.11 only `expected_size` was capped,
165        // not the per-chunk `data.len()`, `chunk_index`, or the running
166        // `bytes_received`. A hostile peer could advertise expected_size
167        // = 1 MiB and stream chunks summing to far more (DoS via heap
168        // exhaustion). Now we enforce all four invariants up front and
169        // drop the transfer if any is violated.
170        if total_chunks == 0 {
171            return Err(HuddleError::Other(
172                "FileChunk: total_chunks must be ≥ 1".into(),
173            ));
174        }
175        if chunk_index >= total_chunks {
176            return Err(HuddleError::Other(format!(
177                "FileChunk: chunk_index {} >= total_chunks {}",
178                chunk_index, total_chunks
179            )));
180        }
181        // Each chunk is bounded by gossipsub's 256 KiB max_transmit_size
182        // anyway, but enforce here too so we don't accept oversize
183        // chunks that snuck past a misbehaving forwarder.
184        const MAX_CHUNK_BYTES: usize = 256 * 1024;
185        if data.len() > MAX_CHUNK_BYTES {
186            return Err(HuddleError::Other(format!(
187                "FileChunk: data {} bytes exceeds per-chunk cap of {}",
188                data.len(),
189                MAX_CHUNK_BYTES
190            )));
191        }
192        // Fast-skip if already complete.
193        let cache_path = self.cache_path(file_id);
194        if cache_path.exists() {
195            let bytes = fs::read(&cache_path)?;
196            if sha256_hex(&bytes) == file_id {
197                return Ok(Some(CompletedFile {
198                    file_id: file_id.into(),
199                    cache_path,
200                    size_bytes: bytes.len() as u64,
201                }));
202            }
203        }
204
205        let mut map = self.incoming.lock().unwrap();
206        let entry = map.entry(file_id.to_string()).or_insert(IncomingTransfer {
207            expected_total: total_chunks,
208            expected_size,
209            chunks: HashMap::new(),
210            bytes_received: 0,
211        });
212        if entry.expected_total != total_chunks {
213            return Err(HuddleError::Other(
214                "chunk total disagrees with prior chunks".into(),
215            ));
216        }
217        if !entry.chunks.contains_key(&chunk_index) {
218            let new_total = entry.bytes_received.saturating_add(data.len() as u64);
219            let ceiling = entry.expected_size.saturating_add(1024);
220            // expected_size acts as the running ceiling. Some senders'
221            // expected_size may be slightly off because of encryption
222            // overhead (Megolm ciphertext > plaintext); allow a 1KiB
223            // grace before rejecting outright.
224            if new_total > ceiling {
225                let advertised = entry.expected_size;
226                // Drop the whole transfer — we've overshot the advertised
227                // size which means either the peer is malicious or the
228                // file changed mid-stream. The mutable borrow on `entry`
229                // dies here so `map.remove` can take the second mut
230                // borrow cleanly.
231                let _ = entry; // make the implicit borrow explicit-end
232                map.remove(file_id);
233                return Err(HuddleError::Other(format!(
234                    "FileChunk: bytes_received {} would exceed expected_size {}",
235                    new_total, advertised
236                )));
237            }
238            entry.bytes_received = new_total;
239            entry.chunks.insert(chunk_index, data);
240        }
241
242        if entry.chunks.len() as u32 != entry.expected_total {
243            return Ok(None);
244        }
245
246        // All chunks arrived — assemble and verify.
247        let total = entry.expected_total;
248        let mut assembled: Vec<u8> = Vec::with_capacity(entry.bytes_received as usize);
249        for idx in 0..total {
250            let part = entry
251                .chunks
252                .get(&idx)
253                .ok_or_else(|| HuddleError::Other(format!("missing chunk {idx}")))?;
254            assembled.extend_from_slice(part);
255        }
256        map.remove(file_id);
257        drop(map);
258
259        let computed = sha256_hex(&assembled);
260        if computed != file_id {
261            return Err(HuddleError::Other(format!(
262                "hash mismatch — expected {}, got {}",
263                file_id, computed
264            )));
265        }
266        // Write to a `.part` then atomically rename — never expose a
267        // partial file under the final name.
268        let part = self.cache_dir.join(format!("{}.part", file_id));
269        fs::write(&part, &assembled)?;
270        fs::rename(&part, &cache_path)?;
271
272        Ok(Some(CompletedFile {
273            file_id: file_id.into(),
274            cache_path,
275            size_bytes: assembled.len() as u64,
276        }))
277    }
278
279    /// Drop any partial state for an incoming transfer.
280    pub fn cancel_incoming(&self, file_id: &str) {
281        self.incoming.lock().unwrap().remove(file_id);
282    }
283
284    /// Record the authoritative total size for an in-progress transfer —
285    /// called when a FileOffer arrives after chunks have already started,
286    /// so the progress denominator stops being a guess. No-op when there
287    /// is no active transfer for `file_id`.
288    pub fn set_expected_size(&self, file_id: &str, size: u64) {
289        if let Some(e) = self.incoming.lock().unwrap().get_mut(file_id) {
290            e.expected_size = size;
291        }
292    }
293
294    /// Bytes received so far and the expected total, for an in-progress
295    /// transfer.
296    pub fn progress(&self, file_id: &str) -> Option<(u64, u64)> {
297        let map = self.incoming.lock().unwrap();
298        let e = map.get(file_id)?;
299        Some((e.bytes_received, e.expected_size))
300    }
301
302    /// Copy `bytes` into the platform's Downloads folder under
303    /// `target_name` (with `-N` suffix on collision). Returns the
304    /// absolute path of the saved file.
305    pub fn write_to_downloads(&self, target_name: &str, bytes: &[u8]) -> Result<PathBuf> {
306        let dir = dirs::download_dir()
307            .or_else(dirs::home_dir)
308            .ok_or_else(|| HuddleError::Other("no Downloads / home directory".into()))?;
309        fs::create_dir_all(&dir)?;
310        let sanitized = sanitize_filename(target_name);
311        let path = pick_non_colliding(&dir, &sanitized);
312        fs::write(&path, bytes)?;
313        Ok(path)
314    }
315}
316
317fn sha256_hex(bytes: &[u8]) -> String {
318    let hash = Sha256::digest(bytes);
319    hex::encode(hash)
320}
321
/// Turn an arbitrary announced name into a safe single path component.
///
/// Every character that is not alphanumeric, `.`, `-`, `_` or a space
/// becomes `_`; leading and trailing spaces and dots are then removed.
/// If nothing is left, `"untitled"` is returned.
fn sanitize_filename(name: &str) -> String {
    let mut cleaned = String::with_capacity(name.len());
    for ch in name.chars() {
        let keep = ch.is_alphanumeric() || ch == '.' || ch == '-' || ch == '_' || ch == ' ';
        cleaned.push(if keep { ch } else { '_' });
    }

    match cleaned.trim_matches(|c: char| matches!(c, ' ' | '.')) {
        "" => String::from("untitled"),
        kept => kept.to_owned(),
    }
}
340
/// Choose a path inside `dir` for `name` that does not exist yet.
///
/// Returns `dir/name` when that is free. Otherwise tries
/// `stem-1.ext` … `stem-999.ext`, splitting `name` at its last dot, and
/// returns the first one that does not exist. If all of those are taken
/// it returns `stem-collision.ext` without checking whether it exists.
fn pick_non_colliding(dir: &Path, name: &str) -> PathBuf {
    let plain = dir.join(name);
    if !plain.exists() {
        return plain;
    }

    // `ext` keeps its leading dot so it can be appended verbatim.
    let (stem, ext) = match name.rfind('.') {
        Some(dot) => (&name[..dot], &name[dot..]),
        None => (name, ""),
    };

    (1..1000)
        .map(|n| dir.join(format!("{stem}-{n}{ext}")))
        .find(|candidate| !candidate.exists())
        .unwrap_or_else(|| dir.join(format!("{stem}-collision{ext}")))
}
358
/// Best-effort MIME guess from a filename. Returns None for unknown
/// extensions — receivers should not depend on this being present.
///
/// The extension is whatever follows the last `.` in `name`, compared
/// case-insensitively. A name without any dot has no extension and
/// yields `None`, so an extensionless file that happens to be called
/// `png` or `md` is not mistaken for an image or a Markdown document.
pub fn guess_mime(name: &str) -> Option<String> {
    let (_, ext) = name.rsplit_once('.')?;
    let mime = match ext.to_lowercase().as_str() {
        "png" => "image/png",
        "jpg" | "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp",
        "bmp" => "image/bmp",
        "pdf" => "application/pdf",
        "mp4" => "video/mp4",
        "webm" => "video/webm",
        "mov" => "video/quicktime",
        "mp3" => "audio/mpeg",
        "wav" => "audio/wav",
        "ogg" => "audio/ogg",
        "txt" => "text/plain",
        "md" => "text/markdown",
        "json" => "application/json",
        "zip" => "application/zip",
        "tar" => "application/x-tar",
        "gz" => "application/gzip",
        "rs" => "text/x-rust",
        "py" => "text/x-python",
        _ => return None,
    };
    Some(mime.into())
}
389
#[cfg(test)]
mod tests {
    use super::*;

    /// A `FileManager` backed by a fresh temporary directory. The
    /// `TempDir` is returned too so the directory outlives the test body.
    fn fresh_manager() -> (FileManager, tempfile::TempDir) {
        let dir = tempfile::tempdir().expect("tempdir");
        let m = FileManager::new(dir.path()).expect("new");
        (m, dir)
    }

    #[test]
    fn sanitize_strips_slashes_and_control_chars() {
        // Leading `..` is stripped (no hidden traversal); inner is fine
        // because slashes are already replaced with `_`.
        assert_eq!(sanitize_filename("../../etc/passwd"), "_.._etc_passwd");
        assert_eq!(sanitize_filename("file/with\\path"), "file_with_path");
        // Control characters are replaced like any other disallowed char.
        assert_eq!(sanitize_filename("a\u{0}b\n"), "a_b_");
        assert_eq!(sanitize_filename(""), "untitled");
        assert_eq!(sanitize_filename("..."), "untitled");
    }

    #[test]
    fn collision_picks_dash_suffix() {
        let tmp = tempfile::tempdir().unwrap();
        let p = tmp.path();
        let first = pick_non_colliding(p, "a.txt");
        assert_eq!(first.file_name().unwrap().to_str().unwrap(), "a.txt");
        std::fs::write(&first, b"x").unwrap();
        let second = pick_non_colliding(p, "a.txt");
        assert_eq!(second.file_name().unwrap().to_str().unwrap(), "a-1.txt");
        std::fs::write(&second, b"x").unwrap();
        let third = pick_non_colliding(p, "a.txt");
        assert_eq!(third.file_name().unwrap().to_str().unwrap(), "a-2.txt");
    }

    #[test]
    fn mime_lookup() {
        assert_eq!(guess_mime("photo.png").as_deref(), Some("image/png"));
        assert_eq!(guess_mime("notes.md").as_deref(), Some("text/markdown"));
        assert!(guess_mime("unknown.xyz").is_none());
    }

    #[test]
    fn outgoing_plan_round_trip_with_chunking() {
        let (mgr, _t) = fresh_manager();
        let bytes: Vec<u8> = (0..200_000u32).map(|i| (i & 0xFF) as u8).collect();
        let plan = mgr
            .prepare_outgoing_from_bytes("file.bin", None, bytes.clone())
            .unwrap();
        let expected_chunks = (bytes.len() + CHUNK_SIZE - 1) / CHUNK_SIZE;
        assert_eq!(plan.chunks.len(), expected_chunks);
        assert_eq!(plan.size_bytes, bytes.len() as u64);

        // Reassemble via accept_chunk into a fresh manager — should hit
        // hash-verification path and produce a cache file.
        let (mgr2, _t2) = fresh_manager();
        let total = plan.chunks.len() as u32;
        let mut completion: Option<CompletedFile> = None;
        for (i, chunk) in plan.chunks.iter().enumerate() {
            let c = mgr2
                .accept_chunk(&plan.file_id, i as u32, total, chunk.clone(), plan.size_bytes)
                .unwrap();
            if c.is_some() {
                completion = c;
            }
        }
        let done = completion.expect("completion on last chunk");
        assert_eq!(done.file_id, plan.file_id);
        assert_eq!(done.size_bytes, plan.size_bytes);
        assert!(done.cache_path.exists());
        let back = std::fs::read(&done.cache_path).unwrap();
        assert_eq!(back, bytes);
    }

    #[test]
    fn duplicate_chunks_are_ignored_no_double_count() {
        let (mgr, _t) = fresh_manager();
        let plan = mgr
            .prepare_outgoing_from_bytes("x.bin", None, vec![7u8; 200_000])
            .unwrap();
        let total = plan.chunks.len() as u32;
        let (mgr2, _t2) = fresh_manager();
        let first_len = plan.chunks[0].len() as u64;

        // Send chunk 0 twice — the byte count must not move the second
        // time.
        for _ in 0..2 {
            let r = mgr2
                .accept_chunk(
                    &plan.file_id,
                    0,
                    total,
                    plan.chunks[0].clone(),
                    plan.size_bytes,
                )
                .unwrap();
            assert!(r.is_none(), "transfer is not complete yet");
            assert_eq!(
                mgr2.progress(&plan.file_id),
                Some((first_len, plan.size_bytes))
            );
        }

        // Send remaining chunks.
        for i in 1..total {
            let r = mgr2
                .accept_chunk(
                    &plan.file_id,
                    i,
                    total,
                    plan.chunks[i as usize].clone(),
                    plan.size_bytes,
                )
                .unwrap();
            if i + 1 == total {
                assert!(r.is_some(), "completion should fire on last chunk");
            } else {
                assert!(r.is_none(), "no completion before the last chunk");
            }
        }
    }

    #[test]
    fn hash_mismatch_is_rejected() {
        let (mgr, _t) = fresh_manager();
        let bytes = vec![1u8; 100];
        let plan = mgr
            .prepare_outgoing_from_bytes("x.bin", None, bytes)
            .unwrap();
        let total = plan.chunks.len() as u32;
        // 100 bytes fit in one chunk, so the tampered chunk is also the
        // completing chunk and the mismatch must surface immediately.
        assert_eq!(total, 1);

        // Tamper with chunk 0.
        let (mgr2, _t2) = fresh_manager();
        let mut bad = plan.chunks[0].clone();
        bad[0] = bad[0].wrapping_add(1);
        let result = mgr2.accept_chunk(&plan.file_id, 0, total, bad, plan.size_bytes);

        assert!(result.is_err(), "expected hash mismatch error");
        // Nothing may be exposed in the cache, and the failed transfer
        // must not linger in memory.
        assert!(!mgr2.cache_path(&plan.file_id).exists());
        assert!(mgr2.progress(&plan.file_id).is_none());
    }

    #[test]
    fn write_to_downloads_collision_suffixes() {
        let tmp = tempfile::tempdir().unwrap();
        let dl = tmp.path().to_path_buf();
        // Exercises only the name-picking step that `write_to_downloads`
        // relies on, to avoid touching the real ~/Downloads.
        let a = pick_non_colliding(&dl, "doc.txt");
        std::fs::write(&a, b"a").unwrap();
        let b = pick_non_colliding(&dl, "doc.txt");
        assert_eq!(b.file_name().unwrap().to_str().unwrap(), "doc-1.txt");
    }
}