ant_core/datamap_file.rs
1//! Persisted DataMap file format.
2//!
3//! A `.datamap` file is the on-disk form of a `self_encryption::DataMap` that
4//! private uploads need in order to be downloaded later. Every callsite that
5//! writes one (today: `ant-cli` and `ant-gui`) routes through this module so
6//! the wire format and naming convention stay consistent.
7//!
8//! # Wire format
9//!
10//! Canonical: msgpack (`rmp_serde`). The file contains the bare serialized
11//! `DataMap` — no header, no envelope. This matches the format `ant-cli` has
12//! always written; `ant-gui` adopts it here.
13//!
14//! For backwards compatibility, [`read_datamap`] also accepts the legacy JSON
15//! format that older `ant-gui` versions wrote. Format detection is by sniffing
16//! the first byte of the file:
17//! - `0x7B` (`{`) → JSON (legacy ant-gui)
18//! - else → msgpack (canonical)
19//!
20//! A future envelope format wrapping the DataMap with metadata (e.g. original
21//! filename, version) would be signalled by a magic byte that is neither `{`
22//! nor a valid msgpack initial byte. The reserved byte for that purpose is
23//! `0xC1`, which is unused in the msgpack spec.
24//!
25//! # Naming convention
26//!
27//! The original filename is preserved verbatim with `.datamap` appended:
28//! `holiday.jpg` → `holiday.jpg.datamap`, `Makefile` → `Makefile.datamap`,
29//! `archive.tar.gz` → `archive.tar.gz.datamap`. Files without an extension
30//! are handled naturally because we append rather than replace.
31//!
32//! On collision under [`CollisionPolicy::NumericSuffix`], a `-N` (starting at
33//! 2) is inserted before the `.datamap` extension: `holiday.jpg-2.datamap`,
34//! `holiday.jpg-3.datamap`, …
35
36use std::ffi::OsString;
37use std::fs;
38use std::io::Write;
39use std::path::{Path, PathBuf};
40
41use self_encryption::DataMap;
42use tempfile::NamedTempFile;
43
44use crate::data::error::{Error, Result};
45
/// Extension appended to every persisted datamap file, without the leading
/// dot. Callers that need the full suffix format it as `.{DATAMAP_EXTENSION}`.
pub const DATAMAP_EXTENSION: &str = "datamap";
48
/// Cap on collision-suffix attempts before [`write_datamap`] gives up.
///
/// The count includes the unsuffixed name: the first attempt probes
/// `<name>.datamap` and the remaining attempts probe `-2` through `-1000`,
/// so at most 1000 candidate names are checked in total.
///
/// In normal use a directory will see at most a handful of repeated
/// uploads; this bound just protects against pathological state (e.g.
/// thousands of stale entries) so we fail fast instead of looping.
/// Set generously (1000) so legitimate users with many backups of the
/// same file don't hit a confusing error before pathological state does.
const MAX_COLLISION_ATTEMPTS: u32 = 1000;
57
/// Behaviour when a target datamap filename already exists.
///
/// Passed to [`write_datamap`] to choose between replacing an existing
/// file and picking a fresh, numbered filename next to it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CollisionPolicy {
    /// Replace the existing file. Used by `ant-cli` when invoked with
    /// `--overwrite` to preserve its pre-`feat/datamap-fs-helper` behaviour.
    /// No existence check is made; the canonical filename is always used.
    Overwrite,
    /// Insert `-N` (starting at 2) between the filename and `.datamap`.
    /// `holiday.jpg.datamap` → `holiday.jpg-2.datamap` → `-3` → … capped at
    /// `MAX_COLLISION_ATTEMPTS` (1000) candidate names, after which the
    /// write fails instead of probing further.
    NumericSuffix,
}
69
70/// Construct the canonical datamap filename for an arbitrary input filename.
71///
72/// Appends `.datamap` without replacing any existing extension, then runs
73/// the result through filename sanitization (alphanumerics + a small set
74/// of punctuation kept; everything else replaced with `_`) so platform-
75/// illegal characters don't reach the filesystem. Falls back to
76/// `datamap.datamap` when the input sanitizes to an empty string.
77///
78/// Pure function: takes only the basename, never a path with separators.
79pub fn datamap_filename_for(original_name: &str) -> String {
80 let sanitized = sanitize_filename(original_name);
81 if sanitized.is_empty() {
82 format!("datamap.{DATAMAP_EXTENSION}")
83 } else {
84 format!("{sanitized}.{DATAMAP_EXTENSION}")
85 }
86}
87
88/// Inverse of [`datamap_filename_for`] for download UX. Strips a single
89/// trailing `.datamap` from the basename of `path`.
90///
91/// Returns `None` when `path` has no UTF-8 basename, doesn't end in
92/// `.datamap`, or would produce an empty result. Does *not* attempt to undo
93/// `-N` collision suffixes — `holiday.jpg-2.datamap` returns `holiday.jpg-2`
94/// rather than `holiday.jpg`. The collision suffix is a write-side artifact;
95/// callers can offer the literal stem as a default and let users edit.
96pub fn original_name_from_datamap(path: &Path) -> Option<OsString> {
97 let basename = path.file_name()?.to_str()?;
98 let stripped = basename.strip_suffix(&format!(".{DATAMAP_EXTENSION}"))?;
99 if stripped.is_empty() {
100 None
101 } else {
102 Some(OsString::from(stripped))
103 }
104}
105
106/// Write `dm` to `dir` using the canonical naming and the given collision
107/// policy. Returns the absolute path to the written file.
108///
109/// Atomicity: writes to a tempfile in `dir`, fsyncs the file, renames into
110/// place, then (on Unix) best-effort-fsyncs the parent directory. A crash
111/// at any point cannot leave a half-serialized datamap at the target
112/// path, and on Unix the rename itself is durable across power loss
113/// because the directory entry is flushed. The tempfile is created on
114/// the same filesystem as `dir` to keep the rename atomic. Windows
115/// relies on NTFS metadata journaling for rename durability — we do not
116/// fsync the directory there because Windows does not expose that
117/// operation through `std::fs`.
118pub fn write_datamap(
119 dir: &Path,
120 original_name: &str,
121 dm: &DataMap,
122 policy: CollisionPolicy,
123) -> Result<PathBuf> {
124 fs::create_dir_all(dir)?;
125 let bytes = rmp_serde::to_vec(dm)
126 .map_err(|e| Error::Serialization(format!("DataMap msgpack encode failed: {e}")))?;
127 let base_filename = datamap_filename_for(original_name);
128 let target = reserve_target_path(dir, &base_filename, policy)?;
129 write_atomic(dir, &target, &bytes)?;
130 Ok(target)
131}
132
133/// Read a persisted datamap, auto-detecting msgpack vs legacy JSON.
134pub fn read_datamap(path: &Path) -> Result<DataMap> {
135 let bytes = fs::read(path)?;
136 if bytes.first() == Some(&b'{') {
137 // Legacy JSON written by ant-gui versions prior to the shared helper.
138 serde_json::from_slice::<DataMap>(&bytes)
139 .map_err(|e| Error::Serialization(format!("DataMap JSON decode failed: {e}")))
140 } else {
141 rmp_serde::from_slice::<DataMap>(&bytes)
142 .map_err(|e| Error::Serialization(format!("DataMap msgpack decode failed: {e}")))
143 }
144}
145
/// Turn an arbitrary filename into one that is safe to create on disk.
///
/// Alphanumeric characters (Unicode-aware) and the punctuation ` `, `-`,
/// `_`, `.`, `(`, `)` pass through unchanged; every other character —
/// including path separators and control characters — becomes `_`. The
/// result then has leading and trailing whitespace trimmed. Dots at either
/// end are kept on purpose so dotfiles such as `.bashrc` keep their name.
///
/// May return an empty string (e.g. for empty or all-space input); callers
/// are responsible for supplying a fallback.
fn sanitize_filename(name: &str) -> String {
    let mut cleaned = String::with_capacity(name.len());
    for ch in name.chars() {
        let allowed = ch.is_alphanumeric() || " -_.()".contains(ch);
        cleaned.push(if allowed { ch } else { '_' });
    }
    // Only plain spaces can remain at the edges here: every other
    // whitespace character was already mapped to `_` above.
    cleaned.trim().to_owned()
}
163
164fn reserve_target_path(
165 dir: &Path,
166 base_filename: &str,
167 policy: CollisionPolicy,
168) -> Result<PathBuf> {
169 match policy {
170 CollisionPolicy::Overwrite => Ok(dir.join(base_filename)),
171 CollisionPolicy::NumericSuffix => {
172 // base_filename is e.g. `photo.jpg.datamap`. The collision suffix
173 // sits between the trailing `.datamap` and the rest of the name:
174 // `photo.jpg-2.datamap`, never `photo-2.jpg.datamap`.
175 let stem = base_filename
176 .strip_suffix(&format!(".{DATAMAP_EXTENSION}"))
177 .unwrap_or(base_filename);
178 for attempt in 0..MAX_COLLISION_ATTEMPTS {
179 let candidate = if attempt == 0 {
180 base_filename.to_string()
181 } else {
182 format!("{stem}-{}.{DATAMAP_EXTENSION}", attempt + 1)
183 };
184 let path = dir.join(&candidate);
185 if !path.exists() {
186 return Ok(path);
187 }
188 }
189 Err(Error::Storage(format!(
190 "Unable to reserve a free datamap filename after {MAX_COLLISION_ATTEMPTS} attempts in {}",
191 dir.display()
192 )))
193 }
194 }
195}
196
197fn write_atomic(dir: &Path, target: &Path, bytes: &[u8]) -> Result<()> {
198 let mut tmp = NamedTempFile::new_in(dir)?;
199 tmp.write_all(bytes)?;
200 tmp.as_file().sync_all()?;
201 tmp.persist(target).map_err(|e| Error::Io(e.error))?;
202 // On Unix, fsync the parent directory so the rename itself survives
203 // a crash. On ext4 (default mount opts) and btrfs the rename can
204 // otherwise be lost if the directory entry hasn't reached disk. This
205 // is best-effort — a failure here means we wrote a valid datamap but
206 // can't prove the rename is durable, which is no worse than where we
207 // were before the call. Windows has no portable directory-fsync, so
208 // we skip it there and lean on NTFS metadata journaling.
209 #[cfg(unix)]
210 {
211 if let Ok(dir_handle) = fs::File::open(dir) {
212 let _ = dir_handle.sync_all();
213 }
214 }
215 Ok(())
216}