solo_storage/path_validation.rs
1// SPDX-License-Identifier: Apache-2.0
2
3//! Refuse to initialize Solo inside a cloud-sync folder.
4//!
5//! Why: SQLCipher + cloud sync is a corruption time-bomb. Dropbox/OneDrive/etc.
6//! sync mid-write, sync the WAL but not the main DB, and produce silently
7//! broken files. We refuse upfront with a clear message — much better than
8//! discovering it after the user's memory is gone.
9//!
10//! The check is heuristic by design (a `Solo` folder named after a cloud
11//! provider is rare; misclassifying a real cloud folder as safe is dangerous).
12//! False positives are recoverable (`--allow-cloud-sync` flag, future work);
13//! false negatives are catastrophic.
14//!
15//! Detection: walks every ancestor path component and tests each against a
16//! list of known cloud-sync folder names. Matching is case-insensitive (NTFS
17//! and HFS+ are case-insensitive at the OS level; ext4 isn't, but typing
18//! `~/dropbox/...` is still risky).
19
20use solo_core::{Error, Result};
21use std::path::{Component, Path};
22use unicode_normalization::UnicodeNormalization;
23
/// Folder names produced by the most common cloud-sync clients.
///
/// Entries are compared with `==` against a path component that has already
/// been passed through `canonicalize_for_match`, so every entry must be
/// written in lowercase, NFKC-normalised form. If you add a new entry,
/// prefer the literal folder name the client creates rather than its
/// branding.
const CLOUD_SYNC_NAMES: &[&str] = &[
    // Dropbox: ~/Dropbox, ~/Dropbox (Personal), ~/Dropbox (Business)
    "dropbox",
    // OneDrive: ~/OneDrive, ~/OneDrive - <org name>
    "onedrive",
    // Google Drive — desktop client mounts under various names
    "google drive",
    "googledrive",
    "my drive",
    // iCloud Drive — both the user-facing folder and the macOS internal
    // path (~/Library/Mobile Documents/com~apple~CloudDocs).
    "icloud drive",
    "icloud",
    "icloud~com~apple~clouddocs",
    "com~apple~clouddocs",
    "mobile documents",
    // macOS File Provider root: ~/Library/CloudStorage/<Provider>[-<account>].
    // Current Dropbox / OneDrive / Google Drive / Box clients keep their
    // folders here under account-suffixed names (e.g. `OneDrive-Personal`,
    // `GoogleDrive-<email>`) that an exact match on the provider name alone
    // would miss, so the parent folder itself is listed.
    "cloudstorage",
    // Box.com
    "box",
    "box sync",
    // pCloud
    "pclouddrive",
    "pcloud drive",
    // MEGA
    "mega",
    "megasync",
    // Resilio Sync
    "resilio sync",
    // Sync.com
    "sync",
];
55
56/// Normalise a path component for comparison against
57/// `CLOUD_SYNC_NAMES`. Two passes:
58///
59/// 1. **NFKC normalisation** — collapses compatibility variants
60/// to their canonical form. Maps full-width Latin (e.g.
61/// `dropbox` U+FF44 …) and ligatures (`ffi` → `ffi`) onto
62/// the ASCII shape so a path component that *looks* like
63/// "dropbox" but uses fancy codepoints is detected.
64/// 2. **ASCII case-folding** — lowercase via `to_lowercase`.
65///
66/// What this **does NOT** catch: script-mixed confusables —
67/// Cyrillic 'о' (U+043E) and Latin 'o' (U+006F) are visually
68/// identical but live in different Unicode blocks and have no
69/// compatibility mapping. NFKC leaves them alone. Defending
70/// against those needs a confusable-detection pass (Unicode
71/// Technical Standard #39) which is out of scope for v0.3 — the
72/// dependency tree of `unicode-security` is heavier than the
73/// hardening it adds for this use case.
74fn canonicalize_for_match(s: &str) -> String {
75 s.nfkc().collect::<String>().to_lowercase()
76}
77
78/// Validate that `path` (a candidate Solo data dir) is safe.
79///
80/// Checks:
81/// 1. No ancestor path component is a known cloud-sync folder.
82/// 2. Path is absolute (otherwise the cloud-sync check is unreliable —
83/// a relative path could resolve into a cloud folder depending on cwd).
84///
85/// Returns Ok(()) on success, Err(Error::InvalidInput) with a clear message
86/// on failure. Existence of the path is NOT required — `solo init` creates it.
87pub fn validate_data_dir(path: &Path) -> Result<()> {
88 if !path.is_absolute() {
89 return Err(Error::invalid_input(format!(
90 "data dir must be an absolute path: got {}",
91 path.display()
92 )));
93 }
94
95 for component in path.components() {
96 match component {
97 Component::Normal(os_name) => {
98 let name_lc = canonicalize_for_match(&os_name.to_string_lossy());
99 if CLOUD_SYNC_NAMES.iter().any(|&n| name_lc == n) {
100 return Err(Error::invalid_input(format!(
101 "refusing to initialize Solo inside a cloud-sync folder: \
102 `{}` (component `{}` matches known cloud-sync clients). \
103 SQLCipher + cloud sync corrupts databases. \
104 Choose a local-only path (e.g., ~/.solo).",
105 path.display(),
106 name_lc
107 )));
108 }
109 }
110 // Windows UNC paths encode the share name in a Prefix component
111 // (e.g. \\server\Dropbox\... → Prefix("\\server\Dropbox")). The
112 // Display impl emits the full prefix string; pattern-match against
113 // each `\` segment so a share named "Dropbox" or "OneDrive" gets
114 // caught the same way as a Normal "Dropbox" component would.
115 #[cfg(windows)]
116 Component::Prefix(prefix) => {
117 let prefix_raw = prefix.as_os_str().to_string_lossy();
118 // NFKC + lowercase per-segment, same shape as the
119 // Normal-component branch above.
120 for segment in prefix_raw.split(['\\', '/']) {
121 if segment.is_empty() {
122 continue;
123 }
124 let segment_norm = canonicalize_for_match(segment);
125 if CLOUD_SYNC_NAMES.iter().any(|&n| segment_norm == n) {
126 return Err(Error::invalid_input(format!(
127 "refusing to initialize Solo inside a cloud-sync folder: \
128 `{}` (UNC prefix segment `{}` matches known cloud-sync clients). \
129 SQLCipher + cloud sync corrupts databases. \
130 Choose a local-only path (e.g., ~/.solo).",
131 path.display(),
132 segment_norm
133 )));
134 }
135 }
136 }
137 _ => {}
138 }
139 }
140
141 Ok(())
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Construct a platform-absolute path from a forward-slash suffix. Tests
    /// describe path *components* (cloud-sync detection is component-level)
    /// and stay portable: Unix gets a `/` prefix, Windows gets `C:\` and
    /// backslash separators. Without this helper the literal `/Users/...`
    /// strings fail the `is_absolute` check on Windows.
    fn abs(suffix: &str) -> PathBuf {
        if cfg!(windows) {
            PathBuf::from(format!("C:\\{}", suffix.replace('/', "\\")))
        } else {
            PathBuf::from(format!("/{suffix}"))
        }
    }

    #[test]
    fn rejects_dropbox_root() {
        let p = abs("Users/alice/Dropbox/solo");
        let err = validate_data_dir(&p).unwrap_err();
        assert!(err.to_string().contains("cloud-sync"), "got: {err}");
        // The message reports the normalised (lowercased) component.
        assert!(err.to_string().contains("dropbox"), "got: {err}");
    }

    /// Covers the bare `OneDrive` component only; the `OneDrive - <org>`
    /// naming is not exercised here.
    #[test]
    fn rejects_plain_onedrive_component() {
        let p = abs("Users/bob/OneDrive/solo");
        assert!(validate_data_dir(&p).is_err());
    }

    #[test]
    fn rejects_icloud_drive() {
        let p = abs("Users/c/Library/Mobile Documents/com~apple~CloudDocs/solo");
        assert!(validate_data_dir(&p).is_err());
    }

    #[test]
    fn rejects_case_variations() {
        for folder in ["DROPBOX", "dropbox", "Dropbox"] {
            let p = abs(&format!("Users/d/{folder}/solo"));
            assert!(validate_data_dir(&p).is_err(), "accepted: {}", p.display());
        }
    }

    #[test]
    fn accepts_dot_solo() {
        let p = abs("home/eve/.solo");
        assert!(validate_data_dir(&p).is_ok());
    }

    #[test]
    fn accepts_explicit_local_path() {
        let p = abs("var/lib/solo");
        assert!(validate_data_dir(&p).is_ok());
    }

    #[test]
    fn rejects_relative_path() {
        let p = PathBuf::from(".solo");
        let err = validate_data_dir(&p).unwrap_err();
        assert!(err.to_string().contains("absolute"), "got: {err}");
    }

    #[test]
    fn no_match_on_substring_within_a_component() {
        // "dropboxlike" is NOT "dropbox" — we match whole components only.
        let p = abs("home/f/dropboxlike/solo");
        assert!(validate_data_dir(&p).is_ok());
    }

    /// Pins a documented limitation: NFKC + lowercase catches compatibility
    /// variants (full-width Latin, ligatures) but NOT script-mixed
    /// confusables. Cyrillic 'о' (U+043E) looks like Latin 'o' (U+006F) but
    /// has no compatibility mapping, so NFKC leaves it alone and the path is
    /// accepted. Closing the gap would need a confusable-detection pass
    /// (UTS #39) and a heavier dep (`unicode-security`).
    #[test]
    fn accepts_cyrillic_confusable_dropbox_documented_limitation() {
        let p_cyrillic = abs("Users/x/dr\u{043e}pbox/solo"); // U+043E = Cyrillic 'o'
        assert!(
            validate_data_dir(&p_cyrillic).is_ok(),
            "NFKC does not catch script-mixed confusables — \
             documented behaviour, fix would need UTS #39 confusable detection"
        );
    }

    /// Positive case for the NFKC pass: a "Dropbox" path component written
    /// with **full-width Latin** (codepoints in the FFxx block) gets folded
    /// to ASCII by NFKC and then matches the lowercase "dropbox" entry in
    /// `CLOUD_SYNC_NAMES`.
    #[test]
    fn rejects_full_width_latin_dropbox_via_nfkc() {
        // U+FF24 is full-width 'D', U+FF52 full-width 'r', and so on: the
        // component renders like "Dropbox" but shares no codepoint with the
        // ASCII spelling. NFKC maps each letter to its ASCII counterpart.
        let p = abs("Users/z/\u{FF24}\u{FF52}\u{FF4F}\u{FF50}\u{FF42}\u{FF4F}\u{FF58}/solo");
        let err = validate_data_dir(&p).unwrap_err();
        assert!(
            err.to_string().contains("cloud-sync"),
            "NFKC should fold full-width Latin to ASCII; got: {err}"
        );
    }

    /// Positive case for ligature folding: U+FB03 is the Latin small
    /// ligature ffi; NFKC decomposes it to the three letters "ffi". None of
    /// the `CLOUD_SYNC_NAMES` entries contain "ffi" today, but the test pins
    /// the NFKC behaviour for future entries.
    #[test]
    fn nfkc_decomposes_ligatures() {
        let normalised = canonicalize_for_match("o\u{FB03}ce"); // renders as "office"
        assert_eq!(normalised, "office");
    }

    #[test]
    fn rejects_box_dot_com_via_box_component() {
        let p = abs("Users/y/Box/solo");
        assert!(validate_data_dir(&p).is_err());
    }

    #[test]
    fn empty_path_is_rejected_as_non_absolute() {
        let p = PathBuf::new();
        let err = validate_data_dir(&p).unwrap_err();
        assert!(err.to_string().contains("absolute"), "got: {err}");
    }

    /// UNC paths like `\\server\share\...` carry the share name in the
    /// path's Prefix component rather than in a Normal component. Each
    /// segment of the prefix is normalised and matched against
    /// `CLOUD_SYNC_NAMES`, so a share literally named "Dropbox" is rejected
    /// just like a Normal "Dropbox" component.
    ///
    /// Compiled on Windows only: elsewhere these strings are not UNC paths,
    /// and an empty test body would pass without checking anything.
    #[cfg(windows)]
    #[test]
    fn windows_unc_path_share_name_is_caught() {
        let p_share = PathBuf::from(r"\\fileserver\Dropbox\team\solo");
        let err = validate_data_dir(&p_share).unwrap_err();
        assert!(
            err.to_string().contains("UNC prefix segment"),
            "expected UNC-specific error, got: {err}"
        );

        // OneDrive share too.
        let p_onedrive = PathBuf::from(r"\\nas\OneDrive\users\me\solo");
        assert!(validate_data_dir(&p_onedrive).is_err());

        // Cloud-sync names elsewhere in the path are also caught
        // (Normal-component path).
        let p_inner = PathBuf::from(r"\\fileserver\share\Dropbox\solo");
        assert!(validate_data_dir(&p_inner).is_err());

        // Benign UNC share is allowed.
        let p_ok = PathBuf::from(r"\\fileserver\backup\team\solo");
        assert!(validate_data_dir(&p_ok).is_ok());
    }
}