Skip to main content

solo_storage/
path_validation.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! Refuse to initialize Solo inside a cloud-sync folder.
4//!
5//! Why: SQLCipher + cloud sync is a corruption time-bomb. Dropbox/OneDrive/etc.
6//! sync mid-write, sync the WAL but not the main DB, and produce silently
7//! broken files. We refuse upfront with a clear message — much better than
8//! discovering it after the user's memory is gone.
9//!
10//! The check is heuristic by design (a `Solo` folder named after a cloud
11//! provider is rare; misclassifying a real cloud folder as safe is dangerous).
12//! False positives are recoverable (`--allow-cloud-sync` flag, future work);
13//! false negatives are catastrophic.
14//!
//! Detection: walks every component of the path — the ancestors and the final
//! directory itself — and tests each against a list of known cloud-sync folder
//! names. Matching is case-insensitive (NTFS and HFS+ are case-insensitive at
//! the OS level; ext4 isn't, but typing `~/dropbox/...` is still risky).
19
20use solo_core::{Error, Result};
21use std::path::{Component, Path};
22use unicode_normalization::UnicodeNormalization;
23
/// Folder names produced by the most common cloud-sync clients.
///
/// Every entry MUST be stored lowercase: candidate path components are
/// lowercased before they are compared against this list, so an entry
/// containing an uppercase letter could never match. If you add a new entry,
/// prefer the literal folder name the client creates rather than its branding.
const CLOUD_SYNC_NAMES: &[&str] = &[
    // Dropbox: ~/Dropbox, ~/Dropbox (Personal), ~/Dropbox (Business)
    "dropbox",
    // OneDrive: ~/OneDrive, ~/OneDrive - <org name>
    "onedrive",
    // Google Drive — desktop client mounts under various names
    "google drive",
    "googledrive",
    "my drive",
    // iCloud Drive — both the user-facing folder and the macOS internal path
    "icloud drive",
    "icloud",
    "icloud~com~apple~clouddocs",
    "mobile documents",
    // macOS File Provider root: ~/Library/CloudStorage/<Provider>-<account>.
    // Current OneDrive, Google Drive, Box and Dropbox clients place their sync
    // folders here under names such as `OneDrive-Personal` or `Box-Box`, which
    // none of the per-provider entries match on their own.
    "cloudstorage",
    // Box.com
    "box",
    "box sync",
    // pCloud
    "pclouddrive",
    "pcloud drive",
    // MEGA
    "mega",
    "megasync",
    // Resilio Sync
    "resilio sync",
    // Sync.com
    "sync",
];
55
56/// Normalise a path component for comparison against
57/// `CLOUD_SYNC_NAMES`. Two passes:
58///
59///   1. **NFKC normalisation** — collapses compatibility variants
60///      to their canonical form. Maps full-width Latin (e.g.
61///      `dropbox` U+FF44 …) and ligatures (`ffi` → `ffi`) onto
62///      the ASCII shape so a path component that *looks* like
63///      "dropbox" but uses fancy codepoints is detected.
64///   2. **ASCII case-folding** — lowercase via `to_lowercase`.
65///
66/// What this **does NOT** catch: script-mixed confusables —
67/// Cyrillic 'о' (U+043E) and Latin 'o' (U+006F) are visually
68/// identical but live in different Unicode blocks and have no
69/// compatibility mapping. NFKC leaves them alone. Defending
70/// against those needs a confusable-detection pass (Unicode
71/// Technical Standard #39) which is out of scope for v0.3 — the
72/// dependency tree of `unicode-security` is heavier than the
73/// hardening it adds for this use case.
74fn canonicalize_for_match(s: &str) -> String {
75    s.nfkc().collect::<String>().to_lowercase()
76}
77
78/// Validate that `path` (a candidate Solo data dir) is safe.
79///
80/// Checks:
81/// 1. No ancestor path component is a known cloud-sync folder.
82/// 2. Path is absolute (otherwise the cloud-sync check is unreliable —
83///    a relative path could resolve into a cloud folder depending on cwd).
84///
85/// Returns Ok(()) on success, Err(Error::InvalidInput) with a clear message
86/// on failure. Existence of the path is NOT required — `solo init` creates it.
87pub fn validate_data_dir(path: &Path) -> Result<()> {
88    if !path.is_absolute() {
89        return Err(Error::invalid_input(format!(
90            "data dir must be an absolute path: got {}",
91            path.display()
92        )));
93    }
94
95    for component in path.components() {
96        match component {
97            Component::Normal(os_name) => {
98                let name_lc = canonicalize_for_match(&os_name.to_string_lossy());
99                if CLOUD_SYNC_NAMES.iter().any(|&n| name_lc == n) {
100                    return Err(Error::invalid_input(format!(
101                        "refusing to initialize Solo inside a cloud-sync folder: \
102                         `{}` (component `{}` matches known cloud-sync clients). \
103                         SQLCipher + cloud sync corrupts databases. \
104                         Choose a local-only path (e.g., ~/.solo).",
105                        path.display(),
106                        name_lc
107                    )));
108                }
109            }
110            // Windows UNC paths encode the share name in a Prefix component
111            // (e.g. \\server\Dropbox\... → Prefix("\\server\Dropbox")). The
112            // Display impl emits the full prefix string; pattern-match against
113            // each `\` segment so a share named "Dropbox" or "OneDrive" gets
114            // caught the same way as a Normal "Dropbox" component would.
115            #[cfg(windows)]
116            Component::Prefix(prefix) => {
117                let prefix_raw = prefix.as_os_str().to_string_lossy();
118                // NFKC + lowercase per-segment, same shape as the
119                // Normal-component branch above.
120                for segment in prefix_raw.split(['\\', '/']) {
121                    if segment.is_empty() {
122                        continue;
123                    }
124                    let segment_norm = canonicalize_for_match(segment);
125                    if CLOUD_SYNC_NAMES.iter().any(|&n| segment_norm == n) {
126                        return Err(Error::invalid_input(format!(
127                            "refusing to initialize Solo inside a cloud-sync folder: \
128                             `{}` (UNC prefix segment `{}` matches known cloud-sync clients). \
129                             SQLCipher + cloud sync corrupts databases. \
130                             Choose a local-only path (e.g., ~/.solo).",
131                            path.display(),
132                            segment_norm
133                        )));
134                    }
135                }
136            }
137            _ => {}
138        }
139    }
140
141    Ok(())
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    /// Construct a platform-absolute path from a forward-slash suffix. Tests
    /// describe path *components* (cloud-sync detection is component-level)
    /// and stay portable: Unix gets a `/` prefix, Windows gets `C:\` and
    /// backslash separators. Without this helper the literal `/Users/...`
    /// strings fail the `is_absolute` check on Windows.
    fn abs(suffix: &str) -> PathBuf {
        #[cfg(windows)]
        {
            let win = suffix.replace('/', "\\");
            PathBuf::from(format!("C:\\{win}"))
        }
        #[cfg(not(windows))]
        {
            PathBuf::from(format!("/{suffix}"))
        }
    }

    #[test]
    fn rejects_dropbox_root() {
        let p = abs("Users/alice/Dropbox/solo");
        let err = validate_data_dir(&p).unwrap_err();
        assert!(err.to_string().contains("cloud-sync"), "got: {err}");
        // The message reports the normalised (lowercased) component.
        assert!(err.to_string().contains("dropbox"), "got: {err}");
    }

    #[test]
    fn rejects_onedrive_root() {
        // Only the bare `OneDrive` component is exercised here; the
        // `OneDrive - <org name>` form is a different component string and
        // is not covered by this test.
        let p = abs("Users/bob/OneDrive/solo");
        assert!(validate_data_dir(&p).is_err());
    }

    #[test]
    fn rejects_icloud_drive() {
        // Caught through the `Mobile Documents` component of the macOS
        // internal iCloud path.
        let p = abs("Users/c/Library/Mobile Documents/com~apple~CloudDocs/solo");
        assert!(validate_data_dir(&p).is_err());
    }

    #[test]
    fn rejects_case_variations() {
        let p1 = abs("Users/d/DROPBOX/solo");
        let p2 = abs("Users/d/dropbox/solo");
        let p3 = abs("Users/d/Dropbox/solo");
        assert!(validate_data_dir(&p1).is_err());
        assert!(validate_data_dir(&p2).is_err());
        assert!(validate_data_dir(&p3).is_err());
    }

    #[test]
    fn accepts_dot_solo() {
        let p = abs("home/eve/.solo");
        assert!(validate_data_dir(&p).is_ok());
    }

    #[test]
    fn accepts_explicit_local_path() {
        let p = abs("var/lib/solo");
        assert!(validate_data_dir(&p).is_ok());
    }

    #[test]
    fn rejects_relative_path() {
        let p = PathBuf::from(".solo");
        let err = validate_data_dir(&p).unwrap_err();
        assert!(err.to_string().contains("absolute"), "got: {err}");
    }

    #[test]
    fn no_match_on_substring_within_a_component() {
        // "dropboxlike" is NOT "dropbox" — a known name with extra letters
        // glued on must not be flagged.
        let p = abs("home/f/dropboxlike/solo");
        assert!(validate_data_dir(&p).is_ok());
    }

    /// Pins a documented limitation rather than a rejection: NFKC +
    /// lowercase catches compatibility variants such as full-width Latin
    /// and ligatures, but it does **NOT** catch script-mixed confusables.
    /// Cyrillic 'о' (U+043E) is visually identical to Latin 'o' (U+006F)
    /// yet has no compatibility mapping to it, so NFKC leaves it alone.
    /// Closing the gap would require a confusable-detection pass (UTS #39)
    /// and a heavier dep (`unicode-security`).
    #[test]
    fn accepts_cyrillic_confusable_as_documented_limitation() {
        let p_cyrillic = abs("Users/x/dr\u{043e}pbox/solo"); // U+043E = Cyrillic 'o'
        assert!(
            validate_data_dir(&p_cyrillic).is_ok(),
            "NFKC does not catch script-mixed confusables — \
             documented behaviour, fix would need UTS #39 confusable detection"
        );
    }

    /// Positive case for the NFKC pass: a "Dropbox" path component
    /// written with **full-width Latin** (codepoints in the FFxx block)
    /// gets folded to ASCII by NFKC, lowercased, and then matches the
    /// "dropbox" entry in `CLOUD_SYNC_NAMES`.
    #[test]
    fn rejects_full_width_latin_dropbox_via_nfkc() {
        // U+FF24 is FULLWIDTH LATIN CAPITAL LETTER D, U+FF52 is FULLWIDTH
        // LATIN SMALL LETTER R, and so on. The component renders like the
        // ASCII word "Dropbox" but shares none of its 7 codepoints; NFKC
        // maps each one to its ASCII counterpart.
        let p = abs("Users/z/\u{FF24}\u{FF52}\u{FF4F}\u{FF50}\u{FF42}\u{FF4F}\u{FF58}/solo");
        let err = validate_data_dir(&p).unwrap_err();
        assert!(
            err.to_string().contains("cloud-sync"),
            "NFKC should fold full-width Latin to ASCII; got: {err}"
        );
    }

    /// Positive case for ligature folding: U+FB03 is LATIN SMALL LIGATURE
    /// FFI, a single codepoint that NFKC decomposes into the three ASCII
    /// letters `ffi`. None of our CLOUD_SYNC_NAMES contain that sequence
    /// today, but the test pins the NFKC behaviour for future entries.
    #[test]
    fn nfkc_decomposes_ligatures() {
        let normalised = canonicalize_for_match("o\u{FB03}ce"); // 'o' + ligature + "ce"
        assert_eq!(normalised, "office");
    }

    #[test]
    fn rejects_box_dot_com_via_box_component() {
        let p = abs("Users/y/Box/solo");
        assert!(validate_data_dir(&p).is_err());
    }

    #[test]
    fn empty_path_is_rejected_as_non_absolute() {
        let p = PathBuf::new();
        let err = validate_data_dir(&p).unwrap_err();
        assert!(err.to_string().contains("absolute"), "got: {err}");
    }

    /// UNC paths like `\\server\share\...` encode the share name in a
    /// `Component::Prefix`. The validator splits the raw prefix string on
    /// `\` or `/`, normalises each segment, and matches it against
    /// CLOUD_SYNC_NAMES — so a share literally named "Dropbox" is rejected,
    /// same as a Normal "Dropbox" component.
    ///
    /// Compiled on Windows only: elsewhere these strings are not UNC paths,
    /// and an empty test body would merely report a vacuous pass.
    #[cfg(windows)]
    #[test]
    fn windows_unc_path_share_name_is_caught() {
        let p_share = PathBuf::from(r"\\fileserver\Dropbox\team\solo");
        let err = validate_data_dir(&p_share).unwrap_err();
        assert!(
            err.to_string().contains("UNC prefix segment"),
            "expected UNC-specific error, got: {err}"
        );

        // OneDrive share too.
        let p_onedrive = PathBuf::from(r"\\nas\OneDrive\users\me\solo");
        assert!(validate_data_dir(&p_onedrive).is_err());

        // Cloud-sync names elsewhere in the path also caught
        // (Normal-component path).
        let p_inner = PathBuf::from(r"\\fileserver\share\Dropbox\solo");
        assert!(validate_data_dir(&p_inner).is_err());

        // Benign UNC share is allowed.
        let p_ok = PathBuf::from(r"\\fileserver\backup\team\solo");
        assert!(validate_data_dir(&p_ok).is_ok());
    }
}