memvid_cli/analytics/
id.rs

1//! Anonymous ID generation for analytics
2//!
3//! Creates SHA256-based anonymous identifiers that cannot be reversed
4//! but are consistent for the same user/file combination.
5
6use sha2::{Digest, Sha256};
7use std::sync::OnceLock;
8
9/// Cached machine ID
10static MACHINE_ID: OnceLock<String> = OnceLock::new();
11
12/// Get or generate a stable machine identifier
13/// Uses hostname + username hash for privacy
14fn get_machine_id() -> &'static str {
15    MACHINE_ID.get_or_init(|| {
16        let mut hasher = Sha256::new();
17
18        // Add hostname
19        if let Ok(hostname) = hostname::get() {
20            hasher.update(hostname.to_string_lossy().as_bytes());
21        }
22
23        // Add username
24        hasher.update(whoami::username().as_bytes());
25
26        // Add home directory for uniqueness
27        if let Some(home) = dirs::home_dir() {
28            hasher.update(home.to_string_lossy().as_bytes());
29        }
30
31        // Add a static salt
32        hasher.update(b"memvid_telemetry_v1");
33
34        let result = hasher.finalize();
35        format!("{:x}", result)[..16].to_string()
36    })
37}
38
39/// Generate an anonymous ID for a user
40/// Format: `anon_` + SHA256(machine_id + optional_file)[:16]
41///
42/// For paid users with API key, use the API key prefix instead
43pub fn generate_anon_id(file_path: Option<&str>) -> String {
44    // Check for API key (paid user)
45    if let Ok(api_key) = std::env::var("MEMVID_API_KEY") {
46        if api_key.len() >= 8 {
47            let prefix = &api_key[..8];
48            let mut hasher = Sha256::new();
49            hasher.update(prefix.as_bytes());
50            hasher.update(b"memvid_paid_v1");
51            let result = hasher.finalize();
52            return format!("paid_{:x}", result)[..21].to_string(); // paid_ + 16 chars
53        }
54    }
55
56    // Free user - use machine ID
57    let machine_id = get_machine_id();
58    let mut hasher = Sha256::new();
59    hasher.update(machine_id.as_bytes());
60
61    // Add file path if provided for more granularity
62    if let Some(path) = file_path {
63        hasher.update(path.as_bytes());
64    }
65
66    hasher.update(b"memvid_anon_v1");
67    let result = hasher.finalize();
68    format!("anon_{:x}", result)[..21].to_string() // anon_ + 16 chars
69}
70
71/// Generate a hash for a file path
72/// Used to track file activity without revealing the actual path
73pub fn generate_file_hash(file_path: &str) -> String {
74    let mut hasher = Sha256::new();
75
76    // Normalize the path
77    let normalized = std::path::Path::new(file_path)
78        .canonicalize()
79        .map(|p| p.to_string_lossy().to_string())
80        .unwrap_or_else(|_| file_path.to_string());
81
82    hasher.update(normalized.as_bytes());
83    hasher.update(b"memvid_file_v1");
84    let result = hasher.finalize();
85    format!("{:x}", result)[..16].to_string()
86}
87
#[cfg(test)]
mod tests {
    use super::*;

    /// The ID carries one of the two known prefixes and is 21 characters long.
    #[test]
    fn test_anon_id_format() {
        let anon = generate_anon_id(None);
        let has_known_prefix = ["anon_", "paid_"]
            .iter()
            .any(|prefix| anon.starts_with(*prefix));
        assert!(has_known_prefix);
        assert_eq!(anon.len(), 21);
    }

    /// Repeated calls with the same path produce the same ID.
    #[test]
    fn test_anon_id_consistency() {
        let path = Some("/test/file.mv2");
        let first = generate_anon_id(path);
        let second = generate_anon_id(path);
        assert_eq!(first, second);
    }

    /// The file hash is 16 characters long.
    #[test]
    fn test_file_hash_format() {
        let digest = generate_file_hash("/test/file.mv2");
        assert_eq!(digest.len(), 16);
    }
}