Skip to main content

st/
scanner_state.rs

//
// -----------------------------------------------------------------------------
//  SCANNER STATE: Change Detection Between Scans
//
//  This module manages persistent state for Smart Tree's intelligent scanning.
//  By remembering what we saw last time, we can tell you "what changed" instead
//  of "everything that exists."
//
//  Key concepts:
//  - ScanState: Persisted snapshot of a directory at a point in time
//  - FileSignature: Hash + metadata for fast change detection
//  - ScanDelta: The diff between two scans
//  - HotDirectory: Directories with frequent changes worth watching
//
//  "Don't repeat what hasn't changed." - Omni
// -----------------------------------------------------------------------------
//
18
use crate::scanner_interest::{ChangeType, InterestLevel};
use anyhow::Result;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256};
use std::collections::HashMap;
use std::fs::File;
use std::io::{BufReader, BufWriter, Read, Write};
use std::path::{Path, PathBuf};
use std::time::SystemTime;
28
29/// File signature for change detection
30/// Uses a combination of hash and metadata for fast comparison
31#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
32pub struct FileSignature {
33    /// Blake3/SHA256 hash of file contents (for files < 10MB)
34    /// For larger files or directories, this is None
35    pub content_hash: Option<String>,
36
37    /// Last modification time
38    pub mtime: SystemTime,
39
40    /// File size in bytes
41    pub size: u64,
42
43    /// File permissions (Unix mode)
44    pub permissions: u32,
45
46    /// Is this a directory?
47    pub is_dir: bool,
48
49    /// Is this a symlink?
50    pub is_symlink: bool,
51}
52
53impl FileSignature {
54    /// Create a signature from a path
55    pub fn from_path(path: &Path) -> Result<Self> {
56        let metadata = std::fs::symlink_metadata(path)?;
57        let is_symlink = metadata.file_type().is_symlink();
58        let is_dir = metadata.is_dir();
59
60        // Get actual metadata (following symlinks if needed)
61        let (size, mtime, permissions) = if is_symlink {
62            // For symlinks, use symlink metadata
63            (
64                0,
65                metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH),
66                Self::get_permissions(&metadata),
67            )
68        } else {
69            (
70                metadata.len(),
71                metadata.modified().unwrap_or(SystemTime::UNIX_EPOCH),
72                Self::get_permissions(&metadata),
73            )
74        };
75
76        // Only hash small files (< 10MB)
77        let content_hash = if !is_dir && !is_symlink && size < 10_000_000 {
78            Self::hash_file(path).ok()
79        } else {
80            None
81        };
82
83        Ok(Self {
84            content_hash,
85            mtime,
86            size,
87            permissions,
88            is_dir,
89            is_symlink,
90        })
91    }
92
93    /// Quick check if file might have changed (without hashing)
94    pub fn quick_changed(&self, other: &Self) -> bool {
95        self.mtime != other.mtime || self.size != other.size || self.permissions != other.permissions
96    }
97
98    /// Full check if file has changed (including hash if available)
99    pub fn changed(&self, other: &Self) -> bool {
100        if self.quick_changed(other) {
101            return true;
102        }
103
104        // If both have hashes, compare them
105        match (&self.content_hash, &other.content_hash) {
106            (Some(h1), Some(h2)) => h1 != h2,
107            _ => false, // Can't determine from hash, assume unchanged
108        }
109    }
110
111    /// Hash a file's contents using SHA256
112    fn hash_file(path: &Path) -> Result<String> {
113        let file = File::open(path)?;
114        let mut reader = BufReader::new(file);
115        let mut hasher = Sha256::new();
116        let mut buffer = [0u8; 8192];
117
118        loop {
119            let bytes_read = reader.read(&mut buffer)?;
120            if bytes_read == 0 {
121                break;
122            }
123            hasher.update(&buffer[..bytes_read]);
124        }
125
126        Ok(hex::encode(hasher.finalize()))
127    }
128
129    #[cfg(unix)]
130    fn get_permissions(metadata: &std::fs::Metadata) -> u32 {
131        use std::os::unix::fs::PermissionsExt;
132        metadata.permissions().mode()
133    }
134
135    #[cfg(not(unix))]
136    fn get_permissions(_metadata: &std::fs::Metadata) -> u32 {
137        0o644 // Default permissions for non-Unix
138    }
139}
140
141/// Persistent state for a scanned directory
142#[derive(Debug, Clone, Serialize, Deserialize)]
143pub struct ScanState {
144    /// When this state was created
145    pub scan_time: SystemTime,
146
147    /// Root path that was scanned
148    pub root: PathBuf,
149
150    /// File signatures for all scanned files
151    pub signatures: HashMap<PathBuf, FileSignature>,
152
153    /// Directories marked as "hot" (frequently changing)
154    pub hot_directories: Vec<HotDirectory>,
155
156    /// Total files in this state
157    pub total_files: u64,
158
159    /// Total directories in this state
160    pub total_dirs: u64,
161
162    /// Version of the state format (for migrations)
163    pub version: u32,
164}
165
166impl ScanState {
167    /// Current state format version
168    pub const CURRENT_VERSION: u32 = 1;
169
170    /// Create a new empty state
171    pub fn new(root: PathBuf) -> Self {
172        Self {
173            scan_time: SystemTime::now(),
174            root,
175            signatures: HashMap::new(),
176            hot_directories: Vec::new(),
177            total_files: 0,
178            total_dirs: 0,
179            version: Self::CURRENT_VERSION,
180        }
181    }
182
183    /// Add a file signature to the state
184    pub fn add_signature(&mut self, path: PathBuf, sig: FileSignature) {
185        if sig.is_dir {
186            self.total_dirs += 1;
187        } else {
188            self.total_files += 1;
189        }
190        self.signatures.insert(path, sig);
191    }
192
193    /// Get the state file path for a given directory
194    pub fn state_path(root: &Path) -> PathBuf {
195        let state_dir = dirs::home_dir()
196            .unwrap_or_else(|| PathBuf::from("."))
197            .join(".st")
198            .join("scan_states");
199
200        // Create a safe filename from the path
201        let safe_name = root
202            .to_string_lossy()
203            .replace(['/', '\\', ':'], "_")
204            .trim_matches('_')
205            .to_string();
206
207        state_dir.join(format!("{}.state.json", safe_name))
208    }
209
210    /// Save state to disk
211    pub fn save(&self) -> Result<PathBuf> {
212        let path = Self::state_path(&self.root);
213
214        // Ensure directory exists
215        if let Some(parent) = path.parent() {
216            std::fs::create_dir_all(parent)?;
217        }
218
219        let file = File::create(&path)?;
220        let writer = BufWriter::new(file);
221        serde_json::to_writer_pretty(writer, self)?;
222
223        Ok(path)
224    }
225
226    /// Load state from disk
227    pub fn load(root: &Path) -> Result<Option<Self>> {
228        let path = Self::state_path(root);
229
230        if !path.exists() {
231            return Ok(None);
232        }
233
234        let file = File::open(&path)?;
235        let reader = BufReader::new(file);
236        let state: Self = serde_json::from_reader(reader)?;
237
238        // Check version compatibility
239        if state.version > Self::CURRENT_VERSION {
240            anyhow::bail!(
241                "State file version {} is newer than supported version {}",
242                state.version,
243                Self::CURRENT_VERSION
244            );
245        }
246
247        Ok(Some(state))
248    }
249
250    /// Compare with another state and produce a delta
251    pub fn diff(&self, newer: &ScanState) -> ScanDelta {
252        let mut delta = ScanDelta::new(self.root.clone());
253
254        // Find added and modified files
255        for (path, new_sig) in &newer.signatures {
256            match self.signatures.get(path) {
257                None => {
258                    // File was added
259                    delta.added.push(path.clone());
260                }
261                Some(old_sig) => {
262                    if new_sig.changed(old_sig) {
263                        // Determine type of change
264                        let change_type = if old_sig.permissions != new_sig.permissions
265                            && old_sig.size == new_sig.size
266                            && old_sig.content_hash == new_sig.content_hash
267                        {
268                            ChangeType::PermissionChanged
269                        } else if old_sig.is_dir != new_sig.is_dir
270                            || old_sig.is_symlink != new_sig.is_symlink
271                        {
272                            ChangeType::TypeChanged
273                        } else {
274                            ChangeType::Modified
275                        };
276                        delta.modified.push((path.clone(), change_type));
277                    }
278                }
279            }
280        }
281
282        // Find deleted files
283        for path in self.signatures.keys() {
284            if !newer.signatures.contains_key(path) {
285                delta.deleted.push(path.clone());
286            }
287        }
288
289        // Update summary
290        delta.nothing_changed =
291            delta.added.is_empty() && delta.modified.is_empty() && delta.deleted.is_empty();
292        delta.older_scan_time = Some(self.scan_time);
293        delta.newer_scan_time = Some(newer.scan_time);
294
295        delta
296    }
297}
298
299/// The difference between two scans
300#[derive(Debug, Clone, Serialize, Deserialize)]
301pub struct ScanDelta {
302    /// Root path for this delta
303    pub root: PathBuf,
304
305    /// Files that were added since last scan
306    pub added: Vec<PathBuf>,
307
308    /// Files that were modified (path and type of change)
309    pub modified: Vec<(PathBuf, ChangeType)>,
310
311    /// Files that were deleted since last scan
312    pub deleted: Vec<PathBuf>,
313
314    /// True if nothing changed at all
315    pub nothing_changed: bool,
316
317    /// Time of the older scan
318    pub older_scan_time: Option<SystemTime>,
319
320    /// Time of the newer scan
321    pub newer_scan_time: Option<SystemTime>,
322}
323
324impl ScanDelta {
325    /// Create a new empty delta
326    pub fn new(root: PathBuf) -> Self {
327        Self {
328            root,
329            added: Vec::new(),
330            modified: Vec::new(),
331            deleted: Vec::new(),
332            nothing_changed: true,
333            older_scan_time: None,
334            newer_scan_time: None,
335        }
336    }
337
338    /// Get total number of changes
339    pub fn change_count(&self) -> usize {
340        self.added.len() + self.modified.len() + self.deleted.len()
341    }
342
343    /// Get a summary string
344    pub fn summary(&self) -> String {
345        if self.nothing_changed {
346            return String::from("No changes since last scan");
347        }
348
349        let mut parts = Vec::new();
350
351        if !self.added.is_empty() {
352            parts.push(format!("+{} added", self.added.len()));
353        }
354        if !self.modified.is_empty() {
355            parts.push(format!("~{} modified", self.modified.len()));
356        }
357        if !self.deleted.is_empty() {
358            parts.push(format!("-{} deleted", self.deleted.len()));
359        }
360
361        parts.join(", ")
362    }
363
364    /// Get paths by interest level (for smart formatting)
365    pub fn paths_by_interest(&self) -> HashMap<InterestLevel, Vec<PathBuf>> {
366        let mut result: HashMap<InterestLevel, Vec<PathBuf>> = HashMap::new();
367
368        // Deleted files are important
369        for path in &self.deleted {
370            result
371                .entry(InterestLevel::Important)
372                .or_default()
373                .push(path.clone());
374        }
375
376        // Modified files are notable to important
377        for (path, change_type) in &self.modified {
378            let level = match change_type {
379                ChangeType::PermissionChanged => InterestLevel::Important,
380                ChangeType::TypeChanged => InterestLevel::Important,
381                _ => InterestLevel::Notable,
382            };
383            result.entry(level).or_default().push(path.clone());
384        }
385
386        // Added files are notable
387        for path in &self.added {
388            result
389                .entry(InterestLevel::Notable)
390                .or_default()
391                .push(path.clone());
392        }
393
394        result
395    }
396}
397
398/// A directory marked as "hot" due to frequent changes
399#[derive(Debug, Clone, Serialize, Deserialize)]
400pub struct HotDirectory {
401    /// Path to the hot directory
402    pub path: PathBuf,
403
404    /// Number of changes in the tracking period
405    pub change_count: u32,
406
407    /// When we started tracking this directory
408    pub tracking_since: SystemTime,
409
410    /// Average changes per day
411    pub changes_per_day: f32,
412
413    /// Most active hours (0-23)
414    pub active_hours: Vec<u8>,
415
416    /// Interest level based on activity
417    pub interest_level: InterestLevel,
418}
419
420impl HotDirectory {
421    /// Create a new hot directory entry
422    pub fn new(path: PathBuf) -> Self {
423        Self {
424            path,
425            change_count: 0,
426            tracking_since: SystemTime::now(),
427            changes_per_day: 0.0,
428            active_hours: Vec::new(),
429            interest_level: InterestLevel::Notable,
430        }
431    }
432
433    /// Record a change in this directory
434    pub fn record_change(&mut self) {
435        self.change_count += 1;
436
437        // Update changes per day
438        if let Ok(duration) = SystemTime::now().duration_since(self.tracking_since) {
439            let days = duration.as_secs_f32() / 86400.0;
440            if days > 0.0 {
441                self.changes_per_day = self.change_count as f32 / days;
442            }
443        }
444
445        // Update interest level based on activity
446        self.interest_level = if self.changes_per_day > 50.0 {
447            InterestLevel::Critical
448        } else if self.changes_per_day > 20.0 {
449            InterestLevel::Important
450        } else if self.changes_per_day > 5.0 {
451            InterestLevel::Notable
452        } else {
453            InterestLevel::Background
454        };
455    }
456
457    /// Check if this directory is considered "hot"
458    pub fn is_hot(&self) -> bool {
459        self.changes_per_day >= 10.0
460    }
461}
462
463/// Statistics about change detection
464#[derive(Debug, Clone, Default, Serialize, Deserialize)]
465pub struct ChangeStats {
466    /// Total files compared
467    pub files_compared: u64,
468
469    /// Files that were unchanged
470    pub unchanged: u64,
471
472    /// Files that were added
473    pub added: u64,
474
475    /// Files that were modified
476    pub modified: u64,
477
478    /// Files that were deleted
479    pub deleted: u64,
480
481    /// Time elapsed for comparison
482    pub comparison_time_ms: u64,
483}
484
485impl ChangeStats {
486    /// Get the percentage of files that changed
487    pub fn change_percentage(&self) -> f32 {
488        if self.files_compared == 0 {
489            return 0.0;
490        }
491        (self.added + self.modified + self.deleted) as f32 / self.files_compared as f32 * 100.0
492    }
493}
494
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Builds the signature of a plain file with `0o644` permissions.
    fn file_sig(hash: &str, size: u64, mtime: SystemTime) -> FileSignature {
        FileSignature {
            content_hash: Some(hash.to_string()),
            mtime,
            size,
            permissions: 0o644,
            is_dir: false,
            is_symlink: false,
        }
    }

    #[test]
    fn test_file_signature_creation() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.txt");
        std::fs::write(&file_path, "Hello, world!").unwrap();

        let sig = FileSignature::from_path(&file_path).unwrap();
        assert!(!sig.is_dir);
        assert!(!sig.is_symlink);
        assert_eq!(sig.size, 13);
        assert!(sig.content_hash.is_some());
    }

    #[test]
    fn test_file_signature_change_detection() {
        let tmp = TempDir::new().unwrap();
        let file_path = tmp.path().join("test.txt");

        // Create initial file
        std::fs::write(&file_path, "Hello").unwrap();
        let sig1 = FileSignature::from_path(&file_path).unwrap();

        // Modify file (different length, so the size alone reveals the change)
        std::fs::write(&file_path, "Hello, world!").unwrap();
        let sig2 = FileSignature::from_path(&file_path).unwrap();

        assert!(sig2.changed(&sig1));
    }

    #[test]
    fn test_scan_state_persistence() {
        let tmp = TempDir::new().unwrap();
        let root = tmp.path().to_path_buf();

        // Create state
        let mut state = ScanState::new(root.clone());
        state.add_signature(
            root.join("test.txt"),
            file_sig("abc123", 100, SystemTime::now()),
        );

        // Save and reload
        let save_path = state.save().unwrap();
        assert!(save_path.exists());

        let loaded = ScanState::load(&root).unwrap().unwrap();
        assert_eq!(loaded.total_files, 1);
        assert!(loaded.signatures.contains_key(&root.join("test.txt")));

        // The state file lives outside the temp dir, so remove it explicitly.
        let _ = std::fs::remove_file(&save_path);
    }

    #[test]
    fn test_scan_delta() {
        let root = PathBuf::from("/test");

        // Create old state
        let mut old_state = ScanState::new(root.clone());
        old_state.add_signature(
            root.join("unchanged.txt"),
            file_sig("hash1", 100, SystemTime::UNIX_EPOCH),
        );
        old_state.add_signature(
            root.join("deleted.txt"),
            file_sig("hash2", 50, SystemTime::UNIX_EPOCH),
        );

        // Create new state
        let mut new_state = ScanState::new(root.clone());
        new_state.add_signature(
            root.join("unchanged.txt"),
            file_sig("hash1", 100, SystemTime::UNIX_EPOCH),
        );
        new_state.add_signature(
            root.join("added.txt"),
            file_sig("hash3", 200, SystemTime::now()),
        );

        let delta = old_state.diff(&new_state);

        assert!(!delta.nothing_changed);
        assert_eq!(delta.added.len(), 1);
        assert_eq!(delta.deleted.len(), 1);
        assert!(delta.modified.is_empty());
        assert!(delta.added.contains(&root.join("added.txt")));
        assert!(delta.deleted.contains(&root.join("deleted.txt")));
    }

    #[test]
    fn test_scan_delta_modified() {
        let root = PathBuf::from("/test");

        let mut old_state = ScanState::new(root.clone());
        old_state.add_signature(
            root.join("edited.txt"),
            file_sig("hash1", 100, SystemTime::UNIX_EPOCH),
        );

        let mut new_state = ScanState::new(root.clone());
        new_state.add_signature(
            root.join("edited.txt"),
            file_sig("hash2", 150, SystemTime::UNIX_EPOCH),
        );

        let delta = old_state.diff(&new_state);

        assert!(!delta.nothing_changed);
        assert!(delta.added.is_empty());
        assert!(delta.deleted.is_empty());
        assert_eq!(delta.modified.len(), 1);
        assert_eq!(delta.modified[0].0, root.join("edited.txt"));
        assert!(matches!(delta.modified[0].1, ChangeType::Modified));
    }

    #[test]
    fn test_hot_directory() {
        let mut hot = HotDirectory::new(PathBuf::from("/src"));
        // Back-date the start so the rate does not depend on how much time the
        // loop below happens to take.
        hot.tracking_since = SystemTime::now() - std::time::Duration::from_secs(3600);

        // Record many changes
        for _ in 0..100 {
            hot.record_change();
        }

        assert!(hot.is_hot());
        assert!(hot.changes_per_day > 0.0);
    }
}