Skip to main content

subx_cli/core/matcher/
cache.rs

1//! Caching utilities for the file matching engine.
2//!
3//! Defines cache data structures and operations to store and retrieve
4//! previous matching results for faster repeated execution.
5//!
6//! # Examples
7//!
8//! ```rust
9//! use subx_cli::core::matcher::cache::{CacheData, SnapshotItem, OpItem};
10//! // Load existing cache or initialize a new one
11//! ```
12
13use serde::{Deserialize, Serialize};
14use std::path::PathBuf;
15
16/// Snapshot item representing a file state for directory comparison.
17///
18/// Used to detect changes in the filesystem since the last cache update.
19/// Contains essential file metadata for comparison purposes.
20#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
21pub struct SnapshotItem {
22    /// Canonical absolute path to the file.
23    ///
24    /// Defaults to an empty string when deserializing cache files produced
25    /// by older SubX versions that did not record the full path.
26    #[serde(default)]
27    pub path: String,
28    /// File name (without path)
29    pub name: String,
30    /// File size in bytes
31    pub size: u64,
32    /// Last modification time as Unix timestamp
33    pub mtime: u64,
34    /// File type classification (e.g., "video", "subtitle")
35    pub file_type: String,
36}
37
/// A snapshot entry that no longer agrees with what is found on disk.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct StaleFile {
    /// Absolute path of the affected file, as recorded in the snapshot.
    pub path: String,
    /// Plain-text explanation of why the entry is considered stale.
    pub reason: String,
}
46
47/// Single match operation cache item storing result details.
48///
49/// Represents a cached match operation between a video and subtitle file,
50/// including all the analysis results and metadata.
51#[derive(Debug, Serialize, Deserialize, Clone)]
52pub struct OpItem {
53    /// Path to the video file
54    pub video_file: String,
55    /// Path to the subtitle file
56    pub subtitle_file: String,
57    /// The new name assigned to the subtitle file
58    pub new_subtitle_name: String,
59    /// Confidence score of the match (0.0 to 1.0)
60    pub confidence: f32,
61    /// List of reasoning factors for this match
62    pub reasoning: Vec<String>,
63}
64
65/// Dry-run cache data structure containing snapshot and match history.
66///
67/// Stores the complete state of a directory scan and match operations,
68/// enabling efficient incremental processing and result caching.
69#[derive(Debug, Serialize, Deserialize, Clone)]
70pub struct CacheData {
71    /// Version of the cache format for compatibility checking
72    pub cache_version: String,
73    /// Path to the directory that was processed
74    pub directory: String,
75    /// Snapshot of all files found during scanning
76    pub file_snapshot: Vec<SnapshotItem>,
77    /// List of all match operations performed
78    pub match_operations: Vec<OpItem>,
79    /// Timestamp when the cache was created
80    pub created_at: u64,
81    /// AI model used for matching operations
82    pub ai_model_used: String,
83    /// Hash of configuration used for matching
84    pub config_hash: String,
85    /// Records the relocation mode when the cache was generated
86    #[serde(default)]
87    pub original_relocation_mode: String,
88    /// Records whether backup was enabled when the cache was generated
89    #[serde(default)]
90    pub original_backup_enabled: bool,
91}
92
93impl CacheData {
94    /// Loads cache data from the specified file path.
95    pub fn load(path: &std::path::Path) -> Result<Self, anyhow::Error> {
96        let content = std::fs::read_to_string(path)?;
97        let data = serde_json::from_str(&content)?;
98        Ok(data)
99    }
100
101    /// Returns `true` when the cache carries no file snapshot entries.
102    ///
103    /// Legacy caches generated before snapshot population was implemented
104    /// will report `true`; callers should treat such caches as unable to
105    /// perform freshness validation.
106    pub fn has_empty_snapshot(&self) -> bool {
107        self.file_snapshot.is_empty()
108    }
109
110    /// Validates every snapshot entry against the current filesystem state.
111    ///
112    /// Returns a list of [`StaleFile`] records describing each file whose
113    /// on-disk state diverges from the snapshot (missing, size mismatch, or
114    /// modification-time mismatch). An empty result indicates that the
115    /// snapshot is still consistent with the filesystem.
116    pub fn validate_snapshot(&self) -> Vec<StaleFile> {
117        let mut stale = Vec::new();
118        for item in &self.file_snapshot {
119            if item.path.is_empty() {
120                stale.push(StaleFile {
121                    path: item.name.clone(),
122                    reason: "snapshot entry missing canonical path".to_string(),
123                });
124                continue;
125            }
126
127            let path = std::path::Path::new(&item.path);
128            let metadata = match std::fs::metadata(path) {
129                Ok(m) => m,
130                Err(e) => {
131                    stale.push(StaleFile {
132                        path: item.path.clone(),
133                        reason: format!("file missing or inaccessible: {}", e),
134                    });
135                    continue;
136                }
137            };
138
139            if metadata.len() != item.size {
140                stale.push(StaleFile {
141                    path: item.path.clone(),
142                    reason: format!(
143                        "size changed (snapshot={}, current={})",
144                        item.size,
145                        metadata.len()
146                    ),
147                });
148                continue;
149            }
150
151            let current_mtime = metadata
152                .modified()
153                .ok()
154                .and_then(|m| m.duration_since(std::time::UNIX_EPOCH).ok())
155                .map(|d| d.as_secs())
156                .unwrap_or(0);
157            if current_mtime != item.mtime {
158                stale.push(StaleFile {
159                    path: item.path.clone(),
160                    reason: format!(
161                        "mtime changed (snapshot={}, current={})",
162                        item.mtime, current_mtime
163                    ),
164                });
165            }
166        }
167        stale
168    }
169
170    /// Checks whether any planned subtitle target path already exists on disk.
171    ///
172    /// For every cached operation, the target path is computed based on the
173    /// recorded relocation mode: when the mode is anything other than
174    /// `"None"`, the target resides next to the video file; otherwise it
175    /// stays alongside the original subtitle file. Returns the list of
176    /// target paths that already exist and would therefore conflict with
177    /// applying the cached plan.
178    pub fn validate_target_paths(&self) -> Vec<PathBuf> {
179        let mut conflicts = Vec::new();
180        let relocation_mode = self.original_relocation_mode.as_str();
181        let relocates = !matches!(relocation_mode, "" | "None");
182
183        for op in &self.match_operations {
184            let parent = if relocates {
185                std::path::Path::new(&op.video_file).parent()
186            } else {
187                std::path::Path::new(&op.subtitle_file).parent()
188            };
189
190            let Some(parent) = parent else { continue };
191            let target = parent.join(&op.new_subtitle_name);
192
193            let source = std::path::Path::new(&op.subtitle_file);
194            if target.exists() && target != source {
195                conflicts.push(target);
196            }
197        }
198        conflicts
199    }
200}
201
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use std::io::Write;
    use std::time::{SystemTime, UNIX_EPOCH};
    use tempfile::tempdir;

    /// Builds a snapshot entry mirroring the current on-disk state of `path`.
    fn snapshot_for(path: &std::path::Path, file_type: &str) -> SnapshotItem {
        let meta = fs::metadata(path).unwrap();
        let modified = meta.modified().unwrap();
        SnapshotItem {
            path: path.to_string_lossy().into_owned(),
            name: path.file_name().unwrap().to_string_lossy().into_owned(),
            size: meta.len(),
            mtime: modified.duration_since(UNIX_EPOCH).unwrap().as_secs(),
            file_type: file_type.to_owned(),
        }
    }

    /// Assembles a cache around the given snapshot, operations and
    /// relocation mode; every other field gets a fixed placeholder.
    fn make_cache(snapshot: Vec<SnapshotItem>, ops: Vec<OpItem>, mode: &str) -> CacheData {
        let now = SystemTime::now()
            .duration_since(UNIX_EPOCH)
            .unwrap()
            .as_secs();
        CacheData {
            cache_version: "1.0".to_owned(),
            directory: String::new(),
            file_snapshot: snapshot,
            match_operations: ops,
            created_at: now,
            ai_model_used: "test".to_owned(),
            config_hash: "hash".to_owned(),
            original_relocation_mode: mode.to_owned(),
            original_backup_enabled: false,
        }
    }

    /// Plans renaming `subtitle` to `movie.srt` as the match for `video`.
    fn rename_to_movie_srt(video: &std::path::Path, subtitle: &std::path::Path) -> OpItem {
        OpItem {
            video_file: video.to_string_lossy().into_owned(),
            subtitle_file: subtitle.to_string_lossy().into_owned(),
            new_subtitle_name: "movie.srt".to_owned(),
            confidence: 0.9,
            reasoning: Vec::new(),
        }
    }

    #[test]
    fn validate_snapshot_returns_empty_when_files_match() {
        let workspace = tempdir().unwrap();
        let video_path = workspace.path().join("movie.mkv");
        let subtitle_path = workspace.path().join("movie.srt");
        fs::write(&video_path, b"video").unwrap();
        fs::write(&subtitle_path, b"sub").unwrap();

        let cache = make_cache(
            vec![
                snapshot_for(&video_path, "video"),
                snapshot_for(&subtitle_path, "subtitle"),
            ],
            vec![],
            "None",
        );

        assert!(cache.validate_snapshot().is_empty());
    }

    #[test]
    fn validate_snapshot_detects_modified_file() {
        let workspace = tempdir().unwrap();
        let video_path = workspace.path().join("movie.mkv");
        fs::write(&video_path, b"video").unwrap();
        let before_edit = snapshot_for(&video_path, "video");

        // Overwrite with longer content so the recorded size no longer matches.
        {
            let mut handle = fs::OpenOptions::new()
                .write(true)
                .open(&video_path)
                .unwrap();
            handle.write_all(b"video-edited-and-grown").unwrap();
        }

        let findings = make_cache(vec![before_edit], vec![], "None").validate_snapshot();
        assert_eq!(findings.len(), 1);
        assert!(findings[0].reason.contains("size changed"));
    }

    #[test]
    fn validate_snapshot_detects_missing_file() {
        let workspace = tempdir().unwrap();
        let video_path = workspace.path().join("movie.mkv");
        fs::write(&video_path, b"video").unwrap();
        let before_removal = snapshot_for(&video_path, "video");
        fs::remove_file(&video_path).unwrap();

        let findings = make_cache(vec![before_removal], vec![], "None").validate_snapshot();
        assert_eq!(findings.len(), 1);
        assert!(findings[0].reason.contains("missing"));
    }

    #[test]
    fn validate_target_paths_returns_empty_when_no_conflict() {
        let workspace = tempdir().unwrap();
        let video_path = workspace.path().join("movie.mkv");
        let subtitle_path = workspace.path().join("original.srt");
        fs::write(&video_path, b"video").unwrap();
        fs::write(&subtitle_path, b"sub").unwrap();

        let plan = vec![rename_to_movie_srt(&video_path, &subtitle_path)];
        let cache = make_cache(vec![], plan, "None");

        assert!(cache.validate_target_paths().is_empty());
    }

    #[test]
    fn validate_target_paths_detects_existing_target() {
        let workspace = tempdir().unwrap();
        let video_path = workspace.path().join("movie.mkv");
        let subtitle_path = workspace.path().join("original.srt");
        let occupied = workspace.path().join("movie.srt");
        fs::write(&video_path, b"video").unwrap();
        fs::write(&subtitle_path, b"sub").unwrap();
        fs::write(&occupied, b"conflict").unwrap();

        let plan = vec![rename_to_movie_srt(&video_path, &subtitle_path)];
        let found = make_cache(vec![], plan, "None").validate_target_paths();

        assert_eq!(found.len(), 1);
        assert_eq!(found[0], occupied);
    }

    #[test]
    fn validate_target_paths_uses_video_dir_when_relocating() {
        let workspace = tempdir().unwrap();
        let video_dir = workspace.path().join("videos");
        let subtitle_dir = workspace.path().join("subs");
        fs::create_dir_all(&video_dir).unwrap();
        fs::create_dir_all(&subtitle_dir).unwrap();

        let video_path = video_dir.join("movie.mkv");
        let subtitle_path = subtitle_dir.join("original.srt");
        let occupied = video_dir.join("movie.srt");
        fs::write(&video_path, b"video").unwrap();
        fs::write(&subtitle_path, b"sub").unwrap();
        fs::write(&occupied, b"conflict").unwrap();

        let plan = vec![rename_to_movie_srt(&video_path, &subtitle_path)];
        let found = make_cache(vec![], plan, "Copy").validate_target_paths();

        assert_eq!(found, vec![occupied]);
    }

    #[test]
    fn has_empty_snapshot_reports_legacy_caches() {
        let without_snapshot = make_cache(vec![], vec![], "None");
        assert!(without_snapshot.has_empty_snapshot());

        let workspace = tempdir().unwrap();
        let video_path = workspace.path().join("movie.mkv");
        fs::write(&video_path, b"video").unwrap();

        let with_snapshot = make_cache(vec![snapshot_for(&video_path, "video")], vec![], "None");
        assert!(!with_snapshot.has_empty_snapshot());
    }
}