Skip to main content

subx_cli/commands/
cache_command.rs

//! Cache management command implementation.
//!
//! This module provides cache management functionality through the `cache`
//! subcommand, enabling users to inspect, apply, rollback, and clear cached
//! data from SubX operations.
//!
//! # Subcommands
//!
//! - **`cache status`** — display cache metadata (path, size, age, AI model,
//!   operation count, config hash validity, snapshot freshness, journal presence).
//!   Supports `--json` for machine-readable output.
//! - **`cache apply`** — replay cached dry-run results without calling the AI
//!   provider. Validates file snapshot and target paths, prompts for
//!   confirmation, and writes a journal for rollback.
//! - **`cache rollback`** — undo the most recent batch of file operations by
//!   reading the journal and reversing entries in LIFO order.
//! - **`cache clear`** — remove cached data. `--type cache` clears only the
//!   match cache, `--type journal` clears only the journal, `--type all`
//!   (default) clears both.
//!
//! All mutating operations acquire an exclusive file lock before proceeding.
22
23use crate::Result;
24use crate::cli::{ApplyArgs, CacheArgs, ClearArgs, ClearType, RollbackArgs, StatusArgs};
25use crate::config::ConfigService;
26use crate::core::lock::acquire_subx_lock;
27use crate::core::matcher::cache::CacheData;
28use crate::core::matcher::engine::{FileRelocationMode, MatchConfig, apply_cached_operations};
29use crate::core::matcher::journal::{
30    JournalData, JournalEntry, JournalEntryStatus, JournalOperationType,
31};
32use crate::error::SubXError;
33use serde_json::json;
34use std::io::IsTerminal;
35use std::path::{Path, PathBuf};
36use std::time::{SystemTime, UNIX_EPOCH};
37
38/// Resolve the configuration directory, preferring `XDG_CONFIG_HOME` when set.
39///
40/// This mirrors the path resolution used by the journal module so that cache
41/// and journal files live under the same parent directory across commands and
42/// tests (which typically override `XDG_CONFIG_HOME`).
43fn get_config_dir() -> Result<PathBuf> {
44    if let Some(xdg_config) = std::env::var_os("XDG_CONFIG_HOME") {
45        Ok(PathBuf::from(xdg_config))
46    } else {
47        dirs::config_dir().ok_or_else(|| SubXError::config("Unable to determine config directory"))
48    }
49}
50
51/// Resolve the canonical path to the match cache file.
52fn cache_path() -> Result<PathBuf> {
53    Ok(get_config_dir()?.join("subx").join("match_cache.json"))
54}
55
56/// Resolve the canonical path to the match journal file.
57fn journal_path() -> Result<PathBuf> {
58    Ok(get_config_dir()?.join("subx").join("match_journal.json"))
59}
60
/// Delete `path`, printing a per-file confirmation message.
///
/// The file is removed with a single `remove_file` call and the outcome is
/// derived from that call's result, rather than probing with `exists()`
/// first. This closes the window in which the file could disappear between
/// the check and the removal, and it stops non-"not found" failures (for
/// example a permission error while inspecting the path) from being
/// silently reported as "not found". A dangling symbolic link at `path` is
/// removed as well, since `remove_file` acts on the link itself.
///
/// # Arguments
///
/// * `path` - File to delete.
/// * `label` - Human-readable name used as the prefix of the printed message.
///
/// # Returns
///
/// `Ok(true)` when an entry was removed, `Ok(false)` when nothing was present
/// at `path`.
///
/// # Errors
///
/// Any I/O error other than "not found" is propagated to the caller.
fn clear_file(path: &Path, label: &str) -> Result<bool> {
    match std::fs::remove_file(path) {
        Ok(()) => {
            println!("{} cleared: {}", label, path.display());
            Ok(true)
        }
        // A missing file is an expected, non-error outcome for `cache clear`.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            println!("{} not found: {}", label, path.display());
            Ok(false)
        }
        Err(err) => Err(err.into()),
    }
}
75
76/// Handle the `cache clear` subcommand, honoring the `--type` selector.
77async fn execute_clear(args: &ClearArgs) -> Result<()> {
78    let _lock = acquire_subx_lock().await?;
79    let config_dir = get_config_dir()?;
80    let cache_file = config_dir.join("subx").join("match_cache.json");
81    let journal_file = config_dir.join("subx").join("match_journal.json");
82
83    let mut cleared_any = false;
84
85    match args.r#type {
86        ClearType::Cache => {
87            cleared_any |= clear_file(&cache_file, "Cache")?;
88        }
89        ClearType::Journal => {
90            cleared_any |= clear_file(&journal_file, "Journal")?;
91        }
92        ClearType::All => {
93            cleared_any |= clear_file(&cache_file, "Cache")?;
94            cleared_any |= clear_file(&journal_file, "Journal")?;
95        }
96    }
97
98    if !cleared_any {
99        println!("No cache files found to clear.");
100    }
101    Ok(())
102}
103
/// Derive the configuration fingerprint for a relocation mode / backup pair.
///
/// The relocation mode (given in its `Debug` spelling, e.g. `"None"`) and the
/// backup flag are fed, in that order, into a `DefaultHasher`; the 64-bit
/// result is rendered as 16 lowercase, zero-padded hex digits.
///
/// Per the original design note this mirrors
/// `MatchEngine::calculate_config_hash`: `cache status` passes the default
/// mode `"None"` because no CLI flag is available there, while `cache apply`
/// passes the cache's recorded `original_relocation_mode`.
///
/// NOTE(review): the standard library documents `DefaultHasher`'s algorithm
/// as unspecified and subject to change between releases, so a persisted
/// hash may stop matching after a toolchain upgrade. Any change here must be
/// made in lock-step with the engine's implementation.
fn compute_config_hash(relocation_mode_debug: &str, backup_enabled: bool) -> String {
    use std::hash::{Hash, Hasher};

    let mut state = std::collections::hash_map::DefaultHasher::new();
    // Feed order is part of the fingerprint: mode first, then the flag.
    Hash::hash(relocation_mode_debug, &mut state);
    Hash::hash(&backup_enabled, &mut state);

    let digest = state.finish();
    format!("{:016x}", digest)
}
118
119/// Compute the config hash assuming the default relocation mode.
120///
121/// Used by `cache status` where the CLI relocation flag is not available.
122fn current_config_hash(config_service: &dyn ConfigService) -> Result<String> {
123    let config = config_service.get_config()?;
124    Ok(compute_config_hash("None", config.general.backup_enabled))
125}
126
/// Render a byte count as a compact human-readable size (e.g. `2.4 KB`).
///
/// Binary multiples are used (1 KB = 1024 B). The largest unit whose
/// threshold the value reaches is chosen and shown with one decimal place;
/// anything below 1024 bytes is printed as a whole number of bytes.
fn format_size(bytes: u64) -> String {
    // Ordered from largest to smallest so the first hit is the right unit.
    const UNITS: [(&str, f64); 3] = [
        ("GB", 1024.0 * 1024.0 * 1024.0),
        ("MB", 1024.0 * 1024.0),
        ("KB", 1024.0),
    ];

    let value = bytes as f64;
    UNITS
        .iter()
        .find(|(_, scale)| value >= *scale)
        .map(|(unit, scale)| format!("{:.1} {}", value / scale, unit))
        .unwrap_or_else(|| format!("{} B", bytes))
}
143
/// Render an age in seconds as a short "N <unit> ago" phrase.
///
/// The value is expressed in the largest unit (seconds, minutes, hours or
/// days) that it fills at least once, using truncating integer division.
/// The unit word is always plural, including for a count of one.
fn format_age(age_secs: u64) -> String {
    const SECS_PER_MIN: u64 = 60;
    const SECS_PER_HOUR: u64 = 60 * SECS_PER_MIN;
    const SECS_PER_DAY: u64 = 24 * SECS_PER_HOUR;

    let (amount, unit) = match age_secs {
        s if s < SECS_PER_MIN => (s, "seconds"),
        s if s < SECS_PER_HOUR => (s / SECS_PER_MIN, "minutes"),
        s if s < SECS_PER_DAY => (s / SECS_PER_HOUR, "hours"),
        s => (s / SECS_PER_DAY, "days"),
    };
    format!("{} {} ago", amount, unit)
}
159
160/// Describe the snapshot state of a cache for human-readable reporting.
161///
162/// Returns a tuple `(label, machine_status)` where `label` is a user-facing
163/// string and `machine_status` is the JSON-friendly status identifier
164/// (`"valid"`, `"stale"`, or `"empty"`).
165fn describe_snapshot(cache: &CacheData) -> (String, &'static str) {
166    if cache.has_empty_snapshot() {
167        ("Empty (legacy cache)".to_string(), "empty")
168    } else {
169        let stale = cache.validate_snapshot();
170        if stale.is_empty() {
171            ("Valid".to_string(), "valid")
172        } else {
173            (format!("Stale ({} files changed)", stale.len()), "stale")
174        }
175    }
176}
177
178/// Handle the `cache status` subcommand.
179///
180/// Loads cache metadata from disk and prints a summary of its location,
181/// size, age, AI model, operation count, configuration fingerprint,
182/// snapshot freshness, and whether a journal exists. Supports a
183/// machine-readable `--json` output mode for scripting.
184///
185/// When no cache file is present, a friendly message is printed and the
186/// function returns `Ok(())` without error.
187///
188/// # Arguments
189///
190/// * `args` - Parsed `cache status` arguments controlling output format.
191/// * `config_service` - Active configuration service, used to recompute
192///   the configuration hash for comparison against the cached value.
193pub async fn execute_status(args: &StatusArgs, config_service: &dyn ConfigService) -> Result<()> {
194    let cache_file = cache_path()?;
195    let journal_file = journal_path()?;
196
197    if !cache_file.exists() {
198        if args.json {
199            let payload = json!({
200                "path": cache_file.to_string_lossy(),
201                "exists": false,
202                "journal_present": journal_file.exists(),
203            });
204            println!("{}", serde_json::to_string_pretty(&payload)?);
205        } else {
206            println!("No cache found at {}", cache_file.display());
207        }
208        return Ok(());
209    }
210
211    let cache = CacheData::load(&cache_file).map_err(|e| {
212        SubXError::config(format!(
213            "Failed to load cache at {}: {}",
214            cache_file.display(),
215            e
216        ))
217    })?;
218
219    let metadata = std::fs::metadata(&cache_file)?;
220    let size_bytes = metadata.len();
221
222    let now_secs = SystemTime::now()
223        .duration_since(UNIX_EPOCH)
224        .map(|d| d.as_secs())
225        .unwrap_or(0);
226    let age_secs = now_secs.saturating_sub(cache.created_at);
227
228    let current_hash = current_config_hash(config_service)?;
229    let hash_match = current_hash == cache.config_hash;
230
231    let (snapshot_label, snapshot_status) = describe_snapshot(&cache);
232    let stale_entries = if snapshot_status == "stale" {
233        cache.validate_snapshot()
234    } else {
235        Vec::new()
236    };
237    let journal_present = journal_file.exists();
238
239    if args.json {
240        let stale_files: Vec<serde_json::Value> = stale_entries
241            .iter()
242            .map(|s| json!({ "path": s.path, "reason": s.reason }))
243            .collect();
244        let payload = json!({
245            "path": cache_file.to_string_lossy(),
246            "exists": true,
247            "size_bytes": size_bytes,
248            "created_at": cache.created_at,
249            "age_seconds": age_secs,
250            "cache_version": cache.cache_version,
251            "ai_model": cache.ai_model_used,
252            "operation_count": cache.match_operations.len(),
253            "config_hash": cache.config_hash,
254            "config_hash_match": hash_match,
255            "current_config_hash": current_hash,
256            "snapshot_status": snapshot_status,
257            "stale_files": stale_files,
258            "journal_present": journal_present,
259        });
260        println!("{}", serde_json::to_string_pretty(&payload)?);
261    } else {
262        let config_line = if hash_match {
263            "✓ (matches current)".to_string()
264        } else {
265            format!("✗ (differs from current: {})", current_hash)
266        };
267        let journal_line = if journal_present {
268            "Present"
269        } else {
270            "Not found"
271        };
272
273        println!("Cache Status");
274        println!("============");
275        println!("Path:             {}", cache_file.display());
276        println!("Size:             {}", format_size(size_bytes));
277        println!("Age:              {}", format_age(age_secs));
278        println!("Cache version:    {}", cache.cache_version);
279        println!("AI model:         {}", cache.ai_model_used);
280        println!("Operations:       {}", cache.match_operations.len());
281        println!("Config hash:      {}", cache.config_hash);
282        println!("Config match:     {}", config_line);
283        println!("Snapshot:         {}", snapshot_label);
284        println!("Journal:          {}", journal_line);
285    }
286
287    Ok(())
288}
289
290/// Handle the `cache apply` subcommand.
291///
292/// Loads the cached dry-run results and replays the file operations without
293/// calling the AI provider. Validates the file snapshot and target paths
294/// before proceeding, prompts for confirmation unless `--yes` is supplied,
295/// and aborts on non-TTY stdin without `--yes`.
296///
297/// # Arguments
298///
299/// * `args` - Parsed `cache apply` arguments controlling validation bypass,
300///   confirmation, and confidence filtering.
301/// * `config_service` - Active configuration service for rebuilding the
302///   `MatchConfig` needed by the engine replay path.
303pub async fn execute_apply(args: &ApplyArgs, config_service: &dyn ConfigService) -> Result<()> {
304    let _lock = acquire_subx_lock().await?;
305
306    let cache_file = cache_path()?;
307    if !cache_file.exists() {
308        println!(
309            "No cache found at {}. Run a dry-run match first.",
310            cache_file.display()
311        );
312        return Ok(());
313    }
314
315    let mut cache = CacheData::load(&cache_file).map_err(|e| {
316        SubXError::config(format!(
317            "Failed to load cache at {}: {}",
318            cache_file.display(),
319            e
320        ))
321    })?;
322
323    // Config hash mismatch detection — use the cache's recorded relocation mode
324    let config = config_service.get_config()?;
325    let apply_hash = compute_config_hash(
326        &cache.original_relocation_mode,
327        config.general.backup_enabled,
328    );
329    if apply_hash != cache.config_hash && !args.force {
330        return Err(SubXError::config(format!(
331            "Configuration has changed since the cache was created.\n\
332             Cache hash:   {}\n\
333             Current hash: {}\n\
334             Use --force to bypass this check.",
335            cache.config_hash, apply_hash
336        )));
337    }
338
339    // Legacy cache with empty snapshot requires --force
340    if cache.has_empty_snapshot() && !args.force {
341        return Err(SubXError::config(
342            "Cache was created without file snapshot data (legacy format).\n\
343             Cannot verify file integrity. Use --force to apply anyway."
344                .to_string(),
345        ));
346    }
347
348    // Snapshot validation
349    if !args.force && !cache.has_empty_snapshot() {
350        let stale = cache.validate_snapshot();
351        if !stale.is_empty() {
352            let mut msg = format!(
353                "{} source file(s) have changed since the cache was created:\n",
354                stale.len()
355            );
356            for s in &stale {
357                msg.push_str(&format!("  - {} ({})\n", s.path, s.reason));
358            }
359            msg.push_str("Use --force to apply anyway.");
360            return Err(SubXError::config(msg));
361        }
362    }
363
364    // Target path conflict detection
365    if !args.force {
366        let conflicts = cache.validate_target_paths();
367        if !conflicts.is_empty() {
368            let mut msg = format!("{} target path(s) already exist:\n", conflicts.len());
369            for p in &conflicts {
370                msg.push_str(&format!("  - {}\n", p.display()));
371            }
372            msg.push_str("Use --force to apply anyway.");
373            return Err(SubXError::config(msg));
374        }
375    }
376
377    // Apply confidence filter
378    if let Some(min_conf) = args.confidence {
379        let threshold = f32::from(min_conf) / 100.0;
380        let before = cache.match_operations.len();
381        cache
382            .match_operations
383            .retain(|op| op.confidence >= threshold);
384        let after = cache.match_operations.len();
385        if before != after {
386            println!(
387                "Filtered {} operation(s) below {}% confidence.",
388                before - after,
389                min_conf
390            );
391        }
392    }
393
394    if cache.match_operations.is_empty() {
395        println!("No operations to apply.");
396        return Ok(());
397    }
398
399    // Display summary
400    println!("Cache Apply Summary");
401    println!("===================");
402    println!("Operations:       {}", cache.match_operations.len());
403    println!("AI model:         {}", cache.ai_model_used);
404    println!("Relocation mode:  {}", cache.original_relocation_mode);
405    println!();
406    for (i, op) in cache.match_operations.iter().enumerate() {
407        println!(
408            "  {}. {} → {} (confidence: {:.0}%)",
409            i + 1,
410            op.subtitle_file,
411            op.new_subtitle_name,
412            op.confidence * 100.0
413        );
414    }
415    println!();
416
417    // Non-TTY check and interactive confirmation
418    if !args.yes {
419        if !std::io::stdin().is_terminal() {
420            return Err(SubXError::config(
421                "Non-interactive terminal detected. Use --yes to skip confirmation.".to_string(),
422            ));
423        }
424        print!("Proceed with apply? [y/N] ");
425        use std::io::Write;
426        std::io::stdout().flush()?;
427        let mut input = String::new();
428        std::io::stdin().read_line(&mut input)?;
429        if !input.trim().eq_ignore_ascii_case("y") {
430            println!("Apply cancelled.");
431            return Ok(());
432        }
433    }
434
435    // Build MatchConfig from config service
436    let config = config_service.get_config()?;
437    let relocation_mode = parse_relocation_mode(&cache.original_relocation_mode);
438    let match_config = MatchConfig {
439        confidence_threshold: 0.0,
440        max_sample_length: 2000,
441        enable_content_analysis: true,
442        backup_enabled: cache.original_backup_enabled,
443        relocation_mode,
444        conflict_resolution: crate::core::matcher::engine::ConflictResolution::Skip,
445        ai_model: cache.ai_model_used.clone(),
446        max_subtitle_bytes: config.general.max_subtitle_bytes,
447    };
448
449    apply_cached_operations(&cache, &match_config).await?;
450    println!("Apply complete.");
451    Ok(())
452}
453
454/// Parse a relocation mode string from cache metadata back into an enum value.
455fn parse_relocation_mode(s: &str) -> FileRelocationMode {
456    match s {
457        "Copy" => FileRelocationMode::Copy,
458        "Move" => FileRelocationMode::Move,
459        _ => FileRelocationMode::None,
460    }
461}
462
463/// Verify that a destination file still matches the metadata recorded in
464/// the journal entry at the time of the original operation.
465///
466/// The check compares file size and modification time (seconds since the
467/// Unix epoch). A mismatch or a missing destination aborts the rollback
468/// and returns a descriptive error so the user can investigate or opt in
469/// to force rollback via the `--force` flag.
470fn verify_destination_integrity(entry: &JournalEntry) -> Result<()> {
471    let metadata = match std::fs::metadata(&entry.destination) {
472        Ok(m) => m,
473        Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
474            return Err(SubXError::config(format!(
475                "Destination file {} no longer exists. Use --force to override.",
476                entry.destination.display()
477            )));
478        }
479        Err(e) => return Err(SubXError::Io(e)),
480    };
481
482    if metadata.len() != entry.file_size {
483        return Err(SubXError::config(format!(
484            "Destination file {} has been modified since the operation (size differs). \
485             Use --force to override.",
486            entry.destination.display()
487        )));
488    }
489
490    let mtime_secs = metadata
491        .modified()
492        .ok()
493        .and_then(|m| m.duration_since(UNIX_EPOCH).ok())
494        .map(|d| d.as_secs());
495
496    if let Some(actual) = mtime_secs {
497        if actual != entry.file_mtime {
498            return Err(SubXError::config(format!(
499                "Destination file {} has been modified since the operation (mtime differs). \
500                 Use --force to override.",
501                entry.destination.display()
502            )));
503        }
504    }
505
506    Ok(())
507}
508
509/// Reverse the effect of a single completed journal entry.
510///
511/// The reversal depends on the original operation:
512/// - `Copied`: the destination copy is deleted, leaving the source intact.
513/// - `Moved` / `Renamed`: the destination is moved back to the original
514///   source path via `std::fs::rename`.
515///
516/// If the entry recorded a backup file, that backup is deleted after the
517/// primary reversal succeeds.
518///
519/// For `Moved`/`Renamed` operations the function checks that the original
520/// source path is vacant before renaming back. If the source already exists
521/// and `force` is false, an error is returned.
522fn rollback_entry(entry: &JournalEntry, force: bool) -> Result<()> {
523    match entry.operation_type {
524        JournalOperationType::Copied => {
525            std::fs::remove_file(&entry.destination)?;
526            println!("Removed copy: {}", entry.destination.display());
527        }
528        JournalOperationType::Moved | JournalOperationType::Renamed => {
529            if entry.source.exists() && !force {
530                return Err(SubXError::config(format!(
531                    "Original source path {} already exists. \
532                     Rollback would overwrite it. Use --force to override.",
533                    entry.source.display()
534                )));
535            }
536            if let Some(parent) = entry.source.parent() {
537                if !parent.as_os_str().is_empty() {
538                    std::fs::create_dir_all(parent)?;
539                }
540            }
541            std::fs::rename(&entry.destination, &entry.source)?;
542            println!(
543                "Rolled back: {} \u{2190} {}",
544                entry.source.display(),
545                entry.destination.display()
546            );
547        }
548    }
549
550    if let Some(backup) = &entry.backup_path {
551        if backup.exists() {
552            std::fs::remove_file(backup)?;
553            println!("Removed backup: {}", backup.display());
554        }
555    }
556
557    Ok(())
558}
559
560/// Handle the `cache rollback` subcommand.
561///
562/// Acquires the process-wide SubX lock, loads the journal, and replays
563/// completed entries in last-in-first-out order — undoing each file
564/// operation. When the rollback finishes successfully the journal file
565/// is removed so subsequent commands start from a clean state.
566///
567/// A missing journal is not an error; it yields an informational message
568/// and returns `Ok(())`. When `--force` is not supplied, the command
569/// aborts before touching any file if any destination's size or mtime no
570/// longer matches the journal record.
571pub async fn execute_rollback(args: &RollbackArgs) -> Result<()> {
572    let _lock = acquire_subx_lock().await?;
573
574    let journal_file = journal_path()?;
575    if !journal_file.exists() {
576        println!("No operation journal found. Nothing to rollback.");
577        return Ok(());
578    }
579
580    let journal = JournalData::load(&journal_file).await?;
581
582    let reversed: Vec<&JournalEntry> = journal
583        .entries
584        .iter()
585        .filter(|e| e.status == JournalEntryStatus::Completed)
586        .rev()
587        .collect();
588
589    if reversed.is_empty() {
590        println!("Journal has no completed operations to rollback.");
591        return Ok(());
592    }
593
594    println!(
595        "Rolling back {} operations from batch {}...",
596        reversed.len(),
597        journal.batch_id
598    );
599
600    for entry in &reversed {
601        if !args.force {
602            verify_destination_integrity(entry)?;
603        }
604        rollback_entry(entry, args.force)?;
605    }
606
607    std::fs::remove_file(&journal_file)?;
608    println!("Rollback complete. Journal deleted.");
609    Ok(())
610}
611
612/// Dispatch the cache subcommand using the production configuration service.
613///
614/// For testable code paths, prefer [`execute_with_config`] which accepts an
615/// injected [`ConfigService`].
616pub async fn execute(args: CacheArgs) -> Result<()> {
617    match args.action {
618        crate::cli::CacheAction::Clear(clear_args) => {
619            execute_clear(&clear_args).await?;
620        }
621        crate::cli::CacheAction::Status(status_args) => {
622            // Fall back to the production configuration service when no service
623            // was injected by the caller. This keeps the legacy `execute` entry
624            // point functional for users invoking it directly.
625            let config_service = crate::config::ProductionConfigService::new()?;
626            execute_status(&status_args, &config_service).await?;
627        }
628        crate::cli::CacheAction::Apply(ref apply_args) => {
629            let config_service = crate::config::ProductionConfigService::new()?;
630            execute_apply(apply_args, &config_service).await?;
631        }
632        crate::cli::CacheAction::Rollback(rollback_args) => {
633            execute_rollback(&rollback_args).await?;
634        }
635    }
636    Ok(())
637}
638
639/// Execute cache management command with injected configuration service.
640///
641/// This function provides the new dependency injection interface for the cache command,
642/// accepting a configuration service instead of loading configuration globally.
643///
644/// # Arguments
645///
646/// * `args` - Cache command arguments
647/// * `config_service` - Configuration service providing access to cache settings
648///
649/// # Returns
650///
651/// Returns `Ok(())` on successful completion, or an error if the operation fails.
652pub async fn execute_with_config(
653    args: CacheArgs,
654    config_service: std::sync::Arc<dyn ConfigService>,
655) -> Result<()> {
656    match args.action {
657        crate::cli::CacheAction::Status(status_args) => {
658            execute_status(&status_args, config_service.as_ref()).await
659        }
660        crate::cli::CacheAction::Apply(apply_args) => {
661            execute_apply(&apply_args, config_service.as_ref()).await
662        }
663        other => execute(CacheArgs { action: other }).await,
664    }
665}