Skip to main content

sqlite_graphrag/commands/
cache.rs

//! Handler for the `cache` CLI subcommand and its nested operations.
//!
//! Manages cached resources such as the multilingual-e5-small ONNX model and
//! the BERT NER classifier downloaded into the XDG cache directory on first
//! `init`. Used to reclaim disk space or recover from corrupted cache state.
6
7use crate::errors::AppError;
8use crate::output;
9use crate::paths::AppPaths;
10use serde::Serialize;
11
12#[derive(clap::Args)]
13pub struct CacheArgs {
14    #[command(subcommand)]
15    pub command: CacheCommands,
16}
17
18#[derive(clap::Subcommand)]
19pub enum CacheCommands {
20    /// Remove cached embedding/NER model files (forces re-download on next `init`).
21    ClearModels(ClearModelsArgs),
22}
23
24#[derive(clap::Args)]
25pub struct ClearModelsArgs {
26    /// Skip confirmation prompt and proceed with deletion immediately.
27    #[arg(long, default_value_t = false, help = "Skip confirmation prompt")]
28    pub yes: bool,
29    /// Output format: json (default), text, or markdown.
30    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
31    pub json: bool,
32}
33
34#[derive(Serialize)]
35struct ClearModelsResponse {
36    cache_path: String,
37    existed: bool,
38    bytes_freed: u64,
39    files_removed: usize,
40    /// Total execution time in milliseconds from handler start to serialisation.
41    elapsed_ms: u64,
42}
43
44pub fn run(args: CacheArgs) -> Result<(), AppError> {
45    match args.command {
46        CacheCommands::ClearModels(a) => clear_models(a),
47    }
48}
49
50fn clear_models(args: ClearModelsArgs) -> Result<(), AppError> {
51    let inicio = std::time::Instant::now();
52    // Resolve the canonical models directory through AppPaths to honour
53    // SQLITE_GRAPHRAG_CACHE_DIR overrides used by tests and CI.
54    let paths = AppPaths::resolve(None)?;
55    let models_dir = paths.models.clone();
56
57    if !args.yes {
58        // For machine consumption stay deterministic: refuse without --yes.
59        return Err(AppError::Validation(
60            "destructive operation: pass --yes to confirm cache deletion".to_string(),
61        ));
62    }
63
64    let existed = models_dir.exists();
65    let mut bytes_freed: u64 = 0;
66    let mut files_removed: usize = 0;
67
68    if existed {
69        bytes_freed = dir_size(&models_dir).unwrap_or(0);
70        files_removed = count_files(&models_dir).unwrap_or(0);
71        std::fs::remove_dir_all(&models_dir)?;
72    }
73
74    output::emit_json(&ClearModelsResponse {
75        cache_path: models_dir.display().to_string(),
76        existed,
77        bytes_freed,
78        files_removed,
79        elapsed_ms: inicio.elapsed().as_millis() as u64,
80    })?;
81
82    Ok(())
83}
84
/// Recursively sums the sizes, in bytes, of everything below `path`.
///
/// Non-directory entries contribute the length reported by their metadata.
/// A sub-directory that cannot be scanned contributes 0 instead of failing
/// the whole walk. The total saturates at `u64::MAX`.
///
/// # Errors
///
/// Fails when `path` itself cannot be listed, or when one of its direct
/// entries (or that entry's metadata) cannot be read.
fn dir_size(path: &std::path::Path) -> std::io::Result<u64> {
    std::fs::read_dir(path)?.try_fold(0u64, |running, item| -> std::io::Result<u64> {
        let item = item?;
        let info = item.metadata()?;
        let bytes = if info.is_dir() {
            // Best effort: an unreadable subtree counts as empty.
            dir_size(&item.path()).unwrap_or(0)
        } else {
            info.len()
        };
        Ok(running.saturating_add(bytes))
    })
}
98
/// Recursively counts the non-directory entries below `path`.
///
/// Directories themselves are not counted. A sub-directory that cannot be
/// scanned contributes 0 instead of failing the whole walk.
///
/// # Errors
///
/// Fails when `path` itself cannot be listed, or when one of its direct
/// entries (or that entry's metadata) cannot be read.
fn count_files(path: &std::path::Path) -> std::io::Result<usize> {
    std::fs::read_dir(path)?.try_fold(0usize, |seen, item| -> std::io::Result<usize> {
        let item = item?;
        Ok(if item.metadata()?.is_dir() {
            // Best effort: an unreadable subtree counts as empty.
            seen.saturating_add(count_files(&item.path()).unwrap_or(0))
        } else {
            seen + 1
        })
    })
}
112
#[cfg(test)]
mod tests {
    use super::*;

    /// Every field of the response must show up in the JSON under its own name.
    #[test]
    fn clear_models_response_serializes_all_fields() {
        let resp = ClearModelsResponse {
            cache_path: "/tmp/sqlite-graphrag/models".to_string(),
            existed: true,
            bytes_freed: 465_000_000,
            files_removed: 14,
            elapsed_ms: 12,
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        assert_eq!(json["cache_path"], "/tmp/sqlite-graphrag/models");
        assert_eq!(json["existed"], true);
        assert_eq!(json["bytes_freed"], 465_000_000u64);
        assert_eq!(json["files_removed"], 14);
        assert_eq!(json["elapsed_ms"], 12);
        // Fails when a field is added or dropped without updating this test.
        assert_eq!(json.as_object().map(|fields| fields.len()), Some(5));
    }

    /// Without `--yes` the handler must refuse with a validation error.
    #[test]
    fn clear_models_without_yes_returns_validation_error() {
        let args = ClearModelsArgs {
            yes: false,
            json: false,
        };
        let result = clear_models(args);
        assert!(matches!(result, Err(AppError::Validation(_))));
    }
}