Skip to main content

sqlite_graphrag/commands/
cache.rs

1//! Handler for the `cache` CLI subcommand and its nested operations.
2//!
3//! Manages cached resources such as the multilingual-e5-small ONNX model and
4//! the BERT NER classifier downloaded into the XDG cache directory on first
5//! `init`. Used to reclaim disk space or recover from corrupted cache state.
6
7use crate::errors::AppError;
8use crate::output;
9use crate::paths::AppPaths;
10use serde::Serialize;
11
// Top-level arguments for the `cache` subcommand.
//
// The struct only carries the nested operation to run. The `after_long_help`
// string below appends usage examples to the long help output; each trailing
// `\` continues the literal and swallows the next line's leading whitespace.
//
// Plain `//` comments are used on purpose: `///` doc comments on clap-derived
// items can be picked up as help text.
#[derive(clap::Args)]
#[command(after_long_help = "EXAMPLES:\n  \
    # Remove cached embedding/NER model files (forces re-download on next init)\n  \
    sqlite-graphrag cache clear-models\n\n  \
    # Skip the confirmation prompt\n  \
    sqlite-graphrag cache clear-models --yes")]
pub struct CacheArgs {
    // Nested operation selected on the command line; dispatched by `run`.
    #[command(subcommand)]
    pub command: CacheCommands,
}
22
// Operations available under the `cache` subcommand.
//
// Each variant wraps the argument struct of one operation. Plain `//`
// comments are used here because `///` doc comments on clap-derived items can
// be picked up as help text; the variant's existing doc comment is kept as is.
#[derive(clap::Subcommand)]
pub enum CacheCommands {
    /// Remove cached embedding/NER model files (forces re-download on next `init`).
    ClearModels(ClearModelsArgs),
}
28
// Arguments accepted by `cache clear-models`.
#[derive(clap::Args)]
pub struct ClearModelsArgs {
    /// Skip confirmation prompt and proceed with deletion immediately.
    #[arg(long, default_value_t = false, help = "Skip confirmation prompt")]
    pub yes: bool,
    /// No-op flag; JSON is always emitted on stdout regardless of this value.
    // Hidden from help output (`hide = true`); the handler never reads it.
    #[arg(long, hide = true, help = "No-op; JSON is always emitted on stdout")]
    pub json: bool,
}
38
/// JSON payload emitted on stdout after `cache clear-models` completes.
///
/// Fields are serialised in declaration order, so reordering them changes the
/// key order of the emitted object.
#[derive(Serialize)]
struct ClearModelsResponse {
    /// Models directory that was targeted, rendered via `.display()`.
    cache_path: String,
    /// Whether that directory existed when the handler checked, before removal.
    existed: bool,
    /// Total size in bytes measured under the directory before removal; `0`
    /// when the directory did not exist or the measurement failed.
    bytes_freed: u64,
    /// Number of non-directory entries counted before removal; `0` when the
    /// directory did not exist or the count failed.
    files_removed: usize,
    /// Total execution time in milliseconds from handler start to serialisation.
    elapsed_ms: u64,
}
48
49pub fn run(args: CacheArgs) -> Result<(), AppError> {
50    match args.command {
51        CacheCommands::ClearModels(a) => clear_models(a),
52    }
53}
54
55fn clear_models(args: ClearModelsArgs) -> Result<(), AppError> {
56    let inicio = std::time::Instant::now();
57    // Resolve the canonical models directory through AppPaths to honour
58    // SQLITE_GRAPHRAG_CACHE_DIR overrides used by tests and CI.
59    let paths = AppPaths::resolve(None)?;
60    let models_dir = paths.models.clone();
61
62    if !args.yes {
63        // For machine consumption stay deterministic: refuse without --yes.
64        return Err(AppError::Validation(
65            "destructive operation: pass --yes to confirm cache deletion".to_string(),
66        ));
67    }
68
69    let existed = models_dir.exists();
70    let mut bytes_freed: u64 = 0;
71    let mut files_removed: usize = 0;
72
73    if existed {
74        bytes_freed = dir_size(&models_dir).unwrap_or(0);
75        files_removed = count_files(&models_dir).unwrap_or(0);
76        std::fs::remove_dir_all(&models_dir)?;
77    }
78
79    output::emit_json(&ClearModelsResponse {
80        cache_path: models_dir.display().to_string(),
81        existed,
82        bytes_freed,
83        files_removed,
84        elapsed_ms: inicio.elapsed().as_millis() as u64,
85    })?;
86
87    Ok(())
88}
89
/// Recursively sums the sizes, in bytes, of the entries under `path`.
///
/// Sub-directories are measured best-effort: if a nested directory cannot be
/// measured it contributes `0` instead of failing the whole walk. The running
/// total saturates at `u64::MAX` rather than overflowing.
///
/// # Errors
///
/// Returns the I/O error if `path` itself cannot be listed, or if one of its
/// direct entries (or that entry's metadata) cannot be read.
fn dir_size(path: &std::path::Path) -> std::io::Result<u64> {
    std::fs::read_dir(path)?.try_fold(0u64, |running, item| -> std::io::Result<u64> {
        let item = item?;
        let info = item.metadata()?;
        let contribution = if info.is_dir() {
            dir_size(&item.path()).unwrap_or(0)
        } else {
            info.len()
        };
        Ok(running.saturating_add(contribution))
    })
}
103
/// Recursively counts the non-directory entries under `path`.
///
/// Sub-directories are counted best-effort: if a nested directory cannot be
/// walked it contributes `0` instead of failing the whole count. Directories
/// themselves are not counted, only what they contain.
///
/// # Errors
///
/// Returns the I/O error if `path` itself cannot be listed, or if one of its
/// direct entries (or that entry's metadata) cannot be read.
fn count_files(path: &std::path::Path) -> std::io::Result<usize> {
    std::fs::read_dir(path)?.try_fold(0usize, |seen, item| -> std::io::Result<usize> {
        let item = item?;
        let updated = match item.metadata()? {
            info if info.is_dir() => {
                seen.saturating_add(count_files(&item.path()).unwrap_or(0))
            }
            _ => seen + 1,
        };
        Ok(updated)
    })
}
117
#[cfg(test)]
mod tests {
    use super::*;

    /// Every field of the response must appear in the JSON object under its
    /// own name with the value it was given, and nothing else may be emitted.
    #[test]
    fn clear_models_response_serializes_all_fields() {
        let resp = ClearModelsResponse {
            cache_path: "/tmp/sqlite-graphrag/models".to_string(),
            existed: true,
            bytes_freed: 465_000_000,
            files_removed: 14,
            elapsed_ms: 12,
        };
        let json = serde_json::to_value(&resp).expect("serialization");
        assert_eq!(json["cache_path"], "/tmp/sqlite-graphrag/models");
        assert_eq!(json["existed"], true);
        assert_eq!(json["bytes_freed"], 465_000_000u64);
        assert_eq!(json["files_removed"], 14);
        assert_eq!(json["elapsed_ms"], 12);
        // Guards against a field being added without the test being updated.
        assert_eq!(json.as_object().expect("JSON object").len(), 5);
    }

    /// Without `--yes` the handler must refuse with a validation error; it
    /// returns that error before any deletion code runs, so calling the real
    /// handler here cannot remove anything.
    #[test]
    fn clear_models_without_yes_returns_validation_error() {
        let args = ClearModelsArgs {
            yes: false,
            json: false,
        };
        let result = clear_models(args);
        assert!(matches!(result, Err(AppError::Validation(_))));
    }
}