Skip to main content

sqlite_graphrag/
paths.rs

//! XDG/cwd path resolution and traversal-safe overrides.
//!
//! Resolves the cache directory via [`directories::ProjectDirs`] and rejects
//! user-supplied paths that contain `..` (parent-directory) components.
5
6use crate::errors::AppError;
7use crate::i18n::validation;
8use directories::ProjectDirs;
9use std::path::{Component, Path, PathBuf};
10
/// Resolved filesystem paths used by the CLI at runtime.
///
/// Constructed via [`AppPaths::resolve`]. The database path is taken from the
/// first source that applies, in this order:
///
/// 1. the CLI flag,
/// 2. the `SQLITE_GRAPHRAG_DB_PATH` env var,
/// 3. `graphrag.sqlite` inside the `SQLITE_GRAPHRAG_HOME` env var directory,
/// 4. `graphrag.sqlite` inside the current working directory.
#[derive(Debug, Clone)]
pub struct AppPaths {
    /// Path to the SQLite database file.
    ///
    /// Stored as supplied by the winning source: a relative flag or env value
    /// stays relative, it is not canonicalized here.
    pub db: PathBuf,
    /// Directory where embedding model files are cached.
    pub models: PathBuf,
}
22
23impl AppPaths {
24    pub fn resolve(db_override: Option<&str>) -> Result<Self, AppError> {
25        let proj = ProjectDirs::from("", "", "sqlite-graphrag").ok_or_else(|| {
26            AppError::Io(std::io::Error::other("could not determine home directory"))
27        })?;
28
29        let cache_root = if let Some(override_dir) = std::env::var_os("SQLITE_GRAPHRAG_CACHE_DIR") {
30            PathBuf::from(override_dir)
31        } else {
32            proj.cache_dir().to_path_buf()
33        };
34
35        let db = if let Some(p) = db_override {
36            validate_path(p)?;
37            PathBuf::from(p)
38        } else if let Ok(env_path) = std::env::var("SQLITE_GRAPHRAG_DB_PATH") {
39            validate_path(&env_path)?;
40            PathBuf::from(env_path)
41        } else if let Some(home_dir) = home_env_dir()? {
42            home_dir.join("graphrag.sqlite")
43        } else {
44            std::env::current_dir()
45                .map_err(AppError::Io)?
46                .join("graphrag.sqlite")
47        };
48
49        Ok(Self {
50            db,
51            models: cache_root.join("models"),
52        })
53    }
54
55    pub fn ensure_dirs(&self) -> Result<(), AppError> {
56        for dir in [parent_or_err(&self.db)?, self.models.as_path()] {
57            std::fs::create_dir_all(dir)?;
58        }
59        Ok(())
60    }
61}
62
63fn validate_path(p: &str) -> Result<(), AppError> {
64    if Path::new(p).components().any(|c| c == Component::ParentDir) {
65        return Err(AppError::Validation(validation::path_traversal(p)));
66    }
67    Ok(())
68}
69
70/// Resolves `SQLITE_GRAPHRAG_HOME` as the root directory for the default database.
71///
72/// Returns `Ok(Some(dir))` when the env var is set and valid,
73/// `Ok(None)` when absent or empty (falls back to `current_dir`),
74/// and `Err(...)` when the value contains traversal components.
75fn home_env_dir() -> Result<Option<PathBuf>, AppError> {
76    let raw = match std::env::var("SQLITE_GRAPHRAG_HOME") {
77        Ok(v) => v,
78        Err(_) => return Ok(None),
79    };
80    if raw.is_empty() {
81        return Ok(None);
82    }
83    validate_path(&raw)?;
84    Ok(Some(PathBuf::from(raw)))
85}
86
87pub(crate) fn parent_or_err(path: &Path) -> Result<&Path, AppError> {
88    path.parent().ok_or_else(|| {
89        AppError::Validation(format!(
90            "path '{}' has no valid parent component",
91            path.display()
92        ))
93    })
94}
95
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;
    use tempfile::TempDir;

    /// Clears all variables that affect `AppPaths::resolve` to isolate the
    /// test from the developer/CI environment.
    fn clean_env_paths() {
        // SAFETY: every env-mutating test is annotated with #[serial], so no
        // other serial test touches the environment at the same time.
        unsafe {
            std::env::remove_var("SQLITE_GRAPHRAG_HOME");
            std::env::remove_var("SQLITE_GRAPHRAG_DB_PATH");
            std::env::remove_var("SQLITE_GRAPHRAG_CACHE_DIR");
        }
    }

    /// Scrubs the path-related env vars on creation and again on drop.
    ///
    /// Cleaning up in `Drop` means a failing assertion (which unwinds) still
    /// restores the environment for whichever serial test runs next.
    struct EnvGuard;

    impl EnvGuard {
        /// Starts a test from a clean environment.
        fn new() -> Self {
            clean_env_paths();
            Self
        }

        /// Sets one env var for the lifetime of the guard.
        fn set(&self, key: &str, value: impl AsRef<std::ffi::OsStr>) {
            // SAFETY: guards are only created inside #[serial] tests, so no
            // other serial test touches the environment at the same time.
            unsafe {
                std::env::set_var(key, value);
            }
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            clean_env_paths();
        }
    }

    #[test]
    #[serial]
    fn home_env_resolves_db_in_subdir() {
        let env = EnvGuard::new();
        let tmp = TempDir::new().expect("tempdir");
        env.set("SQLITE_GRAPHRAG_HOME", tmp.path());

        let paths = AppPaths::resolve(None).expect("resolve with valid HOME");
        assert_eq!(paths.db, tmp.path().join("graphrag.sqlite"));
    }

    #[test]
    #[serial]
    fn home_env_traversal_rejected() {
        let env = EnvGuard::new();
        env.set("SQLITE_GRAPHRAG_HOME", "/tmp/../etc");

        let result = AppPaths::resolve(None);
        assert!(
            matches!(result, Err(AppError::Validation(_))),
            "traversal in SQLITE_GRAPHRAG_HOME must fail as Validation, got {result:?}"
        );
    }

    #[test]
    #[serial]
    fn db_path_env_traversal_rejected() {
        let env = EnvGuard::new();
        env.set("SQLITE_GRAPHRAG_DB_PATH", "/tmp/../etc/db.sqlite");

        let result = AppPaths::resolve(None);
        assert!(
            matches!(result, Err(AppError::Validation(_))),
            "traversal in SQLITE_GRAPHRAG_DB_PATH must fail as Validation, got {result:?}"
        );
    }

    #[test]
    #[serial]
    fn flag_traversal_rejected() {
        let _env = EnvGuard::new();

        let result = AppPaths::resolve(Some("../outside.sqlite"));
        assert!(
            matches!(result, Err(AppError::Validation(_))),
            "traversal in the db flag must fail as Validation, got {result:?}"
        );
    }

    #[test]
    #[serial]
    fn db_path_overrides_home() {
        let env = EnvGuard::new();
        let tmp_home = TempDir::new().expect("tempdir home");
        let tmp_db = TempDir::new().expect("tempdir db");
        let explicit_db = tmp_db.path().join("explicit.sqlite");
        env.set("SQLITE_GRAPHRAG_HOME", tmp_home.path());
        env.set("SQLITE_GRAPHRAG_DB_PATH", &explicit_db);

        let paths = AppPaths::resolve(None).expect("resolve with DB_PATH and HOME");
        assert_eq!(paths.db, explicit_db);
    }

    #[test]
    #[serial]
    fn flag_overrides_home() {
        let env = EnvGuard::new();
        let tmp_home = TempDir::new().expect("tempdir home");
        let tmp_flag = TempDir::new().expect("tempdir flag");
        let db_flag = tmp_flag.path().join("via-flag.sqlite");
        env.set("SQLITE_GRAPHRAG_HOME", tmp_home.path());

        let paths = AppPaths::resolve(Some(db_flag.to_str().expect("utf8")))
            .expect("resolve with flag and HOME");
        assert_eq!(paths.db, db_flag);
    }

    #[test]
    #[serial]
    fn home_env_empty_falls_back_to_cwd() {
        let env = EnvGuard::new();
        env.set("SQLITE_GRAPHRAG_HOME", "");

        let paths = AppPaths::resolve(None).expect("resolve with empty HOME");
        let expected = std::env::current_dir()
            .expect("cwd")
            .join("graphrag.sqlite");
        assert_eq!(paths.db, expected);
    }

    #[test]
    fn parent_or_err_accepts_normal_path() {
        let p = PathBuf::from("/home/user/db.sqlite");
        let parent = parent_or_err(&p).expect("valid parent");
        assert_eq!(parent, Path::new("/home/user"));
    }

    #[test]
    fn parent_or_err_accepts_relative_path() {
        let p = PathBuf::from("subdir/file.sqlite");
        let parent = parent_or_err(&p).expect("relative parent");
        assert_eq!(parent, Path::new("subdir"));
    }

    #[test]
    fn parent_or_err_rejects_unix_root() {
        let p = PathBuf::from("/");
        let result = parent_or_err(&p);
        assert!(matches!(result, Err(AppError::Validation(_))));
    }

    #[test]
    fn parent_or_err_rejects_empty_path() {
        let p = PathBuf::from("");
        let result = parent_or_err(&p);
        assert!(matches!(result, Err(AppError::Validation(_))));
    }
}