Skip to main content

sqlite_graphrag/
paths.rs

//! XDG/cwd path resolution and traversal-safe overrides.
//!
//! Resolves data directories via [`directories::ProjectDirs`] and rejects
//! user-supplied paths that contain `..` (parent-directory) components.
5
6use crate::errors::AppError;
7use crate::i18n::validation;
8use directories::ProjectDirs;
9use std::path::{Component, Path, PathBuf};
10
/// Resolved filesystem paths used by the CLI at runtime.
///
/// Constructed via [`AppPaths::resolve`], which picks the database location
/// from the first source that applies:
/// CLI flag → `SQLITE_GRAPHRAG_DB_PATH` env var → `SQLITE_GRAPHRAG_HOME` env var → cwd.
///
/// `PartialEq`/`Eq` are derived so two resolved path sets can be compared as a
/// whole (e.g. in assertions) instead of field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct AppPaths {
    /// Path to the SQLite database file.
    ///
    /// Values coming from the CLI flag or an environment variable are stored as
    /// supplied (they may be relative); only the cwd fallback is built from the
    /// process's current directory.
    pub db: PathBuf,
    /// Directory where embedding model files are cached.
    pub models: PathBuf,
}
22
23impl AppPaths {
24    pub fn resolve(db_override: Option<&str>) -> Result<Self, AppError> {
25        let proj = ProjectDirs::from("", "", "sqlite-graphrag").ok_or_else(|| {
26            AppError::Io(std::io::Error::other("could not determine home directory"))
27        })?;
28
29        let cache_root = if let Some(override_dir) = std::env::var_os("SQLITE_GRAPHRAG_CACHE_DIR") {
30            PathBuf::from(override_dir)
31        } else {
32            proj.cache_dir().to_path_buf()
33        };
34
35        let db = if let Some(p) = db_override {
36            validate_path(p)?;
37            PathBuf::from(p)
38        } else if let Ok(env_path) = std::env::var("SQLITE_GRAPHRAG_DB_PATH") {
39            validate_path(&env_path)?;
40            PathBuf::from(env_path)
41        } else if let Some(home_dir) = home_env_dir()? {
42            home_dir.join("graphrag.sqlite")
43        } else {
44            std::env::current_dir()
45                .map_err(AppError::Io)?
46                .join("graphrag.sqlite")
47        };
48
49        Ok(Self {
50            db,
51            models: cache_root.join("models"),
52        })
53    }
54
55    pub fn ensure_dirs(&self) -> Result<(), AppError> {
56        for dir in [parent_or_err(&self.db)?, self.models.as_path()] {
57            std::fs::create_dir_all(dir)?;
58        }
59        Ok(())
60    }
61}
62
63fn validate_path(p: &str) -> Result<(), AppError> {
64    if Path::new(p).components().any(|c| c == Component::ParentDir) {
65        return Err(AppError::Validation(validation::path_traversal(p)));
66    }
67    Ok(())
68}
69
70/// Resolves `SQLITE_GRAPHRAG_HOME` as the root directory for the default database.
71///
72/// Returns `Ok(Some(dir))` when the env var is set and valid,
73/// `Ok(None)` when absent or empty (falls back to `current_dir`),
74/// and `Err(...)` when the value contains traversal components.
75fn home_env_dir() -> Result<Option<PathBuf>, AppError> {
76    let raw = match std::env::var("SQLITE_GRAPHRAG_HOME") {
77        Ok(v) => v,
78        Err(_) => return Ok(None),
79    };
80    if raw.is_empty() {
81        return Ok(None);
82    }
83    validate_path(&raw)?;
84    Ok(Some(PathBuf::from(raw)))
85}
86
87/// Returns the XDG config directory for the application.
88pub fn config_dir() -> Result<PathBuf, AppError> {
89    let proj = ProjectDirs::from("", "", "sqlite-graphrag").ok_or_else(|| {
90        AppError::Io(std::io::Error::other(
91            "could not determine home directory for config",
92        ))
93    })?;
94    Ok(proj.config_dir().to_path_buf())
95}
96
97pub(crate) fn parent_or_err(path: &Path) -> Result<&Path, AppError> {
98    path.parent().ok_or_else(|| {
99        AppError::Validation(format!(
100            "path '{}' has no valid parent component",
101            path.display()
102        ))
103    })
104}
105
/// Builds the path of a sidecar file (e.g. the enrich/ingest queue) that lives
/// in the same directory as the database, so worklist files follow `--db`
/// rather than the process CWD.
///
/// When `db_path` has no parent, or its parent is empty (a bare filename), the
/// result is just `filename`, i.e. relative to the CWD — the legacy default-DB
/// layout.
pub fn sidecar_path(db_path: &Path, filename: &str) -> PathBuf {
    match db_path.parent() {
        Some(dir) if !dir.as_os_str().is_empty() => dir.join(filename),
        _ => PathBuf::from(filename),
    }
}
117
#[cfg(test)]
mod tests {
    use super::*;
    use serial_test::serial;
    use std::ffi::OsStr;
    use tempfile::TempDir;

    /// Environment variables that influence `AppPaths::resolve`.
    const PATH_ENV_VARS: [&str; 3] = [
        "SQLITE_GRAPHRAG_HOME",
        "SQLITE_GRAPHRAG_DB_PATH",
        "SQLITE_GRAPHRAG_CACHE_DIR",
    ];

    /// Clears all variables that affect `AppPaths::resolve` to isolate the
    /// test from the developer/CI environment.
    fn clean_env_paths() {
        for name in PATH_ENV_VARS {
            // SAFETY: every test that touches these variables is `#[serial]`, so
            // they never run concurrently with each other; the non-serial tests
            // in this module do not read the environment.
            unsafe {
                std::env::remove_var(name);
            }
        }
    }

    /// Sets one environment variable for the duration of a serial test.
    fn set_env(name: &str, value: impl AsRef<OsStr>) {
        // SAFETY: only called from `#[serial]` tests; see `clean_env_paths`.
        unsafe {
            std::env::set_var(name, value);
        }
    }

    /// Clears the path variables on creation and again on drop, so a failing
    /// assertion cannot leak test values into later tests.
    struct EnvGuard;

    impl EnvGuard {
        fn isolate() -> Self {
            clean_env_paths();
            Self
        }
    }

    impl Drop for EnvGuard {
        fn drop(&mut self) {
            clean_env_paths();
        }
    }

    #[test]
    #[serial]
    fn home_env_resolves_db_in_subdir() {
        let _env = EnvGuard::isolate();
        let home = TempDir::new().expect("tempdir");
        set_env("SQLITE_GRAPHRAG_HOME", home.path());

        let paths = AppPaths::resolve(None).expect("resolve with valid HOME");
        assert_eq!(paths.db, home.path().join("graphrag.sqlite"));
    }

    #[test]
    #[serial]
    fn home_env_traversal_rejected() {
        let _env = EnvGuard::isolate();
        set_env("SQLITE_GRAPHRAG_HOME", "/tmp/../etc");

        let result = AppPaths::resolve(None);
        assert!(
            matches!(result, Err(AppError::Validation(_))),
            "traversal in SQLITE_GRAPHRAG_HOME must fail as Validation, got {result:?}"
        );
    }

    #[test]
    #[serial]
    fn db_path_env_traversal_rejected() {
        let _env = EnvGuard::isolate();
        set_env("SQLITE_GRAPHRAG_DB_PATH", "data/../escape.sqlite");

        let result = AppPaths::resolve(None);
        assert!(
            matches!(result, Err(AppError::Validation(_))),
            "traversal in SQLITE_GRAPHRAG_DB_PATH must fail as Validation, got {result:?}"
        );
    }

    #[test]
    #[serial]
    fn flag_traversal_rejected() {
        let _env = EnvGuard::isolate();

        let result = AppPaths::resolve(Some("../escape.sqlite"));
        assert!(
            matches!(result, Err(AppError::Validation(_))),
            "traversal in the db flag must fail as Validation, got {result:?}"
        );
    }

    #[test]
    #[serial]
    fn db_path_overrides_home() {
        let _env = EnvGuard::isolate();
        let home = TempDir::new().expect("tempdir home");
        let db_dir = TempDir::new().expect("tempdir db");
        let explicit_db = db_dir.path().join("explicit.sqlite");
        set_env("SQLITE_GRAPHRAG_HOME", home.path());
        set_env("SQLITE_GRAPHRAG_DB_PATH", &explicit_db);

        let paths = AppPaths::resolve(None).expect("resolve with DB_PATH and HOME");
        assert_eq!(paths.db, explicit_db);
    }

    #[test]
    #[serial]
    fn flag_overrides_home() {
        let _env = EnvGuard::isolate();
        let home = TempDir::new().expect("tempdir home");
        let flag_dir = TempDir::new().expect("tempdir flag");
        let db_flag = flag_dir.path().join("via-flag.sqlite");
        set_env("SQLITE_GRAPHRAG_HOME", home.path());

        let paths = AppPaths::resolve(Some(db_flag.to_str().expect("utf8")))
            .expect("resolve with flag and HOME");
        assert_eq!(paths.db, db_flag);
    }

    #[test]
    #[serial]
    fn home_env_empty_falls_back_to_cwd() {
        let _env = EnvGuard::isolate();
        set_env("SQLITE_GRAPHRAG_HOME", "");

        let paths = AppPaths::resolve(None).expect("resolve with empty HOME");
        let expected = std::env::current_dir()
            .expect("cwd")
            .join("graphrag.sqlite");
        assert_eq!(paths.db, expected);
    }

    #[test]
    #[serial]
    fn cache_dir_override_relocates_models() {
        let _env = EnvGuard::isolate();
        let cache = TempDir::new().expect("tempdir cache");
        set_env("SQLITE_GRAPHRAG_CACHE_DIR", cache.path());

        let paths = AppPaths::resolve(None).expect("resolve with CACHE_DIR");
        assert_eq!(paths.models, cache.path().join("models"));
    }

    #[test]
    fn parent_or_err_accepts_normal_path() {
        let p = PathBuf::from("/home/user/db.sqlite");
        let parent = parent_or_err(&p).expect("valid parent");
        assert_eq!(parent, Path::new("/home/user"));
    }

    #[test]
    fn parent_or_err_accepts_relative_path() {
        let p = PathBuf::from("subdir/file.sqlite");
        let parent = parent_or_err(&p).expect("relative parent");
        assert_eq!(parent, Path::new("subdir"));
    }

    #[test]
    fn parent_or_err_rejects_unix_root() {
        let p = PathBuf::from("/");
        let result = parent_or_err(&p);
        assert!(matches!(result, Err(AppError::Validation(_))));
    }

    #[test]
    fn parent_or_err_rejects_empty_path() {
        let p = PathBuf::from("");
        let result = parent_or_err(&p);
        assert!(matches!(result, Err(AppError::Validation(_))));
    }

    #[test]
    fn sidecar_path_derives_next_to_absolute_db() {
        let db = PathBuf::from("/var/data/graphrag.sqlite");
        assert_eq!(
            sidecar_path(&db, ".enrich-queue.sqlite"),
            PathBuf::from("/var/data/.enrich-queue.sqlite")
        );
    }

    #[test]
    fn sidecar_path_bare_filename_falls_back_to_cwd() {
        let db = PathBuf::from("graphrag.sqlite");
        assert_eq!(
            sidecar_path(&db, ".enrich-queue.sqlite"),
            PathBuf::from(".enrich-queue.sqlite")
        );
    }

    #[test]
    fn sidecar_path_relative_subdir_db() {
        let db = PathBuf::from("sub/dir/db.sqlite");
        assert_eq!(
            sidecar_path(&db, ".ingest-queue.sqlite"),
            PathBuf::from("sub/dir/.ingest-queue.sqlite")
        );
    }
}