Skip to main content

synwire_storage/
dependency_index.rs

1//! Global cross-project dependency index.
2//!
3//! Parses project manifests (`Cargo.toml`, `go.mod`, `package.json`,
4//! `pyproject.toml`) to build a graph of which projects depend on which
5//! libraries.  Stored in `global/dependencies/deps.db` via `SQLite`.
6
7use rusqlite::params;
8use std::path::Path;
9
/// A single project→dependency edge in the dependency index.
///
/// One value corresponds to one row of the `dependencies` table: a single
/// dependency declared by a single project's manifest.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct DependencyEntry {
    /// Project root path, stored as the lossy UTF-8 rendering of the path
    /// that was passed to `DependencyIndex::index_project`.
    pub project_path: String,
    /// Dependency name as written in the manifest (crate name, Go module
    /// path, npm package name, or Python distribution name).
    pub dependency: String,
    /// Dependency version requirement (e.g., `"^1.2.0"`).
    ///
    /// The Cargo, npm and Python parsers record `"*"` when the manifest
    /// gives no usable version string; Go versions are stored without their
    /// leading `v`.
    pub version_req: String,
    /// Ecosystem: `"cargo"`, `"go"`, `"npm"`, or `"python"`.
    pub ecosystem: String,
}
23
/// Errors produced by [`DependencyIndex`].
///
/// Every variant carries the `Display` text of the underlying error rather
/// than the error value itself.
#[derive(Debug, thiserror::Error)]
#[non_exhaustive]
pub enum DependencyIndexError {
    /// `SQLite` operation failed.
    ///
    /// Also used when the mutex guarding the connection is poisoned.
    #[error("sqlite error: {0}")]
    Sqlite(String),
    /// I/O operation failed (creating the database directory or reading a
    /// manifest file).
    #[error("io error: {0}")]
    Io(String),
    /// Manifest parse error (malformed TOML or JSON).
    #[error("parse error: {0}")]
    Parse(String),
}
38
39impl From<rusqlite::Error> for DependencyIndexError {
40    fn from(e: rusqlite::Error) -> Self {
41        Self::Sqlite(e.to_string())
42    }
43}
44
45impl From<std::io::Error> for DependencyIndexError {
46    fn from(e: std::io::Error) -> Self {
47        Self::Io(e.to_string())
48    }
49}
50
/// Global cross-project dependency index backed by `SQLite`.
///
/// Rows live in a single `dependencies` table that is created on
/// [`DependencyIndex::open`] if it does not exist yet.
///
/// # Thread safety
///
/// The underlying [`rusqlite::Connection`] is guarded by a `Mutex` so
/// `DependencyIndex` is `Send + Sync`.
pub struct DependencyIndex {
    /// The database connection; every query and insert locks this mutex
    /// for the duration of the statement.
    conn: std::sync::Mutex<rusqlite::Connection>,
}
60
61impl DependencyIndex {
62    /// Open or create the dependency index at the given path.
63    ///
64    /// Creates parent directories if they do not exist, opens the database in
65    /// WAL mode, and initialises the schema.
66    ///
67    /// # Errors
68    ///
69    /// Returns [`DependencyIndexError::Io`] if the parent directory cannot be
70    /// created, or [`DependencyIndexError::Sqlite`] if the database cannot be
71    /// opened or the schema cannot be initialised.
72    pub fn open(path: &Path) -> Result<Self, DependencyIndexError> {
73        if let Some(parent) = path.parent() {
74            std::fs::create_dir_all(parent).map_err(|e| DependencyIndexError::Io(e.to_string()))?;
75        }
76
77        let conn = rusqlite::Connection::open(path)
78            .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?;
79
80        conn.execute_batch(
81            "PRAGMA journal_mode=WAL; PRAGMA synchronous=NORMAL; PRAGMA foreign_keys=ON;",
82        )
83        .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?;
84
85        conn.execute_batch(
86            "CREATE TABLE IF NOT EXISTS dependencies (
87                id           INTEGER PRIMARY KEY AUTOINCREMENT,
88                project_path TEXT NOT NULL,
89                dependency   TEXT NOT NULL,
90                version_req  TEXT NOT NULL,
91                ecosystem    TEXT NOT NULL
92             );
93             CREATE INDEX IF NOT EXISTS idx_dep  ON dependencies(dependency);
94             CREATE INDEX IF NOT EXISTS idx_proj ON dependencies(project_path);",
95        )
96        .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?;
97
98        Ok(Self {
99            conn: std::sync::Mutex::new(conn),
100        })
101    }
102
103    /// Parse a project manifest file and index its dependencies.
104    ///
105    /// Detects the manifest type from files present in `project_root` and
106    /// inserts all discovered dependencies.  Returns the count of rows
107    /// inserted.
108    ///
109    /// # Errors
110    ///
111    /// Returns [`DependencyIndexError::Io`] if the manifest cannot be read, or
112    /// [`DependencyIndexError::Parse`] if the manifest is malformed, or
113    /// [`DependencyIndexError::Sqlite`] if a database write fails.
114    pub fn index_project(&self, project_root: &Path) -> Result<usize, DependencyIndexError> {
115        let project_path = project_root.to_string_lossy().into_owned();
116
117        let mut entries: Vec<(String, String, String)> = Vec::new();
118
119        let cargo_path = project_root.join("Cargo.toml");
120        let gomod_path = project_root.join("go.mod");
121        let pkg_json_path = project_root.join("package.json");
122        let pyproject_path = project_root.join("pyproject.toml");
123
124        if cargo_path.exists() {
125            let content = std::fs::read_to_string(&cargo_path)
126                .map_err(|e| DependencyIndexError::Io(e.to_string()))?;
127            Self::parse_cargo_toml(&content, &mut entries)
128                .map_err(|e| DependencyIndexError::Parse(e.to_string()))?;
129        } else if gomod_path.exists() {
130            let content = std::fs::read_to_string(&gomod_path)
131                .map_err(|e| DependencyIndexError::Io(e.to_string()))?;
132            Self::parse_go_mod(&content, &mut entries);
133        } else if pkg_json_path.exists() {
134            let content = std::fs::read_to_string(&pkg_json_path)
135                .map_err(|e| DependencyIndexError::Io(e.to_string()))?;
136            Self::parse_package_json(&content, &mut entries)
137                .map_err(|e| DependencyIndexError::Parse(e.to_string()))?;
138        } else if pyproject_path.exists() {
139            let content = std::fs::read_to_string(&pyproject_path)
140                .map_err(|e| DependencyIndexError::Io(e.to_string()))?;
141            Self::parse_pyproject_toml(&content, &mut entries)
142                .map_err(|e| DependencyIndexError::Parse(e.to_string()))?;
143        }
144
145        let count = entries.len();
146        for (name, version_req, ecosystem) in entries {
147            self.insert_dep(&project_path, &name, &version_req, &ecosystem)?;
148        }
149        Ok(count)
150    }
151
152    /// Query: which projects depend on the given library?
153    ///
154    /// # Errors
155    ///
156    /// Returns [`DependencyIndexError::Sqlite`] if the query fails.
157    pub fn projects_using(
158        &self,
159        dependency: &str,
160    ) -> Result<Vec<DependencyEntry>, DependencyIndexError> {
161        self.query_entries(
162            "SELECT project_path, dependency, version_req, ecosystem \
163             FROM dependencies WHERE dependency = ?1",
164            dependency,
165        )
166    }
167
168    /// Query: what does the given project depend on?
169    ///
170    /// # Errors
171    ///
172    /// Returns [`DependencyIndexError::Sqlite`] if the query fails.
173    pub fn dependencies_of(
174        &self,
175        project_path: &str,
176    ) -> Result<Vec<DependencyEntry>, DependencyIndexError> {
177        self.query_entries(
178            "SELECT project_path, dependency, version_req, ecosystem \
179             FROM dependencies WHERE project_path = ?1",
180            project_path,
181        )
182    }
183
184    // -----------------------------------------------------------------------
185    // Private helpers
186    // -----------------------------------------------------------------------
187
188    /// Execute a single-parameter SELECT and collect the result rows.
189    #[allow(clippy::significant_drop_tightening)]
190    fn query_entries(
191        &self,
192        sql: &str,
193        param: &str,
194    ) -> Result<Vec<DependencyEntry>, DependencyIndexError> {
195        let guard = self
196            .conn
197            .lock()
198            .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?;
199        let mut stmt = guard
200            .prepare(sql)
201            .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?;
202        // stmt borrows guard, so we collect eagerly to release the lock as
203        // soon as the scope ends.
204        stmt.query_map(params![param], |row| {
205            Ok(DependencyEntry {
206                project_path: row.get(0)?,
207                dependency: row.get(1)?,
208                version_req: row.get(2)?,
209                ecosystem: row.get(3)?,
210            })
211        })
212        .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?
213        .map(|r| r.map_err(|e| DependencyIndexError::Sqlite(e.to_string())))
214        .collect()
215    }
216
217    #[allow(clippy::significant_drop_tightening)]
218    fn insert_dep(
219        &self,
220        project_path: &str,
221        dependency: &str,
222        version_req: &str,
223        ecosystem: &str,
224    ) -> Result<(), DependencyIndexError> {
225        let guard = self
226            .conn
227            .lock()
228            .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?;
229        // The guard is held for the duration of this call; the lint suggests
230        // dropping it earlier but stmt borrows it so it cannot be released sooner.
231        let _rows = guard
232            .execute(
233                "INSERT INTO dependencies (project_path, dependency, version_req, ecosystem) \
234                 VALUES (?1, ?2, ?3, ?4)",
235                params![project_path, dependency, version_req, ecosystem],
236            )
237            .map_err(|e| DependencyIndexError::Sqlite(e.to_string()))?;
238        Ok(())
239    }
240
241    /// Parse `[dependencies]` and `[dev-dependencies]` from a `Cargo.toml`.
242    fn parse_cargo_toml(
243        content: &str,
244        out: &mut Vec<(String, String, String)>,
245    ) -> Result<(), toml::de::Error> {
246        let value: toml::Value = toml::from_str(content)?;
247        for table_key in &["dependencies", "dev-dependencies"] {
248            if let Some(deps) = value.get(table_key).and_then(|v| v.as_table()) {
249                for (name, spec) in deps {
250                    let version_req = match spec {
251                        toml::Value::String(s) => s.clone(),
252                        toml::Value::Table(t) => t
253                            .get("version")
254                            .and_then(|v| v.as_str())
255                            .unwrap_or("*")
256                            .to_owned(),
257                        _ => "*".to_owned(),
258                    };
259                    out.push((name.clone(), version_req, "cargo".to_owned()));
260                }
261            }
262        }
263        Ok(())
264    }
265
266    /// Parse `require` blocks from a `go.mod`.
267    fn parse_go_mod(content: &str, out: &mut Vec<(String, String, String)>) {
268        let mut in_require_block = false;
269        for line in content.lines() {
270            let trimmed = line.trim();
271            if trimmed == "require (" {
272                in_require_block = true;
273                continue;
274            }
275            if in_require_block {
276                if trimmed == ")" {
277                    in_require_block = false;
278                    continue;
279                }
280                // Expected format inside block: <module> v<version>
281                Self::push_go_dep(trimmed, out);
282            } else if let Some(rest) = trimmed.strip_prefix("require ") {
283                // Single-line require: require <module> v<version>
284                let rest = rest.trim();
285                if !rest.starts_with('(') {
286                    Self::push_go_dep(rest, out);
287                }
288            }
289        }
290    }
291
292    /// Parse a single `<module> v<version>` line and push to `out`.
293    fn push_go_dep(line: &str, out: &mut Vec<(String, String, String)>) {
294        let parts: Vec<&str> = line.splitn(2, ' ').collect();
295        if parts.len() == 2 {
296            let module = parts[0].to_owned();
297            let version = parts[1].trim_start_matches('v').to_owned();
298            out.push((module, version, "go".to_owned()));
299        }
300    }
301
302    /// Parse `dependencies` and `devDependencies` from a `package.json`.
303    fn parse_package_json(
304        content: &str,
305        out: &mut Vec<(String, String, String)>,
306    ) -> Result<(), serde_json::Error> {
307        let json: serde_json::Value = serde_json::from_str(content)?;
308        for key in &["dependencies", "devDependencies"] {
309            if let Some(deps) = json.get(key).and_then(|v| v.as_object()) {
310                for (name, version) in deps {
311                    let version_req = version.as_str().unwrap_or("*").to_owned();
312                    out.push((name.clone(), version_req, "npm".to_owned()));
313                }
314            }
315        }
316        Ok(())
317    }
318
319    /// Parse `[tool.poetry.dependencies]` or `[project.dependencies]` from a
320    /// `pyproject.toml`.
321    fn parse_pyproject_toml(
322        content: &str,
323        out: &mut Vec<(String, String, String)>,
324    ) -> Result<(), toml::de::Error> {
325        let value: toml::Value = toml::from_str(content)?;
326
327        // PEP 517 / setuptools: [project.dependencies] is a list of strings.
328        if let Some(deps) = value
329            .get("project")
330            .and_then(|v| v.get("dependencies"))
331            .and_then(|v| v.as_array())
332        {
333            for dep in deps {
334                if let Some(s) = dep.as_str() {
335                    // e.g. "requests>=2.28"
336                    let name = s
337                        .split(['>', '<', '=', '!', '~'])
338                        .next()
339                        .unwrap_or(s)
340                        .trim()
341                        .to_owned();
342                    let version_req = if name.len() < s.len() {
343                        s[name.len()..].trim().to_owned()
344                    } else {
345                        "*".to_owned()
346                    };
347                    out.push((name, version_req, "python".to_owned()));
348                }
349            }
350        }
351
352        // Poetry: [tool.poetry.dependencies] is a table.
353        if let Some(deps) = value
354            .get("tool")
355            .and_then(|v| v.get("poetry"))
356            .and_then(|v| v.get("dependencies"))
357            .and_then(|v| v.as_table())
358        {
359            for (name, spec) in deps {
360                if name == "python" {
361                    continue;
362                }
363                let version_req = match spec {
364                    toml::Value::String(s) => s.clone(),
365                    toml::Value::Table(t) => t
366                        .get("version")
367                        .and_then(|v| v.as_str())
368                        .unwrap_or("*")
369                        .to_owned(),
370                    _ => "*".to_owned(),
371                };
372                out.push((name.clone(), version_req, "python".to_owned()));
373            }
374        }
375
376        Ok(())
377    }
378}
379
380// Ensure the public API is `Send + Sync`.
381const _: () = {
382    const fn assert_send_sync<T: Send + Sync>() {}
383    const fn check() {
384        assert_send_sync::<DependencyIndex>();
385    }
386    let _ = check;
387};
388
#[cfg(test)]
#[allow(clippy::expect_used, clippy::unwrap_used)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Write `body` as `manifest_name` into a fresh temporary directory,
    /// index that directory into a fresh database inside it, and return the
    /// directory, the index and the reported row count.
    ///
    /// The `TempDir` is returned so the directory (and the database file in
    /// it) outlives the assertions of the calling test.
    fn index_manifest(
        manifest_name: &str,
        body: &str,
    ) -> (tempfile::TempDir, DependencyIndex, usize) {
        let dir = tempdir().expect("tempdir");
        std::fs::write(dir.path().join(manifest_name), body).expect("write manifest");

        let db_path = dir.path().join("deps.db");
        let idx = DependencyIndex::open(&db_path).expect("open");
        let count = idx.index_project(dir.path()).expect("index_project");
        (dir, idx, count)
    }

    #[test]
    fn dependency_index_cargo() {
        let cargo_toml =
            "[package]\nname = \"test\"\n\n[dependencies]\nserde = \"1\"\ntokio = \"1\"\n";
        let (_dir, idx, count) = index_manifest("Cargo.toml", cargo_toml);
        assert_eq!(count, 2);

        let projects = idx.projects_using("serde").expect("projects_using");
        assert!(!projects.is_empty());
        assert_eq!(projects[0].ecosystem, "cargo");
        assert_eq!(projects[0].version_req, "1");
    }

    #[test]
    fn dependency_index_go_mod() {
        let go_mod = "module example.com/myapp\n\ngo 1.21\n\nrequire (\n\tgithub.com/gin-gonic/gin v1.9.1\n\tgolang.org/x/net v0.20.0\n)\n";
        let (_dir, idx, count) = index_manifest("go.mod", go_mod);
        assert_eq!(count, 2);

        let projects = idx
            .projects_using("github.com/gin-gonic/gin")
            .expect("projects_using");
        assert!(!projects.is_empty());
        assert_eq!(projects[0].ecosystem, "go");
    }

    #[test]
    fn dependency_index_package_json() {
        let pkg_json = r#"{"name":"myapp","dependencies":{"react":"^18.0.0","axios":"^1.0.0"}}"#;
        let (_dir, idx, count) = index_manifest("package.json", pkg_json);
        assert_eq!(count, 2);

        let projects = idx.projects_using("react").expect("projects_using");
        assert!(!projects.is_empty());
        assert_eq!(projects[0].ecosystem, "npm");
    }

    #[test]
    fn dependency_index_pyproject() {
        let pyproject =
            "[project]\nname = \"x\"\ndependencies = [\"requests>=2.28\", \"click\"]\n";
        let (_dir, idx, count) = index_manifest("pyproject.toml", pyproject);
        assert_eq!(count, 2);

        let projects = idx.projects_using("requests").expect("projects_using");
        assert_eq!(projects.len(), 1);
        assert_eq!(projects[0].ecosystem, "python");
        assert_eq!(projects[0].version_req, ">=2.28");

        // A requirement without a version specifier is stored as "*".
        let unpinned = idx.projects_using("click").expect("projects_using");
        assert_eq!(unpinned.len(), 1);
        assert_eq!(unpinned[0].version_req, "*");
    }

    #[test]
    fn dependencies_of_returns_all_for_project() {
        let cargo_toml =
            "[package]\nname = \"x\"\n\n[dependencies]\na = \"1\"\nb = \"2\"\nc = \"3\"\n";
        let (dir, idx, _count) = index_manifest("Cargo.toml", cargo_toml);

        let deps = idx
            .dependencies_of(&dir.path().to_string_lossy())
            .expect("dependencies_of");
        assert_eq!(deps.len(), 3);
    }
}
461}