Skip to main content

seshat_storage/repository/
package_metadata_repository.rs

1//! SQLite implementation of [`PackageMetadataRepository`].
2
3use std::sync::{Arc, Mutex};
4
5use rusqlite::{Connection, params};
6
7use super::PackageMetadataRepository;
8use crate::StorageError;
9
/// A row from the `package_metadata` table.
///
/// This is the in-memory form of a row: the list-valued fields are plain
/// `Vec<String>` values here and are only encoded as JSON text when the row
/// is written to (or read back from) storage.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct PackageMetadataRow {
    /// Package name.
    pub name: String,
    /// Registry identifier (e.g., `"crates_io"`, `"npm"`, `"pypi"`).
    pub registry: String,
    /// Category strings (persisted as a JSON array).
    pub categories: Vec<String>,
    /// Keyword strings (persisted as a JSON array).
    pub keywords: Vec<String>,
    /// Package description, if available.
    pub description: Option<String>,
    /// Unix timestamp when metadata was fetched.
    pub fetched_at: i64,
}
26
27/// SQLite-backed package metadata repository.
28#[derive(Debug, Clone)]
29pub struct SqlitePackageMetadataRepository {
30    conn: Arc<Mutex<Connection>>,
31}
32
33impl SqlitePackageMetadataRepository {
34    /// Create a new repository backed by the given connection.
35    pub fn new(conn: Arc<Mutex<Connection>>) -> Self {
36        Self { conn }
37    }
38
39    fn conn(&self) -> Result<std::sync::MutexGuard<'_, Connection>, StorageError> {
40        self.conn.lock().map_err(|e| {
41            StorageError::QueryError(format!("Failed to acquire connection lock: {e}"))
42        })
43    }
44}
45
46impl PackageMetadataRepository for SqlitePackageMetadataRepository {
47    #[tracing::instrument(skip(self))]
48    fn upsert(&self, row: &PackageMetadataRow) -> Result<(), StorageError> {
49        let conn = self.conn()?;
50
51        let categories_json = serde_json::to_string(&row.categories)
52            .map_err(|e| StorageError::SerializationError(e.to_string()))?;
53        let keywords_json = serde_json::to_string(&row.keywords)
54            .map_err(|e| StorageError::SerializationError(e.to_string()))?;
55
56        conn.execute(
57            "INSERT INTO package_metadata (name, registry, categories, keywords, description, fetched_at)
58             VALUES (?1, ?2, ?3, ?4, ?5, ?6)
59             ON CONFLICT(name, registry) DO UPDATE SET
60               categories = excluded.categories,
61               keywords   = excluded.keywords,
62               description = excluded.description,
63               fetched_at  = excluded.fetched_at",
64            params![
65                row.name,
66                row.registry,
67                categories_json,
68                keywords_json,
69                row.description,
70                row.fetched_at,
71            ],
72        )?;
73
74        Ok(())
75    }
76
77    #[tracing::instrument(skip(self))]
78    fn get(&self, name: &str, registry: &str) -> Result<Option<PackageMetadataRow>, StorageError> {
79        let conn = self.conn()?;
80
81        let result = conn.query_row(
82            "SELECT name, registry, categories, keywords, description, fetched_at
83             FROM package_metadata WHERE name = ?1 AND registry = ?2",
84            params![name, registry],
85            row_to_package_metadata,
86        );
87
88        match result {
89            Ok(row) => Ok(Some(row)),
90            Err(rusqlite::Error::QueryReturnedNoRows) => Ok(None),
91            Err(e) => Err(StorageError::from(e)),
92        }
93    }
94
95    #[tracing::instrument(skip(self))]
96    fn get_by_registry(&self, registry: &str) -> Result<Vec<PackageMetadataRow>, StorageError> {
97        let conn = self.conn()?;
98        let mut stmt = conn.prepare(
99            "SELECT name, registry, categories, keywords, description, fetched_at
100             FROM package_metadata WHERE registry = ?1",
101        )?;
102        let rows = stmt.query_map([registry], row_to_package_metadata)?;
103        rows.collect::<Result<Vec<_>, _>>().map_err(Into::into)
104    }
105
106    #[tracing::instrument(skip(self))]
107    fn delete_stale(&self, before_timestamp: i64) -> Result<usize, StorageError> {
108        let conn = self.conn()?;
109        let affected = conn.execute(
110            "DELETE FROM package_metadata WHERE fetched_at < ?1",
111            params![before_timestamp],
112        )?;
113        Ok(affected)
114    }
115}
116
117/// Map a rusqlite `Row` to a [`PackageMetadataRow`].
118fn row_to_package_metadata(row: &rusqlite::Row<'_>) -> rusqlite::Result<PackageMetadataRow> {
119    let name: String = row.get(0)?;
120    let registry: String = row.get(1)?;
121    let categories_json: String = row.get(2)?;
122    let keywords_json: String = row.get(3)?;
123    let description: Option<String> = row.get(4)?;
124    let fetched_at: i64 = row.get(5)?;
125
126    let categories: Vec<String> = serde_json::from_str(&categories_json).map_err(|e| {
127        rusqlite::Error::FromSqlConversionFailure(2, rusqlite::types::Type::Text, Box::new(e))
128    })?;
129
130    let keywords: Vec<String> = serde_json::from_str(&keywords_json).map_err(|e| {
131        rusqlite::Error::FromSqlConversionFailure(3, rusqlite::types::Type::Text, Box::new(e))
132    })?;
133
134    Ok(PackageMetadataRow {
135        name,
136        registry,
137        categories,
138        keywords,
139        description,
140        fetched_at,
141    })
142}
143
#[cfg(test)]
mod tests {
    use super::*;
    use crate::Database;

    /// Helper: create an in-memory DB and return a `SqlitePackageMetadataRepository`.
    fn test_repo() -> SqlitePackageMetadataRepository {
        let db = Database::open(":memory:").expect("in-memory DB");
        SqlitePackageMetadataRepository::new(db.connection().clone())
    }

    /// Helper: build a fully populated row for the given key.
    fn make_row(name: &str, registry: &str) -> PackageMetadataRow {
        PackageMetadataRow {
            name: name.to_string(),
            registry: registry.to_string(),
            categories: vec!["web".to_string(), "http".to_string()],
            keywords: vec!["async".to_string(), "server".to_string()],
            description: Some("A web framework".to_string()),
            fetched_at: 1_700_000_000,
        }
    }

    #[test]
    fn upsert_and_get() {
        let repo = test_repo();
        let row = make_row("actix-web", "crates_io");

        repo.upsert(&row).expect("upsert should succeed");

        let fetched = repo
            .get("actix-web", "crates_io")
            .expect("get should succeed")
            .expect("row should exist");

        assert_eq!(fetched.name, "actix-web");
        assert_eq!(fetched.registry, "crates_io");
        assert_eq!(fetched.categories, vec!["web", "http"]);
        assert_eq!(fetched.keywords, vec!["async", "server"]);
        assert_eq!(fetched.description, Some("A web framework".to_string()));
        assert_eq!(fetched.fetched_at, 1_700_000_000);
    }

    #[test]
    fn upsert_updates_existing() {
        let repo = test_repo();
        let mut row = make_row("serde", "crates_io");
        repo.upsert(&row).expect("first upsert");

        // Update with new data
        row.categories = vec!["serialization".to_string()];
        row.keywords = vec!["json".to_string(), "serde".to_string()];
        row.description = Some("Serialization framework".to_string());
        row.fetched_at = 1_700_100_000;
        repo.upsert(&row).expect("second upsert");

        let fetched = repo
            .get("serde", "crates_io")
            .expect("get should succeed")
            .expect("row should exist");

        assert_eq!(fetched.categories, vec!["serialization"]);
        assert_eq!(fetched.keywords, vec!["json", "serde"]);
        assert_eq!(
            fetched.description,
            Some("Serialization framework".to_string())
        );
        assert_eq!(fetched.fetched_at, 1_700_100_000);
    }

    #[test]
    fn get_not_found() {
        let repo = test_repo();

        let result = repo
            .get("nonexistent", "crates_io")
            .expect("get should not error");

        assert!(result.is_none());
    }

    #[test]
    fn get_by_registry() {
        let repo = test_repo();

        repo.upsert(&make_row("serde", "crates_io")).unwrap();
        repo.upsert(&make_row("tokio", "crates_io")).unwrap();
        repo.upsert(&make_row("express", "npm")).unwrap();

        let crates = repo
            .get_by_registry("crates_io")
            .expect("query should succeed");
        assert_eq!(crates.len(), 2);

        // Row order is unspecified, so compare the names after sorting.
        let mut crate_names: Vec<&str> = crates.iter().map(|r| r.name.as_str()).collect();
        crate_names.sort_unstable();
        assert_eq!(crate_names, vec!["serde", "tokio"]);

        let npm = repo.get_by_registry("npm").expect("query should succeed");
        assert_eq!(npm.len(), 1);
        assert_eq!(npm[0].name, "express");

        let pypi = repo.get_by_registry("pypi").expect("query should succeed");
        assert!(pypi.is_empty());
    }

    #[test]
    fn delete_stale() {
        let repo = test_repo();

        let mut old = make_row("old-pkg", "crates_io");
        old.fetched_at = 1_000_000;
        repo.upsert(&old).unwrap();

        // Exactly at the cutoff: the comparison is strict, so this row stays.
        let mut boundary = make_row("edge-pkg", "crates_io");
        boundary.fetched_at = 1_500_000;
        repo.upsert(&boundary).unwrap();

        let mut recent = make_row("new-pkg", "crates_io");
        recent.fetched_at = 2_000_000;
        repo.upsert(&recent).unwrap();

        let deleted = repo.delete_stale(1_500_000).expect("delete should succeed");
        assert_eq!(deleted, 1);

        assert!(repo.get("old-pkg", "crates_io").unwrap().is_none());
        assert!(repo.get("edge-pkg", "crates_io").unwrap().is_some());
        assert!(repo.get("new-pkg", "crates_io").unwrap().is_some());
    }

    #[test]
    fn empty_categories_and_keywords() {
        let repo = test_repo();

        let row = PackageMetadataRow {
            name: "minimal".to_string(),
            registry: "npm".to_string(),
            categories: vec![],
            keywords: vec![],
            description: None,
            fetched_at: 1_700_000_000,
        };

        repo.upsert(&row).expect("upsert should succeed");

        let fetched = repo
            .get("minimal", "npm")
            .expect("get should succeed")
            .expect("row should exist");

        assert!(fetched.categories.is_empty());
        assert!(fetched.keywords.is_empty());
        assert!(fetched.description.is_none());
    }

    #[test]
    fn same_name_different_registry() {
        let repo = test_repo();

        let crate_row = PackageMetadataRow {
            name: "requests".to_string(),
            registry: "crates_io".to_string(),
            categories: vec!["http".to_string()],
            keywords: vec!["http".to_string()],
            description: Some("Rust HTTP".to_string()),
            fetched_at: 1_700_000_000,
        };

        let pypi_row = PackageMetadataRow {
            name: "requests".to_string(),
            registry: "pypi".to_string(),
            categories: vec!["internet".to_string()],
            keywords: vec!["http".to_string(), "python".to_string()],
            description: Some("Python HTTP".to_string()),
            fetched_at: 1_700_000_000,
        };

        repo.upsert(&crate_row).unwrap();
        repo.upsert(&pypi_row).unwrap();

        let crate_fetched = repo.get("requests", "crates_io").unwrap().unwrap();
        let pypi_fetched = repo.get("requests", "pypi").unwrap().unwrap();

        assert_eq!(crate_fetched.categories, vec!["http"]);
        assert_eq!(pypi_fetched.categories, vec!["internet"]);
        assert_eq!(crate_fetched.description, Some("Rust HTTP".to_string()));
        assert_eq!(pypi_fetched.description, Some("Python HTTP".to_string()));
    }
}