// seshat_scanner/registry/mod.rs

1//! Package registry metadata types, client trait, and implementations.
2//!
3//! Defines the [`PackageRegistryClient`] trait for fetching metadata from
4//! package registries (crates.io, npm, PyPI) and the associated types.
5//!
6//! Concrete implementations:
7//! - [`crates_io::CratesIoClient`] — fetches from crates.io REST API
8//! - [`npm::NpmClient`] — fetches from npm registry API
9//! - [`pypi::PyPIClient`] — fetches from PyPI JSON API
10
11pub mod crates_io;
12pub mod npm;
13pub mod pypi;
14pub mod registry_mapping;
15
16use std::time::Duration;
17
18use serde::{Deserialize, Serialize};
19use ureq::Agent;
20
/// Time-to-live for cached package metadata, in seconds (30 days).
///
/// A cache entry older than this is treated as stale and is fetched again
/// on the next scan.
pub const CACHE_TTL_SECS: i64 = 60 * 60 * 24 * 30;
26
27/// Which package registry a dependency originates from.
28#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
29#[serde(rename_all = "snake_case")]
30pub enum Registry {
31    /// Rust crates: <https://crates.io>
32    CratesIo,
33    /// Node.js packages: <https://www.npmjs.com>
34    Npm,
35    /// Python packages: <https://pypi.org>
36    PyPI,
37}
38
39impl Registry {
40    /// String representation used in the database `registry` column.
41    #[must_use]
42    pub fn as_str(&self) -> &'static str {
43        match self {
44            Self::CratesIo => "crates_io",
45            Self::Npm => "npm",
46            Self::PyPI => "pypi",
47        }
48    }
49}
50
51impl std::fmt::Display for Registry {
52    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
53        f.write_str(self.as_str())
54    }
55}
56
57impl std::str::FromStr for Registry {
58    type Err = String;
59
60    fn from_str(s: &str) -> Result<Self, Self::Err> {
61        match s {
62            "crates_io" => Ok(Self::CratesIo),
63            "npm" => Ok(Self::Npm),
64            "pypi" => Ok(Self::PyPI),
65            other => Err(format!("unknown registry: {other}")),
66        }
67    }
68}
69
/// Metadata fetched from a package registry.
///
/// A plain data record with public fields. It derives structural equality
/// and serde `Serialize`/`Deserialize`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct PackageMetadata {
    /// Package name as it appears in the registry.
    pub name: String,
    /// Which registry this metadata was fetched from.
    pub registry: Registry,
    /// Registry-defined categories (e.g., crates.io categories, PyPI classifiers).
    /// May be empty.
    pub categories: Vec<String>,
    /// Author-defined keywords. May be empty.
    pub keywords: Vec<String>,
    /// Package description, if available (`None` when absent).
    pub description: Option<String>,
}
84
/// Errors that can occur when fetching package metadata from a registry.
///
/// Every variant records the package name and the [`Registry`] involved so
/// the rendered message identifies the failing lookup on its own.
#[derive(Debug, thiserror::Error)]
pub enum RegistryError {
    /// HTTP request failed (timeout, DNS, connection error, etc.).
    ///
    /// Produced for any `ureq` error that is not a status-code error;
    /// `reason` is that error's display text.
    #[error("HTTP error fetching {package} from {registry}: {reason}")]
    HttpError {
        package: String,
        registry: Registry,
        reason: String,
    },

    /// The registry returned a non-success status code.
    ///
    /// 404 is reported as [`RegistryError::NotFound`] instead.
    #[error("{registry} returned status {status} for {package}")]
    StatusError {
        package: String,
        registry: Registry,
        status: u16,
    },

    /// Failed to parse the registry's JSON response.
    ///
    /// Also used when the response body could not be read into a string.
    #[error("Failed to parse {registry} response for {package}: {reason}")]
    ParseError {
        package: String,
        registry: Registry,
        reason: String,
    },

    /// The requested package was not found (404).
    #[error("Package {package} not found on {registry}")]
    NotFound { package: String, registry: Registry },
}
116
/// Trait for fetching package metadata from a registry.
///
/// Each registry (crates.io, npm, PyPI) provides a concrete implementation.
/// Implementations must:
/// - Set an appropriate `User-Agent` header per registry API policies
/// - Use a reasonable timeout (≤ 5 seconds)
/// - Return `RegistryError` on failure rather than panicking
///
/// The `Send + Sync` supertraits require every implementation to be safe to
/// share and move between threads.
pub trait PackageRegistryClient: Send + Sync {
    /// Fetch metadata for the given package from the registry.
    ///
    /// `package_name` is the name to look up in the registry.
    ///
    /// # Errors
    ///
    /// Returns [`RegistryError`] if the HTTP request fails, the package is
    /// not found, or the response cannot be parsed.
    fn fetch_metadata(&self, package_name: &str) -> Result<PackageMetadata, RegistryError>;
}
133
134// ---------------------------------------------------------------------------
135// Shared HTTP infrastructure for registry clients
136// ---------------------------------------------------------------------------
137
/// User-Agent header value per registry API policies.
///
/// Built at compile time as `seshat/<version>`, where `<version>` is the
/// `CARGO_PKG_VERSION` of the crate being compiled.
const USER_AGENT: &str = concat!("seshat/", env!("CARGO_PKG_VERSION"));
140
/// Request timeout in seconds.
///
/// Applied as the agent-wide (`timeout_global`) limit when a
/// `RegistryHttpClient` is constructed.
const TIMEOUT_SECS: u64 = 5;
143
/// Shared HTTP transport for all registry clients.
///
/// Handles [`ureq::Agent`] creation, timeout, `User-Agent` header, error
/// mapping, and response body reading. Each concrete client (`CratesIoClient`,
/// `NpmClient`, `PyPIClient`) wraps this and adds only its JSON parsing logic.
///
/// Request URLs are formed as `{base_url}/{package_name}{url_suffix}`.
pub(crate) struct RegistryHttpClient {
    /// Agent used for every request; configured with the global timeout.
    agent: Agent,
    /// URL prefix placed before `/{package_name}`.
    base_url: String,
    /// Registry this client talks to; copied into every error it reports.
    registry: Registry,
    /// Suffix appended after `/{package_name}` in the URL.
    ///
    /// Most registries use `""` (empty), PyPI uses `"/json"`.
    url_suffix: &'static str,
}
158
159impl RegistryHttpClient {
160    /// Create a new HTTP client for the given registry.
161    pub(crate) fn new(
162        registry: Registry,
163        default_base_url: &str,
164        url_suffix: &'static str,
165    ) -> Self {
166        let config = Agent::config_builder()
167            .timeout_global(Some(Duration::from_secs(TIMEOUT_SECS)))
168            .build();
169        Self {
170            agent: config.into(),
171            base_url: default_base_url.to_owned(),
172            registry,
173            url_suffix,
174        }
175    }
176
177    /// Create a new HTTP client with a custom base URL (for testing).
178    #[cfg(test)]
179    pub(crate) fn with_base_url(
180        registry: Registry,
181        base_url: &str,
182        url_suffix: &'static str,
183    ) -> Self {
184        let config = Agent::config_builder()
185            .timeout_global(Some(Duration::from_secs(TIMEOUT_SECS)))
186            .build();
187        Self {
188            agent: config.into(),
189            base_url: base_url.to_owned(),
190            registry,
191            url_suffix,
192        }
193    }
194
195    /// The current base URL (useful for assertions in tests).
196    #[cfg(test)]
197    pub(crate) fn base_url(&self) -> &str {
198        &self.base_url
199    }
200
201    /// Perform a GET request for the given package and return the raw
202    /// response body as a string.
203    ///
204    /// Handles HTTP errors (status codes, timeouts) and body reading,
205    /// mapping everything into [`RegistryError`].
206    pub(crate) fn fetch_raw(&self, package_name: &str) -> Result<String, RegistryError> {
207        let url = format!("{}/{}{}", self.base_url, package_name, self.url_suffix);
208
209        let response = self
210            .agent
211            .get(&url)
212            .header("User-Agent", USER_AGENT)
213            .call()
214            .map_err(|e| map_ureq_error(package_name, self.registry, e))?;
215
216        response
217            .into_body()
218            .read_to_string()
219            .map_err(|e| RegistryError::ParseError {
220                package: package_name.to_owned(),
221                registry: self.registry,
222                reason: format!("failed to read response body: {e}"),
223            })
224    }
225}
226
227/// Map a [`ureq::Error`] to our [`RegistryError`].
228fn map_ureq_error(package_name: &str, registry: Registry, err: ureq::Error) -> RegistryError {
229    match err {
230        ureq::Error::StatusCode(404) => RegistryError::NotFound {
231            package: package_name.to_owned(),
232            registry,
233        },
234        ureq::Error::StatusCode(code) => RegistryError::StatusError {
235            package: package_name.to_owned(),
236            registry,
237            status: code,
238        },
239        other => RegistryError::HttpError {
240            package: package_name.to_owned(),
241            registry,
242            reason: other.to_string(),
243        },
244    }
245}
246
/// Unit tests for the registry types and the shared HTTP client.
///
/// The two connection-error tests open a TCP connection to `127.0.0.1:1`
/// and expect it to fail; everything else runs without network access.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn registry_as_str() {
        assert_eq!(Registry::CratesIo.as_str(), "crates_io");
        assert_eq!(Registry::Npm.as_str(), "npm");
        assert_eq!(Registry::PyPI.as_str(), "pypi");
    }

    #[test]
    fn registry_display() {
        assert_eq!(format!("{}", Registry::CratesIo), "crates_io");
        assert_eq!(format!("{}", Registry::Npm), "npm");
        assert_eq!(format!("{}", Registry::PyPI), "pypi");
    }

    #[test]
    fn registry_from_str_valid() {
        assert_eq!("crates_io".parse(), Ok(Registry::CratesIo));
        assert_eq!("npm".parse(), Ok(Registry::Npm));
        assert_eq!("pypi".parse(), Ok(Registry::PyPI));
    }

    #[test]
    fn registry_from_str_invalid() {
        let result = "maven".parse::<Registry>();
        assert!(result.is_err());
        assert!(result.unwrap_err().contains("unknown registry"));
    }

    // `Display` and `FromStr` must stay inverse to each other for every
    // variant, since the displayed string is what gets stored and re-parsed.
    #[test]
    fn registry_display_from_str_roundtrip() {
        for r in [Registry::CratesIo, Registry::Npm, Registry::PyPI] {
            assert_eq!(r.to_string().parse::<Registry>(), Ok(r));
        }
    }

    #[test]
    fn registry_serde_roundtrip() {
        let json = serde_json::to_string(&Registry::CratesIo).unwrap();
        assert_eq!(json, "\"crates_io\"");
        let parsed: Registry = serde_json::from_str(&json).unwrap();
        assert_eq!(parsed, Registry::CratesIo);
    }

    #[test]
    fn registry_serde_all_variants() {
        for r in [Registry::CratesIo, Registry::Npm, Registry::PyPI] {
            let json = serde_json::to_string(&r).unwrap();
            let back: Registry = serde_json::from_str(&json).unwrap();
            assert_eq!(back, r);
        }
    }

    #[test]
    fn package_metadata_construction() {
        let meta = PackageMetadata {
            name: "serde".to_owned(),
            registry: Registry::CratesIo,
            categories: vec!["parsing".to_owned()],
            keywords: vec!["serialization".to_owned()],
            description: Some("A serialization framework".to_owned()),
        };
        assert_eq!(meta.name, "serde");
        assert_eq!(meta.registry, Registry::CratesIo);
        assert!(!meta.categories.is_empty());
        assert!(!meta.keywords.is_empty());
        assert_eq!(
            meta.description,
            Some("A serialization framework".to_owned())
        );
    }

    #[test]
    fn package_metadata_no_description() {
        let meta = PackageMetadata {
            name: "foo".to_owned(),
            registry: Registry::Npm,
            categories: vec![],
            keywords: vec![],
            description: None,
        };
        assert_eq!(meta.description, None);
    }

    #[test]
    fn registry_error_display_not_found() {
        let err = RegistryError::NotFound {
            package: "foobar123".to_owned(),
            registry: Registry::CratesIo,
        };
        let s = err.to_string();
        assert!(s.contains("foobar123"));
        assert!(s.contains("crates_io"));
        assert!(s.contains("not found"));
    }

    #[test]
    fn registry_error_display_http_error() {
        let err = RegistryError::HttpError {
            package: "baz".to_owned(),
            registry: Registry::Npm,
            reason: "connection timeout".to_owned(),
        };
        let s = err.to_string();
        assert!(s.contains("baz"));
        assert!(s.contains("timeout"));
        assert!(s.contains("HTTP error"));
    }

    #[test]
    fn registry_error_display_status_error() {
        let err = RegistryError::StatusError {
            package: "pkg".to_owned(),
            registry: Registry::PyPI,
            status: 500,
        };
        let s = err.to_string();
        assert!(s.contains("500"));
        assert!(s.contains("pypi"));
        assert!(s.contains("pkg"));
    }

    #[test]
    fn registry_error_display_parse_error() {
        let err = RegistryError::ParseError {
            package: "pkg".to_owned(),
            registry: Registry::Npm,
            reason: "invalid JSON".to_owned(),
        };
        let s = err.to_string();
        assert!(s.contains("invalid JSON"));
    }

    #[test]
    fn cache_ttl_is_30_days() {
        assert_eq!(CACHE_TTL_SECS, 30 * 24 * 60 * 60);
    }

    #[test]
    fn registry_http_client_new() {
        let client = RegistryHttpClient::new(Registry::CratesIo, "https://crates.io/api/v1", "");
        assert_eq!(client.base_url(), "https://crates.io/api/v1");
    }

    #[test]
    fn registry_http_client_with_base_url() {
        let client =
            RegistryHttpClient::with_base_url(Registry::Npm, "https://registry.npmjs.org", "");
        assert_eq!(client.base_url(), "https://registry.npmjs.org");
    }

    // The status-code branches are exercised with hand-built errors so they
    // need no HTTP server.
    #[test]
    fn map_ureq_error_404_is_not_found() {
        let err = map_ureq_error("ghost", Registry::PyPI, ureq::Error::StatusCode(404));
        assert!(matches!(
            err,
            RegistryError::NotFound {
                ref package,
                registry: Registry::PyPI,
            } if package == "ghost"
        ));
    }

    #[test]
    fn map_ureq_error_other_status_is_status_error() {
        let err = map_ureq_error("pkg", Registry::Npm, ureq::Error::StatusCode(503));
        assert!(matches!(
            err,
            RegistryError::StatusError {
                ref package,
                registry: Registry::Npm,
                status: 503,
            } if package == "pkg"
        ));
    }

    #[test]
    fn map_ureq_error_transport_error() {
        // Trigger a connection error by connecting to a non-listening port.
        let result = ureq::get("http://127.0.0.1:1/nonexistent").call();
        assert!(result.is_err());
        let cli_err = map_ureq_error("testpkg", Registry::CratesIo, result.unwrap_err());
        assert!(matches!(cli_err, RegistryError::HttpError { .. }));
        assert!(cli_err.to_string().contains("testpkg"));
    }

    #[test]
    fn fetch_raw_connection_error_returns_http_error() {
        let client = RegistryHttpClient::with_base_url(Registry::Npm, "http://127.0.0.1:1", "");
        let result = client.fetch_raw("some-package");
        assert!(result.is_err());
        assert!(matches!(
            result.unwrap_err(),
            RegistryError::HttpError { .. }
        ));
    }
}
414}