Skip to main content

cargo_bless/
intel.rs

1//! Live intelligence layer — fetches metadata from crates.io and GitHub
2//! to assess freshness, popularity, and maintenance status.
3//!
//! Network operations are designed to be **non-fatal**: bulk lookups skip
//! crates whose fetch fails and GitHub lookups return `None`, so the tool
//! continues with whatever data it has.
6
7use std::collections::HashMap;
8use std::fs;
9use std::path::PathBuf;
10use std::time::{Duration, SystemTime, UNIX_EPOCH};
11
12use anyhow::{Context, Result};
13use directories::ProjectDirs;
14use serde::{Deserialize, Serialize};
15
/// `User-Agent` value handed to both the crates.io client and the GitHub HTTP client.
const USER_AGENT: &str = "cargo-bless/0.1.0 (https://github.com/Ruffian-L/cargo-bless)";
/// Maximum age, in seconds, of a cache entry before it is considered stale (one hour).
const CACHE_TTL_SECS: u64 = 60 * 60;
18
19// ── Public types ─────────────────────────────────────────────────────
20
21/// Live metadata for a single crate.
22#[derive(Debug, Clone, Serialize, Deserialize)]
23pub struct CrateIntel {
24    pub name: String,
25    pub latest_version: String,
26    pub downloads: u64,
27    pub recent_downloads: Option<u64>,
28    pub last_updated: String,
29    pub repository_url: Option<String>,
30    pub description: Option<String>,
31}
32
33/// GitHub repository activity summary.
34#[derive(Debug, Clone, Serialize, Deserialize)]
35pub struct GitHubActivity {
36    pub last_push: String,
37    pub stars: u64,
38    pub is_archived: bool,
39    pub open_issues: u64,
40}
41
42/// Cache wrapper that tracks when data was fetched.
43#[derive(Debug, Serialize, Deserialize)]
44struct CacheEntry<T> {
45    data: T,
46    fetched_at: u64,
47}
48
49impl<T> CacheEntry<T> {
50    fn is_fresh(&self) -> bool {
51        let now = SystemTime::now()
52            .duration_since(UNIX_EPOCH)
53            .unwrap_or_default()
54            .as_secs();
55        now.saturating_sub(self.fetched_at) < CACHE_TTL_SECS
56    }
57
58    fn new(data: T) -> Self {
59        let fetched_at = SystemTime::now()
60            .duration_since(UNIX_EPOCH)
61            .unwrap_or_default()
62            .as_secs();
63        Self { data, fetched_at }
64    }
65}
66
67// ── IntelClient ──────────────────────────────────────────────────────
68
69/// Client for fetching live dependency intelligence.
70pub struct IntelClient {
71    client: crates_io_api::SyncClient,
72    http: reqwest::blocking::Client,
73    cache_dir: PathBuf,
74}
75
76impl IntelClient {
77    /// Create a new IntelClient with crates.io API access and disk cache.
78    pub fn new() -> Result<Self> {
79        let client = crates_io_api::SyncClient::new(USER_AGENT, Duration::from_secs(1))
80            .context("failed to create crates.io client")?;
81        let http = reqwest::blocking::Client::builder()
82            .user_agent(USER_AGENT)
83            .timeout(Duration::from_secs(10))
84            .build()
85            .context("failed to create GitHub HTTP client")?;
86
87        let cache_dir = ProjectDirs::from("rs", "", "cargo-bless")
88            .map(|dirs| dirs.cache_dir().to_path_buf())
89            .unwrap_or_else(|| {
90                let mut fallback = std::env::temp_dir();
91                fallback.push("cargo-bless-cache");
92                fallback
93            });
94
95        fs::create_dir_all(&cache_dir).context("failed to create cache directory")?;
96
97        Ok(Self {
98            client,
99            http,
100            cache_dir,
101        })
102    }
103
104    /// Fetch live intel for a crate. Checks disk cache first (1hr TTL).
105    pub fn fetch_crate_intel(&self, name: &str) -> Result<CrateIntel> {
106        // Check cache
107        let cache_path = self.cache_dir.join(format!("{}.json", name));
108        if let Ok(contents) = fs::read_to_string(&cache_path) {
109            if let Ok(entry) = serde_json::from_str::<CacheEntry<CrateIntel>>(&contents) {
110                if entry.is_fresh() {
111                    return Ok(entry.data);
112                }
113            }
114        }
115
116        // Cache miss or stale — fetch from crates.io
117        let response = self
118            .client
119            .get_crate(name)
120            .with_context(|| format!("failed to fetch crate info for '{}'", name))?;
121
122        let crate_data = &response.crate_data;
123        let latest_version = response
124            .versions
125            .first()
126            .map(|v| v.num.clone())
127            .unwrap_or_else(|| crate_data.max_version.clone());
128
129        let intel = CrateIntel {
130            name: name.to_string(),
131            latest_version,
132            downloads: crate_data.downloads,
133            recent_downloads: crate_data.recent_downloads,
134            last_updated: crate_data.updated_at.to_string(),
135            repository_url: crate_data.repository.clone(),
136            description: crate_data.description.clone(),
137        };
138
139        // Write to cache (best-effort)
140        let entry = CacheEntry::new(intel.clone());
141        if let Ok(json) = serde_json::to_string_pretty(&entry) {
142            let _ = fs::write(&cache_path, json);
143        }
144
145        Ok(intel)
146    }
147
148    /// Fetch GitHub activity for a repository URL.
149    /// Returns None if the URL is not a GitHub URL or if the fetch fails.
150    pub fn fetch_github_activity(&self, repo_url: &str) -> Option<GitHubActivity> {
151        let (owner, repo) = parse_github_url(repo_url)?;
152
153        let url = format!("https://api.github.com/repos/{owner}/{repo}");
154        let repo_info = self
155            .http
156            .get(url)
157            .send()
158            .ok()?
159            .error_for_status()
160            .ok()?
161            .json::<GitHubRepoResponse>()
162            .ok()?;
163
164        Some(GitHubActivity {
165            last_push: repo_info.pushed_at.unwrap_or_else(|| "unknown".into()),
166            stars: repo_info.stargazers_count.unwrap_or(0),
167            is_archived: repo_info.archived.unwrap_or(false),
168            open_issues: repo_info.open_issues_count.unwrap_or(0),
169        })
170    }
171
172    /// Fetch intel for all unique crate names, returning what we can get.
173    /// Failures for individual crates are silently skipped.
174    pub fn fetch_bulk_intel(&self, crate_names: &[&str]) -> HashMap<String, CrateIntel> {
175        let mut intel = HashMap::new();
176        for name in crate_names {
177            match self.fetch_crate_intel(name) {
178                Ok(info) => {
179                    intel.insert(name.to_string(), info);
180                }
181                Err(_) => {
182                    // Non-fatal: skip this crate
183                }
184            }
185        }
186        intel
187    }
188}
189
190#[derive(Debug, Deserialize)]
191struct GitHubRepoResponse {
192    pushed_at: Option<String>,
193    stargazers_count: Option<u64>,
194    archived: Option<bool>,
195    open_issues_count: Option<u64>,
196}
197
198// ── Helpers ──────────────────────────────────────────────────────────
199
/// Parse a GitHub repository URL into `(owner, repo)`.
///
/// Accepted forms include `https://github.com/owner/repo`,
/// `https://github.com/owner/repo.git`, `https://github.com/owner/repo/tree/main`,
/// `git+https://github.com/owner/repo` and the scheme-less `github.com/owner/repo`.
/// A single trailing `.git` is removed from the repository name, and any
/// query string or fragment is ignored.
///
/// Returns `None` when the host is not `github.com` (optionally prefixed by
/// `www.`), or when the owner or repository segment is missing or empty.
pub fn parse_github_url(url: &str) -> Option<(String, String)> {
    let url = url.trim();

    // Drop a leading `scheme://`. The scheme may not contain `/`, so a `://`
    // that only appears later (for example inside a query string) is left alone.
    let without_scheme = match url.split_once("://") {
        Some((scheme, rest)) if !scheme.contains('/') => rest,
        _ => url,
    };

    // Everything before the first `/` is the authority; the remainder is the path.
    let (authority, path) = without_scheme.split_once('/')?;

    // Ignore `user@` credentials, then require the host itself to be GitHub.
    // Comparing the whole host (instead of searching for a substring) rejects
    // look-alikes such as `notgithub.com` and URLs that merely contain
    // `github.com/` somewhere in their path.
    let host = authority
        .rsplit_once('@')
        .map_or(authority, |(_, host)| host);
    let is_github =
        host.eq_ignore_ascii_case("github.com") || host.eq_ignore_ascii_case("www.github.com");
    if !is_github {
        return None;
    }

    // Query strings and fragments are not part of the repository path.
    let path = path.split(['?', '#']).next().unwrap_or(path);

    let mut segments = path.split('/');
    let owner = segments.next()?;
    let repo = segments.next()?;
    // Strip exactly one `.git` suffix, not every repetition of it.
    let repo = repo.strip_suffix(".git").unwrap_or(repo);

    if owner.is_empty() || repo.is_empty() {
        return None;
    }

    Some((owner.to_string(), repo.to_string()))
}
227
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Shorthand for the value `parse_github_url` returns on success.
    fn pair(owner: &str, repo: &str) -> Option<(String, String)> {
        Some((owner.to_string(), repo.to_string()))
    }

    /// Builds a `CrateIntel` with no optional data and a fixed update timestamp.
    fn bare_intel(name: &str, latest_version: &str, downloads: u64) -> CrateIntel {
        CrateIntel {
            name: name.to_string(),
            latest_version: latest_version.to_string(),
            downloads,
            recent_downloads: None,
            last_updated: String::from("2026-02-27T00:00:00Z"),
            repository_url: None,
            description: None,
        }
    }

    #[test]
    fn test_parse_github_url_basic() {
        let parsed = parse_github_url("https://github.com/serde-rs/serde");
        assert_eq!(parsed, pair("serde-rs", "serde"));
    }

    #[test]
    fn test_parse_github_url_with_git_suffix() {
        let parsed = parse_github_url("https://github.com/tokio-rs/tokio.git");
        assert_eq!(parsed, pair("tokio-rs", "tokio"));
    }

    #[test]
    fn test_parse_github_url_with_path() {
        let parsed = parse_github_url("https://github.com/dtolnay/anyhow/tree/main");
        assert_eq!(parsed, pair("dtolnay", "anyhow"));
    }

    #[test]
    fn test_parse_github_url_trailing_slash() {
        let parsed = parse_github_url("https://github.com/clap-rs/clap/");
        assert_eq!(parsed, pair("clap-rs", "clap"));
    }

    #[test]
    fn test_parse_github_url_not_github() {
        for url in ["https://gitlab.com/foo/bar", "https://crates.io/crates/serde"] {
            assert!(parse_github_url(url).is_none());
        }
    }

    #[test]
    fn test_parse_github_url_too_short() {
        for url in ["https://github.com/just-user", "https://github.com/"] {
            assert!(parse_github_url(url).is_none());
        }
    }

    #[test]
    fn test_cache_entry_fresh() {
        let entry = CacheEntry::new(String::from("some data"));
        assert!(entry.is_fresh());
    }

    #[test]
    fn test_cache_entry_stale() {
        // A fetch time of 0 (the epoch itself) is far older than the TTL.
        let entry = CacheEntry {
            data: String::from("old data"),
            fetched_at: 0,
        };
        assert!(!entry.is_fresh());
    }

    #[test]
    fn test_cache_entry_roundtrip() {
        let entry = CacheEntry::new(CrateIntel {
            name: String::from("serde"),
            latest_version: String::from("1.0.228"),
            downloads: 100_000_000,
            recent_downloads: Some(5_000_000),
            last_updated: String::from("2026-01-15T12:00:00Z"),
            repository_url: Some(String::from("https://github.com/serde-rs/serde")),
            description: Some(String::from("A serialization framework")),
        });

        let json = serde_json::to_string(&entry).unwrap();
        let roundtrip: CacheEntry<CrateIntel> = serde_json::from_str(&json).unwrap();

        assert_eq!(roundtrip.data.name, "serde");
        assert_eq!(roundtrip.data.downloads, 100_000_000);
        assert!(roundtrip.is_fresh());
    }

    #[test]
    fn test_cache_disk_write_and_read() {
        let tmp = TempDir::new().unwrap();
        let cache_path = tmp.path().join("test_crate.json");

        // Write
        let entry = CacheEntry::new(bare_intel("test_crate", "0.1.0", 42));
        let json = serde_json::to_string_pretty(&entry).unwrap();
        fs::write(&cache_path, &json).unwrap();

        // Read back
        let contents = fs::read_to_string(&cache_path).unwrap();
        let loaded: CacheEntry<CrateIntel> = serde_json::from_str(&contents).unwrap();
        assert_eq!(loaded.data.name, "test_crate");
        assert!(loaded.is_fresh());
    }

    #[test]
    fn test_fetch_bulk_intel() {
        let tmp = TempDir::new().unwrap();

        // Point the client at a throwaway cache directory.
        let mut client = IntelClient::new().unwrap();
        client.cache_dir = tmp.path().to_path_buf();

        // Seed the cache so the first lookup is answered from disk.
        let entry = CacheEntry::new(bare_intel("test_success", "1.0.0", 100));
        let json = serde_json::to_string_pretty(&entry).unwrap();
        fs::write(tmp.path().join("test_success.json"), json).unwrap();

        // One name is a cache hit; the other is a cache miss for a made-up crate.
        let names = ["test_success", "test_failure_not_exist_abc123"];
        let results = client.fetch_bulk_intel(&names);

        // Only the cached crate should come back.
        assert_eq!(results.len(), 1);
        assert!(results.contains_key("test_success"));
        assert_eq!(results.get("test_success").unwrap().name, "test_success");
    }

    /// Live network test — run with `cargo test -- --ignored`
    #[test]
    #[ignore]
    fn test_live_fetch_serde() {
        let client = IntelClient::new().expect("client should init");
        let intel = client
            .fetch_crate_intel("serde")
            .expect("should fetch serde");

        assert_eq!(intel.name, "serde");
        assert!(intel.downloads > 0);
        println!(
            "serde: v{}, {} downloads",
            intel.latest_version, intel.downloads
        );
    }

    /// Live GitHub test — run with `cargo test -- --ignored`
    #[test]
    #[ignore]
    fn test_live_github_serde() {
        let client = IntelClient::new().expect("client should init");
        let activity = client
            .fetch_github_activity("https://github.com/serde-rs/serde")
            .expect("should get activity");

        assert!(activity.stars > 0);
        println!(
            "serde: {} stars, archived={}",
            activity.stars, activity.is_archived
        );
    }
}
390}