Skip to main content

pi/
extension_popularity.rs

//! Popularity signal snapshotting for extension candidates.
//!
//! This module is intentionally "evidence-first":
//! - Fetch concrete metrics (GitHub stars/downloads/etc).
//! - Normalize missing/unavailable metrics to `null` (never `0`).
//! - Persist evidence onto the canonical candidate pool JSON so scoring can be auditable.
7
8use crate::error::{Error, Result};
9use crate::http::client::Client;
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12use std::time::Duration;
13
/// Timeout passed to `.timeout(...)` on each HTTP request built in this module
/// (GitHub repo lookup, npm downloads, npm registry metadata).
const POPULARITY_REQUEST_TIMEOUT: Duration = Duration::from_secs(20);
15
16#[derive(Debug, Clone, Deserialize, Serialize)]
17pub struct CandidatePool {
18    #[serde(rename = "$schema")]
19    pub schema: String,
20    pub generated_at: String,
21    pub source_inputs: SourceInputs,
22    pub total_candidates: u64,
23    pub items: Vec<CandidateItem>,
24    pub alias_notes: Vec<AliasNote>,
25}
26
27#[derive(Debug, Clone, Deserialize, Serialize)]
28pub struct SourceInputs {
29    pub artifact_provenance: String,
30    pub artifact_root: String,
31    pub extra_npm_packages: Vec<String>,
32}
33
34#[derive(Debug, Clone, Deserialize, Serialize)]
35pub struct AliasNote {
36    pub note: String,
37}
38
39#[derive(Debug, Clone, Deserialize, Serialize)]
40pub struct CandidateItem {
41    pub id: String,
42    pub name: String,
43    pub source_tier: String,
44    pub status: String,
45    pub license: String,
46    pub retrieved: Option<String>,
47    pub artifact_path: Option<String>,
48    pub checksum: Option<Sha256Checksum>,
49    pub source: CandidateSource,
50    pub repository_url: Option<String>,
51    #[serde(default)]
52    pub popularity: PopularityEvidence,
53    pub aliases: Vec<String>,
54    pub notes: Option<String>,
55}
56
57#[derive(Debug, Clone, Deserialize, Serialize)]
58pub struct Sha256Checksum {
59    pub sha256: String,
60}
61
62#[derive(Debug, Clone, Deserialize, Serialize)]
63#[serde(tag = "type", rename_all = "snake_case")]
64pub enum CandidateSource {
65    Git {
66        repo: String,
67        #[serde(default)]
68        path: Option<String>,
69    },
70    Npm {
71        package: String,
72        version: String,
73        url: String,
74    },
75    Url {
76        url: String,
77    },
78}
79
80/// Popularity evidence schema.
81///
82/// This is the machine-joinable surface used by scoring (see `docs/EXTENSION_POPULARITY_CRITERIA.md`).
83/// When a metric is unknown/unavailable, it should be persisted as explicit `null`.
84#[derive(Debug, Clone, Default, Deserialize, Serialize)]
85pub struct PopularityEvidence {
86    pub snapshot_at: Option<String>,
87
88    // GitHub
89    pub github_repo: Option<String>,
90    pub github_stars: Option<u64>,
91    pub github_forks: Option<u64>,
92    pub github_watchers: Option<u64>,
93    pub github_open_issues: Option<u64>,
94    pub github_last_commit: Option<String>,
95
96    // npm
97    pub npm_downloads_weekly: Option<u64>,
98    pub npm_downloads_monthly: Option<u64>,
99    pub npm_last_publish: Option<String>,
100    pub npm_dependents: Option<u64>,
101
102    // Marketplace (OpenClaw / ClawHub) - not currently populated by the candidate pool.
103    pub marketplace_rank: Option<u32>,
104    pub marketplace_installs_monthly: Option<u64>,
105    pub marketplace_featured: Option<bool>,
106
107    // Mentions / references - not currently populated by the candidate pool.
108    pub mentions_count: Option<u32>,
109    pub mentions_sources: Option<Vec<String>>,
110}
111
/// An `owner`/`repo` pair naming a GitHub repository.
///
/// Hashable and comparable so it can be used as a set or map key.
#[derive(Clone, Debug, Eq, Hash, PartialEq)]
pub struct GitHubRepoRef {
    /// Owner segment (the part before the slash).
    pub owner: String,
    /// Repository segment (the part after the slash).
    pub repo: String,
}

impl GitHubRepoRef {
    /// Renders the reference as `owner/repo`.
    #[must_use]
    pub fn full_name(&self) -> String {
        let mut name = String::with_capacity(self.owner.len() + 1 + self.repo.len());
        name.push_str(&self.owner);
        name.push('/');
        name.push_str(&self.repo);
        name
    }
}
124
/// Metrics extracted from a GitHub repository API response.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GitHubRepoMetrics {
    /// The repository's `full_name` as reported in the response.
    pub full_name: String,
    /// Star count.
    pub stars: u64,
    /// Fork count.
    pub forks: u64,
    /// Watcher count; `None` when the response did not carry it.
    pub watchers: Option<u64>,
    /// Open issue count.
    pub open_issues: u64,
    /// Timestamp of the last push, as a string; `None` when absent.
    pub pushed_at: Option<String>,
}
134
/// npm download counts for one package; each window is `None` when unavailable.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NpmDownloads {
    /// Downloads over the `last-week` window.
    pub weekly: Option<u64>,
    /// Downloads over the `last-month` window.
    pub monthly: Option<u64>,
}
140
/// Metadata extracted from an npm registry package document.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct NpmRegistryMeta {
    /// Version that the `latest` dist-tag points at.
    pub latest_version: Option<String>,
    /// Publish timestamp recorded for `latest_version`.
    pub last_publish: Option<String>,
    /// Repository URL declared by the package.
    pub repository_url: Option<String>,
}
147
148#[derive(Debug, Clone, PartialEq, Eq)]
149pub enum GitHubRepoCandidate {
150    Repo(GitHubRepoRef),
151    /// A malformed GitHub URL that only included a single path segment, e.g. `https://github.com/foo-bar`.
152    Slug(String),
153}
154
155/// Best-effort parse of a GitHub repository reference from a URL-like string.
156///
157/// Supports:
158/// - `https://github.com/owner/repo`
159/// - `git+https://github.com/owner/repo.git`
160/// - `git@github.com:owner/repo.git`
161/// - `github.com/owner/repo`
162///
163/// For malformed single-segment URLs (e.g. `https://github.com/foo-bar`) returns a `Slug` candidate.
164#[must_use]
165pub fn github_repo_candidate_from_url(input: &str) -> Option<GitHubRepoCandidate> {
166    let raw = input.trim();
167    if raw.is_empty() {
168        return None;
169    }
170
171    let raw = raw.strip_prefix("git+").unwrap_or(raw);
172
173    if let Some(rest) = raw.strip_prefix("git@") {
174        // SCP-like: git@github.com:owner/repo(.git)
175        let (host, path) = rest.split_once(':')?;
176        if !host.eq_ignore_ascii_case("github.com") {
177            return None;
178        }
179        return parse_owner_repo_from_path(path).map(GitHubRepoCandidate::Repo);
180    }
181
182    let url_str = if raw.contains("://") {
183        raw.to_string()
184    } else {
185        format!("https://{raw}")
186    };
187
188    let Ok(url) = url::Url::parse(&url_str) else {
189        return None;
190    };
191    if url.host_str()? != "github.com" {
192        return None;
193    }
194
195    let mut segments = url.path_segments()?.filter(|seg| !seg.is_empty());
196    let ownerish = segments.next()?.to_string();
197    let repo = segments.next().map(ToString::to_string);
198
199    match repo {
200        Some(ref repo) => parse_owner_repo(&ownerish, repo).map(GitHubRepoCandidate::Repo),
201        None => Some(GitHubRepoCandidate::Slug(ownerish)),
202    }
203}
204
205#[must_use]
206pub fn github_repo_guesses_from_slug(slug: &str) -> Vec<GitHubRepoRef> {
207    let slug = slug.trim().trim_matches('/');
208    if slug.is_empty() {
209        return Vec::new();
210    }
211
212    let mut seen = HashSet::<GitHubRepoRef>::new();
213    let mut out = Vec::new();
214
215    // Common case for our third-party imports: `owner-pi-foo` should be `owner/pi-foo`.
216    if let Some((owner, suffix)) = slug.split_once("-pi-") {
217        let repo = format!("pi-{suffix}");
218        if let Some(r) = parse_owner_repo(owner, &repo) {
219            if seen.insert(r.clone()) {
220                out.push(r);
221            }
222        }
223    }
224
225    // Try first hyphen split: `owner-rest...` -> `owner/rest...`
226    if let Some((owner, repo)) = slug.split_once('-') {
227        if let Some(r) = parse_owner_repo(owner, repo) {
228            if seen.insert(r.clone()) {
229                out.push(r);
230            }
231        }
232    }
233
234    // Try last hyphen split: `owner...-repo` -> `owner.../repo`
235    if let Some((owner, repo)) = slug.rsplit_once('-') {
236        if let Some(r) = parse_owner_repo(owner, repo) {
237            if seen.insert(r.clone()) {
238                out.push(r);
239            }
240        }
241    }
242
243    out
244}
245
246pub fn parse_github_repo_response(text: &str) -> Result<GitHubRepoMetrics> {
247    #[derive(Debug, Deserialize)]
248    struct RepoResponse {
249        full_name: String,
250        stargazers_count: u64,
251        forks_count: u64,
252        #[serde(default)]
253        subscribers_count: Option<u64>,
254        open_issues_count: u64,
255        #[serde(default)]
256        pushed_at: Option<String>,
257    }
258
259    let parsed: RepoResponse = serde_json::from_str(text)
260        .map_err(|err| Error::api(format!("GitHub repo response parse error: {err}")))?;
261
262    Ok(GitHubRepoMetrics {
263        full_name: parsed.full_name,
264        stars: parsed.stargazers_count,
265        forks: parsed.forks_count,
266        watchers: parsed.subscribers_count,
267        open_issues: parsed.open_issues_count,
268        pushed_at: parsed.pushed_at,
269    })
270}
271
272pub async fn fetch_github_repo_metrics_optional(
273    client: &Client,
274    token: &str,
275    repo: &GitHubRepoRef,
276) -> Result<Option<GitHubRepoMetrics>> {
277    let url = format!("https://api.github.com/repos/{}/{}", repo.owner, repo.repo);
278    let response = client
279        .get(&url)
280        .timeout(POPULARITY_REQUEST_TIMEOUT)
281        .header("Accept", "application/vnd.github+json")
282        .header("X-GitHub-Api-Version", "2022-11-28")
283        .header("Authorization", format!("Bearer {token}"))
284        .send()
285        .await?;
286
287    let status = response.status();
288    let text = response.text().await?;
289
290    match status {
291        200 => Ok(Some(parse_github_repo_response(&text)?)),
292        404 => Ok(None),
293        other => Err(Error::api(format!("GitHub API error {other}: {text}"))),
294    }
295}
296
297pub fn parse_npm_downloads_response(text: &str) -> Result<Option<u64>> {
298    #[derive(Debug, Deserialize)]
299    struct DownloadsResponse {
300        #[serde(default)]
301        downloads: Option<u64>,
302        #[serde(default)]
303        error: Option<String>,
304    }
305
306    let parsed: DownloadsResponse = serde_json::from_str(text)
307        .map_err(|err| Error::api(format!("npm downloads response parse error: {err}")))?;
308
309    if parsed.error.is_some() {
310        return Ok(None);
311    }
312
313    Ok(parsed.downloads)
314}
315
316pub async fn fetch_npm_downloads(client: &Client, package: &str) -> Result<NpmDownloads> {
317    async fn fetch_range(client: &Client, package: &str, range: &str) -> Result<Option<u64>> {
318        let encoded = url::form_urlencoded::byte_serialize(package.as_bytes()).collect::<String>();
319        let url = format!("https://api.npmjs.org/downloads/point/{range}/{encoded}");
320        let response = client
321            .get(&url)
322            .timeout(POPULARITY_REQUEST_TIMEOUT)
323            .send()
324            .await?;
325        let text = response.text().await?;
326        parse_npm_downloads_response(&text)
327    }
328
329    let weekly = fetch_range(client, package, "last-week").await?;
330    let monthly = fetch_range(client, package, "last-month").await?;
331
332    Ok(NpmDownloads { weekly, monthly })
333}
334
335pub fn parse_npm_registry_response(text: &str) -> Result<NpmRegistryMeta> {
336    let value: serde_json::Value = serde_json::from_str(text)
337        .map_err(|err| Error::api(format!("npm registry response parse error: {err}")))?;
338
339    let latest_version = value
340        .get("dist-tags")
341        .and_then(|tags| tags.get("latest"))
342        .and_then(|v| v.as_str())
343        .map(ToString::to_string);
344
345    let last_publish = latest_version
346        .as_deref()
347        .and_then(|latest| value.get("time").and_then(|t| t.get(latest)))
348        .and_then(|v| v.as_str())
349        .map(ToString::to_string);
350
351    let repository_url = match value.get("repository") {
352        Some(serde_json::Value::String(url)) => Some(url.clone()),
353        Some(serde_json::Value::Object(obj)) => obj
354            .get("url")
355            .and_then(|url| url.as_str())
356            .map(ToString::to_string),
357        _ => None,
358    };
359
360    Ok(NpmRegistryMeta {
361        latest_version,
362        last_publish,
363        repository_url,
364    })
365}
366
367pub async fn fetch_npm_registry_meta(
368    client: &Client,
369    package: &str,
370) -> Result<Option<NpmRegistryMeta>> {
371    let encoded = url::form_urlencoded::byte_serialize(package.as_bytes()).collect::<String>();
372    let url = format!("https://registry.npmjs.org/{encoded}");
373    let response = client
374        .get(&url)
375        .timeout(POPULARITY_REQUEST_TIMEOUT)
376        .send()
377        .await?;
378    let status = response.status();
379    let text = response.text().await?;
380
381    match status {
382        200 => Ok(Some(parse_npm_registry_response(&text)?)),
383        404 => Ok(None),
384        other => Err(Error::api(format!("npm registry error {other}: {text}"))),
385    }
386}
387
388fn parse_owner_repo(owner: &str, repo: &str) -> Option<GitHubRepoRef> {
389    let owner = owner.trim().trim_matches('/').to_string();
390    let repo = repo
391        .trim()
392        .trim_matches('/')
393        .trim_end_matches(".git")
394        .to_string();
395    if owner.is_empty() || repo.is_empty() {
396        return None;
397    }
398    Some(GitHubRepoRef { owner, repo })
399}
400
401fn parse_owner_repo_from_path(path: &str) -> Option<GitHubRepoRef> {
402    let path = path.trim().trim_matches('/');
403    let mut parts = path.split('/');
404    let owner = parts.next()?;
405    let repo = parts.next()?;
406    parse_owner_repo(owner, repo)
407}
408
409/// Fetch all referenced GitHub repos (deduped) and return a `full_name -> metrics` map.
410pub async fn snapshot_github_repos(
411    client: &Client,
412    token: &str,
413    repos: &[GitHubRepoRef],
414) -> Result<HashMap<String, GitHubRepoMetrics>> {
415    let mut out = HashMap::new();
416    for repo in repos {
417        if let Some(metrics) = fetch_github_repo_metrics_optional(client, token, repo).await? {
418            out.insert(repo.full_name(), metrics);
419        }
420    }
421    Ok(out)
422}
423
424#[cfg(test)]
425mod tests {
426    use super::*;
427
428    // ====================================================================
429    // GitHubRepoRef
430    // ====================================================================
431
432    #[test]
433    fn github_repo_ref_full_name() {
434        let r = GitHubRepoRef {
435            owner: "anthropics".to_string(),
436            repo: "claude-code".to_string(),
437        };
438        assert_eq!(r.full_name(), "anthropics/claude-code");
439    }
440
441    // ====================================================================
442    // github_repo_candidate_from_url
443    // ====================================================================
444
445    #[test]
446    fn url_https_standard() {
447        let c = github_repo_candidate_from_url("https://github.com/owner/repo").unwrap();
448        assert_eq!(
449            c,
450            GitHubRepoCandidate::Repo(GitHubRepoRef {
451                owner: "owner".to_string(),
452                repo: "repo".to_string()
453            })
454        );
455    }
456
457    #[test]
458    fn url_https_with_dot_git() {
459        let c = github_repo_candidate_from_url("https://github.com/owner/repo.git").unwrap();
460        assert_eq!(
461            c,
462            GitHubRepoCandidate::Repo(GitHubRepoRef {
463                owner: "owner".to_string(),
464                repo: "repo".to_string()
465            })
466        );
467    }
468
469    #[test]
470    fn url_git_plus_https() {
471        let c = github_repo_candidate_from_url("git+https://github.com/owner/repo.git").unwrap();
472        assert_eq!(
473            c,
474            GitHubRepoCandidate::Repo(GitHubRepoRef {
475                owner: "owner".to_string(),
476                repo: "repo".to_string()
477            })
478        );
479    }
480
481    #[test]
482    fn url_git_at_scp() {
483        let c = github_repo_candidate_from_url("git@github.com:owner/repo.git").unwrap();
484        assert_eq!(
485            c,
486            GitHubRepoCandidate::Repo(GitHubRepoRef {
487                owner: "owner".to_string(),
488                repo: "repo".to_string()
489            })
490        );
491    }
492
493    #[test]
494    fn url_bare_domain() {
495        let c = github_repo_candidate_from_url("github.com/owner/repo").unwrap();
496        assert_eq!(
497            c,
498            GitHubRepoCandidate::Repo(GitHubRepoRef {
499                owner: "owner".to_string(),
500                repo: "repo".to_string()
501            })
502        );
503    }
504
505    #[test]
506    fn url_single_segment_returns_slug() {
507        let c = github_repo_candidate_from_url("https://github.com/foo-bar").unwrap();
508        assert_eq!(c, GitHubRepoCandidate::Slug("foo-bar".to_string()));
509    }
510
511    #[test]
512    fn url_empty_string_returns_none() {
513        assert!(github_repo_candidate_from_url("").is_none());
514    }
515
516    #[test]
517    fn url_whitespace_only_returns_none() {
518        assert!(github_repo_candidate_from_url("   ").is_none());
519    }
520
521    #[test]
522    fn url_non_github_returns_none() {
523        assert!(github_repo_candidate_from_url("https://gitlab.com/owner/repo").is_none());
524    }
525
526    #[test]
527    fn url_git_at_non_github_returns_none() {
528        assert!(github_repo_candidate_from_url("git@gitlab.com:owner/repo.git").is_none());
529    }
530
531    #[test]
532    fn url_with_trailing_path() {
533        let c = github_repo_candidate_from_url("https://github.com/owner/repo/tree/main").unwrap();
534        assert_eq!(
535            c,
536            GitHubRepoCandidate::Repo(GitHubRepoRef {
537                owner: "owner".to_string(),
538                repo: "repo".to_string()
539            })
540        );
541    }
542
543    #[test]
544    fn url_with_leading_trailing_whitespace() {
545        let c = github_repo_candidate_from_url("  https://github.com/owner/repo  ").unwrap();
546        assert_eq!(
547            c,
548            GitHubRepoCandidate::Repo(GitHubRepoRef {
549                owner: "owner".to_string(),
550                repo: "repo".to_string()
551            })
552        );
553    }
554
555    // ====================================================================
556    // github_repo_guesses_from_slug
557    // ====================================================================
558
559    #[test]
560    fn slug_guess_pi_pattern() {
561        let guesses = github_repo_guesses_from_slug("owner-pi-foo");
562        assert!(
563            guesses
564                .iter()
565                .any(|r| r.owner == "owner" && r.repo == "pi-foo")
566        );
567    }
568
569    #[test]
570    fn slug_guess_simple_hyphen() {
571        let guesses = github_repo_guesses_from_slug("alice-myrepo");
572        assert!(
573            guesses
574                .iter()
575                .any(|r| r.owner == "alice" && r.repo == "myrepo")
576        );
577    }
578
579    #[test]
580    fn slug_guess_empty_returns_empty() {
581        assert!(github_repo_guesses_from_slug("").is_empty());
582    }
583
584    #[test]
585    fn slug_guess_whitespace_returns_empty() {
586        assert!(github_repo_guesses_from_slug("   ").is_empty());
587    }
588
589    #[test]
590    fn slug_guess_no_hyphen_returns_empty() {
591        assert!(github_repo_guesses_from_slug("nohyphen").is_empty());
592    }
593
594    #[test]
595    fn slug_guess_multiple_hyphens_gives_multiple_guesses() {
596        let guesses = github_repo_guesses_from_slug("a-b-c");
597        assert!(!guesses.is_empty());
598        // Should contain at least first-split ("a"/"b-c") and last-split ("a-b"/"c").
599        assert!(guesses.iter().any(|r| r.owner == "a" && r.repo == "b-c"));
600        assert!(guesses.iter().any(|r| r.owner == "a-b" && r.repo == "c"));
601    }
602
603    // ====================================================================
604    // parse_github_repo_response
605    // ====================================================================
606
607    #[test]
608    fn parse_github_repo_response_full() {
609        let json = r#"{
610            "full_name": "anthropics/claude-code",
611            "stargazers_count": 42000,
612            "forks_count": 1500,
613            "subscribers_count": 800,
614            "open_issues_count": 123,
615            "pushed_at": "2026-02-01T12:00:00Z"
616        }"#;
617        let metrics = parse_github_repo_response(json).unwrap();
618        assert_eq!(metrics.full_name, "anthropics/claude-code");
619        assert_eq!(metrics.stars, 42000);
620        assert_eq!(metrics.forks, 1500);
621        assert_eq!(metrics.watchers, Some(800));
622        assert_eq!(metrics.open_issues, 123);
623        assert_eq!(metrics.pushed_at, Some("2026-02-01T12:00:00Z".to_string()));
624    }
625
626    #[test]
627    fn parse_github_repo_response_missing_optional_fields() {
628        let json = r#"{
629            "full_name": "owner/repo",
630            "stargazers_count": 10,
631            "forks_count": 2,
632            "open_issues_count": 0
633        }"#;
634        let metrics = parse_github_repo_response(json).unwrap();
635        assert_eq!(metrics.stars, 10);
636        assert_eq!(metrics.watchers, None);
637        assert_eq!(metrics.pushed_at, None);
638    }
639
640    #[test]
641    fn parse_github_repo_response_invalid_json() {
642        assert!(parse_github_repo_response("{broken}").is_err());
643    }
644
645    // ====================================================================
646    // parse_npm_downloads_response
647    // ====================================================================
648
649    #[test]
650    fn parse_npm_downloads_response_with_count() {
651        let json = r#"{"downloads": 50000}"#;
652        assert_eq!(parse_npm_downloads_response(json).unwrap(), Some(50000));
653    }
654
655    #[test]
656    fn parse_npm_downloads_response_with_error() {
657        let json = r#"{"error": "package not found"}"#;
658        assert_eq!(parse_npm_downloads_response(json).unwrap(), None);
659    }
660
661    #[test]
662    fn parse_npm_downloads_response_null_downloads() {
663        let json = r#"{"downloads": null}"#;
664        assert_eq!(parse_npm_downloads_response(json).unwrap(), None);
665    }
666
667    #[test]
668    fn parse_npm_downloads_response_zero() {
669        let json = r#"{"downloads": 0}"#;
670        assert_eq!(parse_npm_downloads_response(json).unwrap(), Some(0));
671    }
672
673    #[test]
674    fn parse_npm_downloads_response_invalid_json() {
675        assert!(parse_npm_downloads_response("{bad").is_err());
676    }
677
678    // ====================================================================
679    // parse_npm_registry_response
680    // ====================================================================
681
682    #[test]
683    fn parse_npm_registry_response_full() {
684        let json = r#"{
685            "dist-tags": {"latest": "3.2.1"},
686            "time": {"3.2.1": "2026-01-15T10:00:00Z"},
687            "repository": {"type": "git", "url": "https://github.com/owner/repo.git"}
688        }"#;
689        let meta = parse_npm_registry_response(json).unwrap();
690        assert_eq!(meta.latest_version, Some("3.2.1".to_string()));
691        assert_eq!(meta.last_publish, Some("2026-01-15T10:00:00Z".to_string()));
692        assert_eq!(
693            meta.repository_url,
694            Some("https://github.com/owner/repo.git".to_string())
695        );
696    }
697
698    #[test]
699    fn parse_npm_registry_response_string_repository() {
700        let json = r#"{
701            "dist-tags": {"latest": "1.0.0"},
702            "time": {"1.0.0": "2026-01-01T00:00:00Z"},
703            "repository": "https://github.com/owner/repo"
704        }"#;
705        let meta = parse_npm_registry_response(json).unwrap();
706        assert_eq!(
707            meta.repository_url,
708            Some("https://github.com/owner/repo".to_string())
709        );
710    }
711
712    #[test]
713    fn parse_npm_registry_response_no_dist_tags() {
714        let json = "{}";
715        let meta = parse_npm_registry_response(json).unwrap();
716        assert_eq!(meta.latest_version, None);
717        assert_eq!(meta.last_publish, None);
718        assert_eq!(meta.repository_url, None);
719    }
720
721    #[test]
722    fn parse_npm_registry_response_invalid_json() {
723        assert!(parse_npm_registry_response("{broken").is_err());
724    }
725
726    // ====================================================================
727    // PopularityEvidence serde round-trip
728    // ====================================================================
729
730    #[test]
731    fn popularity_evidence_default_serializes_all_none() {
732        let pe = PopularityEvidence::default();
733        let json = serde_json::to_value(&pe).unwrap();
734        assert!(json["github_stars"].is_null());
735        assert!(json["npm_downloads_weekly"].is_null());
736        assert!(json["marketplace_rank"].is_null());
737    }
738
739    #[test]
740    fn popularity_evidence_round_trip() {
741        let pe = PopularityEvidence {
742            snapshot_at: Some("2026-02-06T12:00:00Z".to_string()),
743            github_stars: Some(42000),
744            github_forks: Some(1500),
745            npm_downloads_weekly: Some(100_000),
746            npm_downloads_monthly: Some(400_000),
747            ..Default::default()
748        };
749        let json = serde_json::to_string(&pe).unwrap();
750        let pe2: PopularityEvidence = serde_json::from_str(&json).unwrap();
751        assert_eq!(pe2.github_stars, Some(42000));
752        assert_eq!(pe2.npm_downloads_weekly, Some(100_000));
753        assert_eq!(pe2.github_watchers, None);
754    }
755
756    // ====================================================================
757    // CandidateSource serde (tagged enum variants)
758    // ====================================================================
759
760    #[test]
761    fn candidate_source_git_round_trip() {
762        let src = CandidateSource::Git {
763            repo: "https://github.com/owner/repo.git".to_string(),
764            path: Some("packages/core".to_string()),
765        };
766        let json = serde_json::to_string(&src).unwrap();
767        assert!(json.contains(r#""type":"git"#));
768        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
769        match deserialized {
770            CandidateSource::Git { repo, path } => {
771                assert_eq!(repo, "https://github.com/owner/repo.git");
772                assert_eq!(path, Some("packages/core".to_string()));
773            }
774            _ => panic!(),
775        }
776    }
777
778    #[test]
779    fn candidate_source_npm_round_trip() {
780        let src = CandidateSource::Npm {
781            package: "@scope/pkg".to_string(),
782            version: "1.2.3".to_string(),
783            url: "https://registry.npmjs.org/@scope/pkg/-/pkg-1.2.3.tgz".to_string(),
784        };
785        let json = serde_json::to_string(&src).unwrap();
786        assert!(json.contains(r#""type":"npm"#));
787        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
788        match deserialized {
789            CandidateSource::Npm {
790                package,
791                version,
792                url,
793            } => {
794                assert_eq!(package, "@scope/pkg");
795                assert_eq!(version, "1.2.3");
796                assert!(url.contains("registry.npmjs.org"));
797            }
798            _ => panic!(),
799        }
800    }
801
802    #[test]
803    fn candidate_source_url_round_trip() {
804        let src = CandidateSource::Url {
805            url: "https://example.com/ext.tgz".to_string(),
806        };
807        let json = serde_json::to_string(&src).unwrap();
808        assert!(json.contains(r#""type":"url"#));
809        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
810        match deserialized {
811            CandidateSource::Url { url } => {
812                assert_eq!(url, "https://example.com/ext.tgz");
813            }
814            _ => panic!(),
815        }
816    }
817
818    #[test]
819    fn candidate_source_git_no_path() {
820        let src = CandidateSource::Git {
821            repo: "https://github.com/owner/repo".to_string(),
822            path: None,
823        };
824        let json = serde_json::to_string(&src).unwrap();
825        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
826        match deserialized {
827            CandidateSource::Git { path, .. } => {
828                assert_eq!(path, None);
829            }
830            _ => panic!(),
831        }
832    }
833
834    // ====================================================================
835    // parse_owner_repo edge cases (via public API)
836    // ====================================================================
837
838    #[test]
839    fn url_with_trailing_slash() {
840        let c = github_repo_candidate_from_url("https://github.com/owner/repo/").unwrap();
841        assert_eq!(
842            c,
843            GitHubRepoCandidate::Repo(GitHubRepoRef {
844                owner: "owner".to_string(),
845                repo: "repo".to_string()
846            })
847        );
848    }
849
850    // ====================================================================
851    // NpmDownloads / NpmRegistryMeta / GitHubRepoMetrics equality
852    // ====================================================================
853
854    #[test]
855    fn npm_downloads_equality() {
856        let a = NpmDownloads {
857            weekly: Some(100),
858            monthly: Some(400),
859        };
860        let b = NpmDownloads {
861            weekly: Some(100),
862            monthly: Some(400),
863        };
864        assert_eq!(a, b);
865    }
866
867    #[test]
868    fn github_repo_metrics_equality() {
869        let a = GitHubRepoMetrics {
870            full_name: "o/r".to_string(),
871            stars: 10,
872            forks: 5,
873            watchers: None,
874            open_issues: 0,
875            pushed_at: None,
876        };
877        let b = a.clone();
878        assert_eq!(a, b);
879    }
880
881    mod proptest_extension_popularity {
882        use super::*;
883        use proptest::prelude::*;
884
885        proptest! {
886            /// `github_repo_candidate_from_url` never panics on arbitrary input.
887            #[test]
888            fn github_url_never_panics(s in "(?s).{0,200}") {
889                let _ = github_repo_candidate_from_url(&s);
890            }
891
892            /// Valid `https://github.com/owner/repo` URLs always parse to `Repo`.
893            #[test]
894            fn valid_github_url_parses_to_repo(
895                owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,20}",
896                repo in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,20}"
897            ) {
898                let url = format!("https://github.com/{owner}/{repo}");
899                let result = github_repo_candidate_from_url(&url);
900                assert!(
901                    matches!(result, Some(GitHubRepoCandidate::Repo(_))),
902                    "expected Repo for {url}, got {result:?}"
903                );
904            }
905
906            /// `.git` suffix is stripped — with and without `.git` parse identically.
907            #[test]
908            fn git_suffix_stripped(
909                owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,10}",
910                repo in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,10}"
911            ) {
912                let with_git = format!("https://github.com/{owner}/{repo}.git");
913                let without_git = format!("https://github.com/{owner}/{repo}");
914                assert_eq!(
915                    github_repo_candidate_from_url(&with_git),
916                    github_repo_candidate_from_url(&without_git)
917                );
918            }
919
920            /// Whitespace-padded URLs parse identically to trimmed.
921            #[test]
922            fn whitespace_padded_url(
923                owner in "[a-zA-Z0-9]{1,10}",
924                repo in "[a-zA-Z0-9]{1,10}",
925                spaces in "[ \\t]{0,5}"
926            ) {
927                let clean = format!("https://github.com/{owner}/{repo}");
928                let padded = format!("{spaces}{clean}{spaces}");
929                assert_eq!(
930                    github_repo_candidate_from_url(&clean),
931                    github_repo_candidate_from_url(&padded)
932                );
933            }
934
935            /// Empty/whitespace input returns `None`.
936            #[test]
937            fn empty_input_returns_none(ws in "[ \\t\\n]{0,10}") {
938                assert!(github_repo_candidate_from_url(&ws).is_none());
939            }
940
941            /// Non-github.com URLs return `None`.
942            #[test]
943            fn non_github_returns_none(
944                host in "[a-z]{3,10}\\.(com|org|io)",
945                path in "[a-z]{1,10}/[a-z]{1,10}"
946            ) {
947                // Skip if we accidentally generated github.com
948                let url = format!("https://{host}/{path}");
949                if host != "github.com" {
950                    assert!(github_repo_candidate_from_url(&url).is_none());
951                }
952            }
953
954            /// `full_name` always has format `owner/repo`.
955            #[test]
956            fn full_name_format(
957                owner in "[a-zA-Z0-9]{1,15}",
958                repo in "[a-zA-Z0-9]{1,15}"
959            ) {
960                let r = GitHubRepoRef {
961                    owner: owner.clone(),
962                    repo: repo.clone(),
963                };
964                let full = r.full_name();
965                assert_eq!(full, format!("{owner}/{repo}"));
966                assert!(full.contains('/'));
967            }
968
969            /// `github_repo_guesses_from_slug` never panics.
970            #[test]
971            fn slug_guesses_never_panics(s in ".{0,100}") {
972                let _ = github_repo_guesses_from_slug(&s);
973            }
974
975            /// Slug guesses all have non-empty owner and repo.
976            #[test]
977            fn slug_guesses_fields_nonempty(slug in "[a-zA-Z0-9_-]{1,30}") {
978                for guess in github_repo_guesses_from_slug(&slug) {
979                    assert!(!guess.owner.is_empty());
980                    assert!(!guess.repo.is_empty());
981                }
982            }
983
984            /// Slugs without hyphens produce no guesses.
985            #[test]
986            fn slug_no_hyphen_empty(slug in "[a-zA-Z0-9]{1,20}") {
987                assert!(
988                    github_repo_guesses_from_slug(&slug).is_empty(),
989                    "expected no guesses for hyphenless slug: {slug}"
990                );
991            }
992
993            /// `parse_github_repo_response` preserves numeric fields.
994            #[test]
995            fn github_response_preserves_values(
996                stars in 0u64..10_000_000,
997                forks in 0u64..1_000_000,
998                issues in 0u64..100_000
999            ) {
1000                let json = format!(
1001                    r#"{{"full_name":"o/r","stargazers_count":{stars},"forks_count":{forks},"open_issues_count":{issues}}}"#
1002                );
1003                let m = parse_github_repo_response(&json).unwrap();
1004                assert_eq!(m.stars, stars);
1005                assert_eq!(m.forks, forks);
1006                assert_eq!(m.open_issues, issues);
1007            }
1008
1009            /// `parse_github_repo_response` fails on invalid JSON.
1010            #[test]
1011            fn github_response_invalid_json(s in "[a-z]{5,20}") {
1012                assert!(parse_github_repo_response(&s).is_err());
1013            }
1014
1015            /// `parse_npm_downloads_response` returns `None` when error field is present.
1016            #[test]
1017            fn npm_downloads_error_returns_none(msg in "[a-z ]{1,30}") {
1018                let json = format!(r#"{{"error":"{msg}","downloads":42}}"#);
1019                assert_eq!(parse_npm_downloads_response(&json).unwrap(), None);
1020            }
1021
1022            /// `parse_npm_downloads_response` preserves download count.
1023            #[test]
1024            fn npm_downloads_value_preserved(n in 0u64..100_000_000) {
1025                let json = format!(r#"{{"downloads":{n}}}"#);
1026                assert_eq!(parse_npm_downloads_response(&json).unwrap(), Some(n));
1027            }
1028
1029            /// `parse_npm_downloads_response` fails on invalid JSON.
1030            #[test]
1031            fn npm_downloads_invalid_json(s in "[a-z]{5,20}") {
1032                assert!(parse_npm_downloads_response(&s).is_err());
1033            }
1034
1035            /// `parse_npm_registry_response` extracts repository URL from string form.
1036            #[test]
1037            fn npm_registry_string_repo_url(url in "https://[a-z]{3,10}\\.com/[a-z]{1,10}") {
1038                let json = format!(r#"{{"repository":"{url}"}}"#);
1039                let meta = parse_npm_registry_response(&json).unwrap();
1040                assert_eq!(meta.repository_url.as_deref(), Some(url.as_str()));
1041            }
1042
1043            /// `parse_npm_registry_response` extracts repository URL from object form.
1044            #[test]
1045            fn npm_registry_object_repo_url(url in "https://[a-z]{3,10}\\.com/[a-z]{1,10}") {
1046                let json = format!(r#"{{"repository":{{"type":"git","url":"{url}"}}}}"#);
1047                let meta = parse_npm_registry_response(&json).unwrap();
1048                assert_eq!(meta.repository_url.as_deref(), Some(url.as_str()));
1049            }
1050
1051            /// `parse_npm_registry_response` fails on invalid JSON.
1052            #[test]
1053            fn npm_registry_invalid_json(s in "[a-z]{5,20}") {
1054                assert!(parse_npm_registry_response(&s).is_err());
1055            }
1056
1057            /// `PopularityEvidence` serde roundtrip.
1058            #[test]
1059            fn popularity_evidence_serde_roundtrip(
1060                stars in prop::option::of(0u64..1_000_000),
1061                forks in prop::option::of(0u64..100_000),
1062                weekly in prop::option::of(0u64..10_000_000)
1063            ) {
1064                let ev = PopularityEvidence {
1065                    github_stars: stars,
1066                    github_forks: forks,
1067                    npm_downloads_weekly: weekly,
1068                    ..PopularityEvidence::default()
1069                };
1070                let json = serde_json::to_string(&ev).unwrap();
1071                let back: PopularityEvidence = serde_json::from_str(&json).unwrap();
1072                assert_eq!(back.github_stars, stars);
1073                assert_eq!(back.github_forks, forks);
1074                assert_eq!(back.npm_downloads_weekly, weekly);
1075            }
1076        }
1077    }
1078}