Skip to main content

pi/
extension_popularity.rs

1//! Popularity signal snapshotting for extension candidates.
2//!
3//! This module is intentionally "evidence-first":
4//! - Fetch concrete metrics (GitHub stars/downloads/etc).
5//! - Normalize missing/unavailable metrics to `null` (never `0`).
6//! - Persist evidence onto the canonical candidate pool JSON so scoring can be auditable.
7
8use crate::error::{Error, Result};
9use crate::http::client::Client;
10use serde::{Deserialize, Serialize};
11use std::collections::{HashMap, HashSet};
12use std::time::Duration;
13
/// Timeout applied to each outbound HTTP request issued by the fetch helpers in this module
/// (GitHub repo metrics, npm download counts, npm registry metadata).
const POPULARITY_REQUEST_TIMEOUT: Duration = Duration::from_secs(20);
15
/// Serde model of the candidate pool JSON document that popularity evidence is persisted onto.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandidatePool {
    /// Schema identifier; stored under the JSON key `$schema`.
    #[serde(rename = "$schema")]
    pub schema: String,
    /// Generation time of the pool, kept as an opaque string (its format is not validated here).
    pub generated_at: String,
    /// Description of the inputs the pool was built from.
    pub source_inputs: SourceInputs,
    /// Candidate count as recorded in the document.
    /// NOTE(review): presumably equals `items.len()`; nothing in this type enforces it — confirm.
    pub total_candidates: u64,
    /// The candidate entries themselves.
    pub items: Vec<CandidateItem>,
    /// Free-form alias notes attached to the pool.
    pub alias_notes: Vec<AliasNote>,
}
26
/// Inputs recorded alongside a [`CandidatePool`].
///
/// All three fields are plain strings / string lists; this module does not interpret them.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct SourceInputs {
    /// NOTE(review): presumably a reference to where the artifact set came from — confirm.
    pub artifact_provenance: String,
    /// NOTE(review): presumably the root location of the stored artifacts — confirm.
    pub artifact_root: String,
    /// Additional npm package names listed in the pool document.
    pub extra_npm_packages: Vec<String>,
}
33
/// A single free-form note entry from [`CandidatePool::alias_notes`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct AliasNote {
    /// The note text.
    pub note: String,
}
38
/// One extension candidate inside a [`CandidatePool`].
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CandidateItem {
    /// Identifier of the candidate within the pool.
    pub id: String,
    /// Candidate name.
    pub name: String,
    /// Source tier label, kept as a free-form string.
    pub source_tier: String,
    /// Status label, kept as a free-form string.
    pub status: String,
    /// License label, kept as a free-form string.
    pub license: String,
    /// NOTE(review): presumably the retrieval date/time of the artifact — confirm.
    pub retrieved: Option<String>,
    /// Location of the stored artifact, when one exists.
    pub artifact_path: Option<String>,
    /// SHA-256 checksum of the artifact, when one exists.
    pub checksum: Option<Sha256Checksum>,
    /// Where the candidate comes from (git, npm or a plain URL).
    pub source: CandidateSource,
    /// Repository URL, when known.
    pub repository_url: Option<String>,
    /// Popularity evidence; falls back to `PopularityEvidence::default()` (every metric `None`)
    /// when the key is absent from the input document.
    #[serde(default)]
    pub popularity: PopularityEvidence,
    /// Alternative names for this candidate.
    pub aliases: Vec<String>,
    /// Optional free-form notes.
    pub notes: Option<String>,
}
56
/// Checksum wrapper serialized as an object with a single `sha256` key.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Sha256Checksum {
    /// The digest as a string (not validated by this type).
    pub sha256: String,
}
61
/// Origin of a candidate.
///
/// Serialized as an internally tagged object: the `type` key carries the snake_case variant
/// name (`git`, `npm` or `url`) next to the variant's own fields.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(tag = "type", rename_all = "snake_case")]
pub enum CandidateSource {
    /// A git repository, optionally narrowed to a path.
    Git {
        /// Repository location as written in the pool document.
        repo: String,
        /// Optional path; `None` when the key is missing from the input.
        #[serde(default)]
        path: Option<String>,
    },
    /// An npm package.
    Npm {
        /// Package name (may be scoped, e.g. `@scope/pkg`).
        package: String,
        /// Package version string.
        version: String,
        /// URL associated with the package.
        url: String,
    },
    /// A plain URL.
    Url {
        /// The URL itself.
        url: String,
    },
}
79
/// Popularity evidence schema.
///
/// This is the machine-joinable surface used by scoring (see `docs/EXTENSION_POPULARITY_CRITERIA.md`).
/// When a metric is unknown/unavailable, it should be persisted as explicit `null`.
///
/// Every field is an `Option`, the derived `Default` sets all of them to `None`, and no field is
/// skipped during serialization, so an unknown metric is written out as JSON `null`.
#[derive(Debug, Clone, Default, Deserialize, Serialize)]
pub struct PopularityEvidence {
    /// When this evidence snapshot was taken, as a string (format not validated here).
    pub snapshot_at: Option<String>,

    // GitHub
    /// GitHub repository the GitHub metrics below refer to.
    /// NOTE(review): presumably in `owner/repo` form — confirm against the writer.
    pub github_repo: Option<String>,
    /// Star count.
    pub github_stars: Option<u64>,
    /// Fork count.
    pub github_forks: Option<u64>,
    /// Watcher count.
    pub github_watchers: Option<u64>,
    /// Open issue count.
    pub github_open_issues: Option<u64>,
    /// Timestamp of the last commit, as a string.
    pub github_last_commit: Option<String>,

    // npm
    /// Downloads over the last week.
    pub npm_downloads_weekly: Option<u64>,
    /// Downloads over the last month.
    pub npm_downloads_monthly: Option<u64>,
    /// Timestamp of the last publish, as a string.
    pub npm_last_publish: Option<String>,
    /// Number of dependents.
    pub npm_dependents: Option<u64>,

    // Marketplace (OpenClaw / ClawHub) - not currently populated by the candidate pool.
    /// Rank in the marketplace.
    pub marketplace_rank: Option<u32>,
    /// Monthly marketplace installs.
    pub marketplace_installs_monthly: Option<u64>,
    /// Whether the marketplace features the extension.
    pub marketplace_featured: Option<bool>,

    // Mentions / references - not currently populated by the candidate pool.
    /// Number of mentions found.
    pub mentions_count: Option<u32>,
    /// Sources in which the mentions were found.
    pub mentions_sources: Option<Vec<String>>,
}
111
/// Identifies a GitHub repository by its owner and repository name.
///
/// Implements `Eq` and `Hash`, so it can be used as a set/map key when de-duplicating.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct GitHubRepoRef {
    /// Account or organisation that owns the repository.
    pub owner: String,
    /// Repository name.
    pub repo: String,
}

impl GitHubRepoRef {
    /// Returns the reference rendered as `owner/repo`.
    #[must_use]
    pub fn full_name(&self) -> String {
        let Self { owner, repo } = self;
        format!("{owner}/{repo}")
    }
}
124
/// Metrics extracted from a GitHub repository API response by `parse_github_repo_response`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct GitHubRepoMetrics {
    /// `full_name` as reported in the response.
    pub full_name: String,
    /// Taken from `stargazers_count`.
    pub stars: u64,
    /// Taken from `forks_count`.
    pub forks: u64,
    /// Taken from `subscribers_count`; `None` when the response omits it.
    pub watchers: Option<u64>,
    /// Taken from `open_issues_count`.
    pub open_issues: u64,
    /// Taken from `pushed_at`; `None` when the response omits it.
    pub pushed_at: Option<String>,
}
134
/// npm download counts as returned by `fetch_npm_downloads`.
///
/// A value is `None` when the downloads API answered with an `error` field or without a
/// `downloads` number; it is never coerced to `0`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NpmDownloads {
    /// Count for the `last-week` range.
    pub weekly: Option<u64>,
    /// Count for the `last-month` range.
    pub monthly: Option<u64>,
}
140
/// Subset of an npm registry package document extracted by `parse_npm_registry_response`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NpmRegistryMeta {
    /// Value of `dist-tags.latest`, when present and a string.
    pub latest_version: Option<String>,
    /// Entry of the `time` map keyed by `latest_version`, when present and a string.
    pub last_publish: Option<String>,
    /// `repository` when it is a string, or `repository.url` when it is an object.
    pub repository_url: Option<String>,
}
147
/// Result of `github_repo_candidate_from_url`: either a fully resolved repository or a slug
/// that still has to be split into owner and repository (see `github_repo_guesses_from_slug`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GitHubRepoCandidate {
    /// A reference with both owner and repository name.
    Repo(GitHubRepoRef),
    /// A malformed GitHub URL that only included a single path segment, e.g. `https://github.com/foo-bar`.
    Slug(String),
}
154
155/// Best-effort parse of a GitHub repository reference from a URL-like string.
156///
157/// Supports:
158/// - `https://github.com/owner/repo`
159/// - `git+https://github.com/owner/repo.git`
160/// - `git@github.com:owner/repo.git`
161/// - `github.com/owner/repo`
162///
163/// For malformed single-segment URLs (e.g. `https://github.com/foo-bar`) returns a `Slug` candidate.
164#[must_use]
165pub fn github_repo_candidate_from_url(input: &str) -> Option<GitHubRepoCandidate> {
166    let raw = input.trim();
167    if raw.is_empty() {
168        return None;
169    }
170
171    let raw = raw.strip_prefix("git+").unwrap_or(raw);
172
173    if let Some(rest) = raw.strip_prefix("git@") {
174        // SCP-like: git@github.com:owner/repo(.git)
175        let (_host, path) = rest.split_once(':')?;
176        return parse_owner_repo_from_path(path).map(GitHubRepoCandidate::Repo);
177    }
178
179    let url_str = if raw.contains("://") {
180        raw.to_string()
181    } else {
182        format!("https://{raw}")
183    };
184
185    let Ok(url) = url::Url::parse(&url_str) else {
186        return None;
187    };
188    if url.host_str()? != "github.com" {
189        return None;
190    }
191
192    let mut segments = url.path_segments()?.filter(|seg| !seg.is_empty());
193    let ownerish = segments.next()?.to_string();
194    let repo = segments.next().map(ToString::to_string);
195
196    match repo {
197        Some(ref repo) => parse_owner_repo(&ownerish, repo).map(GitHubRepoCandidate::Repo),
198        None => Some(GitHubRepoCandidate::Slug(ownerish)),
199    }
200}
201
202#[must_use]
203pub fn github_repo_guesses_from_slug(slug: &str) -> Vec<GitHubRepoRef> {
204    let slug = slug.trim().trim_matches('/');
205    if slug.is_empty() {
206        return Vec::new();
207    }
208
209    let mut seen = HashSet::<GitHubRepoRef>::new();
210    let mut out = Vec::new();
211
212    // Common case for our third-party imports: `owner-pi-foo` should be `owner/pi-foo`.
213    if let Some((owner, suffix)) = slug.split_once("-pi-") {
214        let repo = format!("pi-{suffix}");
215        if let Some(r) = parse_owner_repo(owner, &repo) {
216            if seen.insert(r.clone()) {
217                out.push(r);
218            }
219        }
220    }
221
222    // Try first hyphen split: `owner-rest...` -> `owner/rest...`
223    if let Some((owner, repo)) = slug.split_once('-') {
224        if let Some(r) = parse_owner_repo(owner, repo) {
225            if seen.insert(r.clone()) {
226                out.push(r);
227            }
228        }
229    }
230
231    // Try last hyphen split: `owner...-repo` -> `owner.../repo`
232    if let Some((owner, repo)) = slug.rsplit_once('-') {
233        if let Some(r) = parse_owner_repo(owner, repo) {
234            if seen.insert(r.clone()) {
235                out.push(r);
236            }
237        }
238    }
239
240    out
241}
242
243pub fn parse_github_repo_response(text: &str) -> Result<GitHubRepoMetrics> {
244    #[derive(Debug, Deserialize)]
245    struct RepoResponse {
246        full_name: String,
247        stargazers_count: u64,
248        forks_count: u64,
249        #[serde(default)]
250        subscribers_count: Option<u64>,
251        open_issues_count: u64,
252        #[serde(default)]
253        pushed_at: Option<String>,
254    }
255
256    let parsed: RepoResponse = serde_json::from_str(text)
257        .map_err(|err| Error::api(format!("GitHub repo response parse error: {err}")))?;
258
259    Ok(GitHubRepoMetrics {
260        full_name: parsed.full_name,
261        stars: parsed.stargazers_count,
262        forks: parsed.forks_count,
263        watchers: parsed.subscribers_count,
264        open_issues: parsed.open_issues_count,
265        pushed_at: parsed.pushed_at,
266    })
267}
268
269pub async fn fetch_github_repo_metrics_optional(
270    client: &Client,
271    token: &str,
272    repo: &GitHubRepoRef,
273) -> Result<Option<GitHubRepoMetrics>> {
274    let url = format!("https://api.github.com/repos/{}/{}", repo.owner, repo.repo);
275    let response = client
276        .get(&url)
277        .timeout(POPULARITY_REQUEST_TIMEOUT)
278        .header("Accept", "application/vnd.github+json")
279        .header("X-GitHub-Api-Version", "2022-11-28")
280        .header("Authorization", format!("Bearer {token}"))
281        .send()
282        .await?;
283
284    let status = response.status();
285    let text = response.text().await?;
286
287    match status {
288        200 => Ok(Some(parse_github_repo_response(&text)?)),
289        404 => Ok(None),
290        other => Err(Error::api(format!("GitHub API error {other}: {text}"))),
291    }
292}
293
294pub fn parse_npm_downloads_response(text: &str) -> Result<Option<u64>> {
295    #[derive(Debug, Deserialize)]
296    struct DownloadsResponse {
297        #[serde(default)]
298        downloads: Option<u64>,
299        #[serde(default)]
300        error: Option<String>,
301    }
302
303    let parsed: DownloadsResponse = serde_json::from_str(text)
304        .map_err(|err| Error::api(format!("npm downloads response parse error: {err}")))?;
305
306    if parsed.error.is_some() {
307        return Ok(None);
308    }
309
310    Ok(parsed.downloads)
311}
312
313pub async fn fetch_npm_downloads(client: &Client, package: &str) -> Result<NpmDownloads> {
314    async fn fetch_range(client: &Client, package: &str, range: &str) -> Result<Option<u64>> {
315        let encoded = url::form_urlencoded::byte_serialize(package.as_bytes()).collect::<String>();
316        let url = format!("https://api.npmjs.org/downloads/point/{range}/{encoded}");
317        let response = client
318            .get(&url)
319            .timeout(POPULARITY_REQUEST_TIMEOUT)
320            .send()
321            .await?;
322        let text = response.text().await?;
323        parse_npm_downloads_response(&text)
324    }
325
326    let weekly = fetch_range(client, package, "last-week").await?;
327    let monthly = fetch_range(client, package, "last-month").await?;
328
329    Ok(NpmDownloads { weekly, monthly })
330}
331
332pub fn parse_npm_registry_response(text: &str) -> Result<NpmRegistryMeta> {
333    let value: serde_json::Value = serde_json::from_str(text)
334        .map_err(|err| Error::api(format!("npm registry response parse error: {err}")))?;
335
336    let latest_version = value
337        .get("dist-tags")
338        .and_then(|tags| tags.get("latest"))
339        .and_then(|v| v.as_str())
340        .map(ToString::to_string);
341
342    let last_publish = latest_version
343        .as_deref()
344        .and_then(|latest| value.get("time").and_then(|t| t.get(latest)))
345        .and_then(|v| v.as_str())
346        .map(ToString::to_string);
347
348    let repository_url = match value.get("repository") {
349        Some(serde_json::Value::String(url)) => Some(url.clone()),
350        Some(serde_json::Value::Object(obj)) => obj
351            .get("url")
352            .and_then(|url| url.as_str())
353            .map(ToString::to_string),
354        _ => None,
355    };
356
357    Ok(NpmRegistryMeta {
358        latest_version,
359        last_publish,
360        repository_url,
361    })
362}
363
364pub async fn fetch_npm_registry_meta(
365    client: &Client,
366    package: &str,
367) -> Result<Option<NpmRegistryMeta>> {
368    let encoded = url::form_urlencoded::byte_serialize(package.as_bytes()).collect::<String>();
369    let url = format!("https://registry.npmjs.org/{encoded}");
370    let response = client
371        .get(&url)
372        .timeout(POPULARITY_REQUEST_TIMEOUT)
373        .send()
374        .await?;
375    let status = response.status();
376    let text = response.text().await?;
377
378    match status {
379        200 => Ok(Some(parse_npm_registry_response(&text)?)),
380        404 => Ok(None),
381        other => Err(Error::api(format!("npm registry error {other}: {text}"))),
382    }
383}
384
385fn parse_owner_repo(owner: &str, repo: &str) -> Option<GitHubRepoRef> {
386    let owner = owner.trim().trim_matches('/').to_string();
387    let repo = repo
388        .trim()
389        .trim_matches('/')
390        .trim_end_matches(".git")
391        .to_string();
392    if owner.is_empty() || repo.is_empty() {
393        return None;
394    }
395    Some(GitHubRepoRef { owner, repo })
396}
397
398fn parse_owner_repo_from_path(path: &str) -> Option<GitHubRepoRef> {
399    let path = path.trim().trim_matches('/');
400    let mut parts = path.split('/');
401    let owner = parts.next()?;
402    let repo = parts.next()?;
403    parse_owner_repo(owner, repo)
404}
405
406/// Fetch all referenced GitHub repos (deduped) and return a `full_name -> metrics` map.
407pub async fn snapshot_github_repos(
408    client: &Client,
409    token: &str,
410    repos: &[GitHubRepoRef],
411) -> Result<HashMap<String, GitHubRepoMetrics>> {
412    let mut out = HashMap::new();
413    for repo in repos {
414        if let Some(metrics) = fetch_github_repo_metrics_optional(client, token, repo).await? {
415            out.insert(repo.full_name(), metrics);
416        }
417    }
418    Ok(out)
419}
420
421#[cfg(test)]
422mod tests {
423    use super::*;
424
425    // ====================================================================
426    // GitHubRepoRef
427    // ====================================================================
428
429    #[test]
430    fn github_repo_ref_full_name() {
431        let r = GitHubRepoRef {
432            owner: "anthropics".to_string(),
433            repo: "claude-code".to_string(),
434        };
435        assert_eq!(r.full_name(), "anthropics/claude-code");
436    }
437
438    // ====================================================================
439    // github_repo_candidate_from_url
440    // ====================================================================
441
442    #[test]
443    fn url_https_standard() {
444        let c = github_repo_candidate_from_url("https://github.com/owner/repo").unwrap();
445        assert_eq!(
446            c,
447            GitHubRepoCandidate::Repo(GitHubRepoRef {
448                owner: "owner".to_string(),
449                repo: "repo".to_string()
450            })
451        );
452    }
453
454    #[test]
455    fn url_https_with_dot_git() {
456        let c = github_repo_candidate_from_url("https://github.com/owner/repo.git").unwrap();
457        assert_eq!(
458            c,
459            GitHubRepoCandidate::Repo(GitHubRepoRef {
460                owner: "owner".to_string(),
461                repo: "repo".to_string()
462            })
463        );
464    }
465
466    #[test]
467    fn url_git_plus_https() {
468        let c = github_repo_candidate_from_url("git+https://github.com/owner/repo.git").unwrap();
469        assert_eq!(
470            c,
471            GitHubRepoCandidate::Repo(GitHubRepoRef {
472                owner: "owner".to_string(),
473                repo: "repo".to_string()
474            })
475        );
476    }
477
478    #[test]
479    fn url_git_at_scp() {
480        let c = github_repo_candidate_from_url("git@github.com:owner/repo.git").unwrap();
481        assert_eq!(
482            c,
483            GitHubRepoCandidate::Repo(GitHubRepoRef {
484                owner: "owner".to_string(),
485                repo: "repo".to_string()
486            })
487        );
488    }
489
490    #[test]
491    fn url_bare_domain() {
492        let c = github_repo_candidate_from_url("github.com/owner/repo").unwrap();
493        assert_eq!(
494            c,
495            GitHubRepoCandidate::Repo(GitHubRepoRef {
496                owner: "owner".to_string(),
497                repo: "repo".to_string()
498            })
499        );
500    }
501
502    #[test]
503    fn url_single_segment_returns_slug() {
504        let c = github_repo_candidate_from_url("https://github.com/foo-bar").unwrap();
505        assert_eq!(c, GitHubRepoCandidate::Slug("foo-bar".to_string()));
506    }
507
508    #[test]
509    fn url_empty_string_returns_none() {
510        assert!(github_repo_candidate_from_url("").is_none());
511    }
512
513    #[test]
514    fn url_whitespace_only_returns_none() {
515        assert!(github_repo_candidate_from_url("   ").is_none());
516    }
517
518    #[test]
519    fn url_non_github_returns_none() {
520        assert!(github_repo_candidate_from_url("https://gitlab.com/owner/repo").is_none());
521    }
522
523    #[test]
524    fn url_with_trailing_path() {
525        let c = github_repo_candidate_from_url("https://github.com/owner/repo/tree/main").unwrap();
526        assert_eq!(
527            c,
528            GitHubRepoCandidate::Repo(GitHubRepoRef {
529                owner: "owner".to_string(),
530                repo: "repo".to_string()
531            })
532        );
533    }
534
535    #[test]
536    fn url_with_leading_trailing_whitespace() {
537        let c = github_repo_candidate_from_url("  https://github.com/owner/repo  ").unwrap();
538        assert_eq!(
539            c,
540            GitHubRepoCandidate::Repo(GitHubRepoRef {
541                owner: "owner".to_string(),
542                repo: "repo".to_string()
543            })
544        );
545    }
546
547    // ====================================================================
548    // github_repo_guesses_from_slug
549    // ====================================================================
550
551    #[test]
552    fn slug_guess_pi_pattern() {
553        let guesses = github_repo_guesses_from_slug("owner-pi-foo");
554        assert!(
555            guesses
556                .iter()
557                .any(|r| r.owner == "owner" && r.repo == "pi-foo")
558        );
559    }
560
561    #[test]
562    fn slug_guess_simple_hyphen() {
563        let guesses = github_repo_guesses_from_slug("alice-myrepo");
564        assert!(
565            guesses
566                .iter()
567                .any(|r| r.owner == "alice" && r.repo == "myrepo")
568        );
569    }
570
571    #[test]
572    fn slug_guess_empty_returns_empty() {
573        assert!(github_repo_guesses_from_slug("").is_empty());
574    }
575
576    #[test]
577    fn slug_guess_whitespace_returns_empty() {
578        assert!(github_repo_guesses_from_slug("   ").is_empty());
579    }
580
581    #[test]
582    fn slug_guess_no_hyphen_returns_empty() {
583        assert!(github_repo_guesses_from_slug("nohyphen").is_empty());
584    }
585
586    #[test]
587    fn slug_guess_multiple_hyphens_gives_multiple_guesses() {
588        let guesses = github_repo_guesses_from_slug("a-b-c");
589        assert!(!guesses.is_empty());
590        // Should contain at least first-split ("a"/"b-c") and last-split ("a-b"/"c").
591        assert!(guesses.iter().any(|r| r.owner == "a" && r.repo == "b-c"));
592        assert!(guesses.iter().any(|r| r.owner == "a-b" && r.repo == "c"));
593    }
594
595    // ====================================================================
596    // parse_github_repo_response
597    // ====================================================================
598
599    #[test]
600    fn parse_github_repo_response_full() {
601        let json = r#"{
602            "full_name": "anthropics/claude-code",
603            "stargazers_count": 42000,
604            "forks_count": 1500,
605            "subscribers_count": 800,
606            "open_issues_count": 123,
607            "pushed_at": "2026-02-01T12:00:00Z"
608        }"#;
609        let metrics = parse_github_repo_response(json).unwrap();
610        assert_eq!(metrics.full_name, "anthropics/claude-code");
611        assert_eq!(metrics.stars, 42000);
612        assert_eq!(metrics.forks, 1500);
613        assert_eq!(metrics.watchers, Some(800));
614        assert_eq!(metrics.open_issues, 123);
615        assert_eq!(metrics.pushed_at, Some("2026-02-01T12:00:00Z".to_string()));
616    }
617
618    #[test]
619    fn parse_github_repo_response_missing_optional_fields() {
620        let json = r#"{
621            "full_name": "owner/repo",
622            "stargazers_count": 10,
623            "forks_count": 2,
624            "open_issues_count": 0
625        }"#;
626        let metrics = parse_github_repo_response(json).unwrap();
627        assert_eq!(metrics.stars, 10);
628        assert_eq!(metrics.watchers, None);
629        assert_eq!(metrics.pushed_at, None);
630    }
631
632    #[test]
633    fn parse_github_repo_response_invalid_json() {
634        assert!(parse_github_repo_response("{broken}").is_err());
635    }
636
637    // ====================================================================
638    // parse_npm_downloads_response
639    // ====================================================================
640
641    #[test]
642    fn parse_npm_downloads_response_with_count() {
643        let json = r#"{"downloads": 50000}"#;
644        assert_eq!(parse_npm_downloads_response(json).unwrap(), Some(50000));
645    }
646
647    #[test]
648    fn parse_npm_downloads_response_with_error() {
649        let json = r#"{"error": "package not found"}"#;
650        assert_eq!(parse_npm_downloads_response(json).unwrap(), None);
651    }
652
653    #[test]
654    fn parse_npm_downloads_response_null_downloads() {
655        let json = r#"{"downloads": null}"#;
656        assert_eq!(parse_npm_downloads_response(json).unwrap(), None);
657    }
658
659    #[test]
660    fn parse_npm_downloads_response_zero() {
661        let json = r#"{"downloads": 0}"#;
662        assert_eq!(parse_npm_downloads_response(json).unwrap(), Some(0));
663    }
664
665    #[test]
666    fn parse_npm_downloads_response_invalid_json() {
667        assert!(parse_npm_downloads_response("{bad").is_err());
668    }
669
670    // ====================================================================
671    // parse_npm_registry_response
672    // ====================================================================
673
674    #[test]
675    fn parse_npm_registry_response_full() {
676        let json = r#"{
677            "dist-tags": {"latest": "3.2.1"},
678            "time": {"3.2.1": "2026-01-15T10:00:00Z"},
679            "repository": {"type": "git", "url": "https://github.com/owner/repo.git"}
680        }"#;
681        let meta = parse_npm_registry_response(json).unwrap();
682        assert_eq!(meta.latest_version, Some("3.2.1".to_string()));
683        assert_eq!(meta.last_publish, Some("2026-01-15T10:00:00Z".to_string()));
684        assert_eq!(
685            meta.repository_url,
686            Some("https://github.com/owner/repo.git".to_string())
687        );
688    }
689
690    #[test]
691    fn parse_npm_registry_response_string_repository() {
692        let json = r#"{
693            "dist-tags": {"latest": "1.0.0"},
694            "time": {"1.0.0": "2026-01-01T00:00:00Z"},
695            "repository": "https://github.com/owner/repo"
696        }"#;
697        let meta = parse_npm_registry_response(json).unwrap();
698        assert_eq!(
699            meta.repository_url,
700            Some("https://github.com/owner/repo".to_string())
701        );
702    }
703
704    #[test]
705    fn parse_npm_registry_response_no_dist_tags() {
706        let json = "{}";
707        let meta = parse_npm_registry_response(json).unwrap();
708        assert_eq!(meta.latest_version, None);
709        assert_eq!(meta.last_publish, None);
710        assert_eq!(meta.repository_url, None);
711    }
712
713    #[test]
714    fn parse_npm_registry_response_invalid_json() {
715        assert!(parse_npm_registry_response("{broken").is_err());
716    }
717
718    // ====================================================================
719    // PopularityEvidence serde round-trip
720    // ====================================================================
721
722    #[test]
723    fn popularity_evidence_default_serializes_all_none() {
724        let pe = PopularityEvidence::default();
725        let json = serde_json::to_value(&pe).unwrap();
726        assert!(json["github_stars"].is_null());
727        assert!(json["npm_downloads_weekly"].is_null());
728        assert!(json["marketplace_rank"].is_null());
729    }
730
731    #[test]
732    fn popularity_evidence_round_trip() {
733        let pe = PopularityEvidence {
734            snapshot_at: Some("2026-02-06T12:00:00Z".to_string()),
735            github_stars: Some(42000),
736            github_forks: Some(1500),
737            npm_downloads_weekly: Some(100_000),
738            npm_downloads_monthly: Some(400_000),
739            ..Default::default()
740        };
741        let json = serde_json::to_string(&pe).unwrap();
742        let pe2: PopularityEvidence = serde_json::from_str(&json).unwrap();
743        assert_eq!(pe2.github_stars, Some(42000));
744        assert_eq!(pe2.npm_downloads_weekly, Some(100_000));
745        assert_eq!(pe2.github_watchers, None);
746    }
747
748    // ====================================================================
749    // CandidateSource serde (tagged enum variants)
750    // ====================================================================
751
752    #[test]
753    fn candidate_source_git_round_trip() {
754        let src = CandidateSource::Git {
755            repo: "https://github.com/owner/repo.git".to_string(),
756            path: Some("packages/core".to_string()),
757        };
758        let json = serde_json::to_string(&src).unwrap();
759        assert!(json.contains(r#""type":"git"#));
760        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
761        match deserialized {
762            CandidateSource::Git { repo, path } => {
763                assert_eq!(repo, "https://github.com/owner/repo.git");
764                assert_eq!(path, Some("packages/core".to_string()));
765            }
766            _ => panic!("expected Git variant"),
767        }
768    }
769
770    #[test]
771    fn candidate_source_npm_round_trip() {
772        let src = CandidateSource::Npm {
773            package: "@scope/pkg".to_string(),
774            version: "1.2.3".to_string(),
775            url: "https://registry.npmjs.org/@scope/pkg/-/pkg-1.2.3.tgz".to_string(),
776        };
777        let json = serde_json::to_string(&src).unwrap();
778        assert!(json.contains(r#""type":"npm"#));
779        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
780        match deserialized {
781            CandidateSource::Npm {
782                package,
783                version,
784                url,
785            } => {
786                assert_eq!(package, "@scope/pkg");
787                assert_eq!(version, "1.2.3");
788                assert!(url.contains("registry.npmjs.org"));
789            }
790            _ => panic!("expected Npm variant"),
791        }
792    }
793
794    #[test]
795    fn candidate_source_url_round_trip() {
796        let src = CandidateSource::Url {
797            url: "https://example.com/ext.tgz".to_string(),
798        };
799        let json = serde_json::to_string(&src).unwrap();
800        assert!(json.contains(r#""type":"url"#));
801        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
802        match deserialized {
803            CandidateSource::Url { url } => {
804                assert_eq!(url, "https://example.com/ext.tgz");
805            }
806            _ => panic!("expected Url variant"),
807        }
808    }
809
810    #[test]
811    fn candidate_source_git_no_path() {
812        let src = CandidateSource::Git {
813            repo: "https://github.com/owner/repo".to_string(),
814            path: None,
815        };
816        let json = serde_json::to_string(&src).unwrap();
817        let deserialized: CandidateSource = serde_json::from_str(&json).unwrap();
818        match deserialized {
819            CandidateSource::Git { path, .. } => {
820                assert_eq!(path, None);
821            }
822            _ => panic!("expected Git variant"),
823        }
824    }
825
826    // ====================================================================
827    // parse_owner_repo edge cases (via public API)
828    // ====================================================================
829
830    #[test]
831    fn url_with_trailing_slash() {
832        let c = github_repo_candidate_from_url("https://github.com/owner/repo/").unwrap();
833        assert_eq!(
834            c,
835            GitHubRepoCandidate::Repo(GitHubRepoRef {
836                owner: "owner".to_string(),
837                repo: "repo".to_string()
838            })
839        );
840    }
841
842    // ====================================================================
843    // NpmDownloads / NpmRegistryMeta / GitHubRepoMetrics equality
844    // ====================================================================
845
846    #[test]
847    fn npm_downloads_equality() {
848        let a = NpmDownloads {
849            weekly: Some(100),
850            monthly: Some(400),
851        };
852        let b = NpmDownloads {
853            weekly: Some(100),
854            monthly: Some(400),
855        };
856        assert_eq!(a, b);
857    }
858
859    #[test]
860    fn github_repo_metrics_equality() {
861        let a = GitHubRepoMetrics {
862            full_name: "o/r".to_string(),
863            stars: 10,
864            forks: 5,
865            watchers: None,
866            open_issues: 0,
867            pushed_at: None,
868        };
869        let b = a.clone();
870        assert_eq!(a, b);
871    }
872
873    mod proptest_extension_popularity {
874        use super::*;
875        use proptest::prelude::*;
876
877        proptest! {
878            /// `github_repo_candidate_from_url` never panics on arbitrary input.
879            #[test]
880            fn github_url_never_panics(s in "(?s).{0,200}") {
881                let _ = github_repo_candidate_from_url(&s);
882            }
883
884            /// Valid `https://github.com/owner/repo` URLs always parse to `Repo`.
885            #[test]
886            fn valid_github_url_parses_to_repo(
887                owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,20}",
888                repo in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,20}"
889            ) {
890                let url = format!("https://github.com/{owner}/{repo}");
891                let result = github_repo_candidate_from_url(&url);
892                assert!(
893                    matches!(result, Some(GitHubRepoCandidate::Repo(_))),
894                    "expected Repo for {url}, got {result:?}"
895                );
896            }
897
898            /// `.git` suffix is stripped — with and without `.git` parse identically.
899            #[test]
900            fn git_suffix_stripped(
901                owner in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,10}",
902                repo in "[a-zA-Z0-9][a-zA-Z0-9_-]{0,10}"
903            ) {
904                let with_git = format!("https://github.com/{owner}/{repo}.git");
905                let without_git = format!("https://github.com/{owner}/{repo}");
906                assert_eq!(
907                    github_repo_candidate_from_url(&with_git),
908                    github_repo_candidate_from_url(&without_git)
909                );
910            }
911
912            /// Whitespace-padded URLs parse identically to trimmed.
913            #[test]
914            fn whitespace_padded_url(
915                owner in "[a-zA-Z0-9]{1,10}",
916                repo in "[a-zA-Z0-9]{1,10}",
917                spaces in "[ \\t]{0,5}"
918            ) {
919                let clean = format!("https://github.com/{owner}/{repo}");
920                let padded = format!("{spaces}{clean}{spaces}");
921                assert_eq!(
922                    github_repo_candidate_from_url(&clean),
923                    github_repo_candidate_from_url(&padded)
924                );
925            }
926
927            /// Empty/whitespace input returns `None`.
928            #[test]
929            fn empty_input_returns_none(ws in "[ \\t\\n]{0,10}") {
930                assert!(github_repo_candidate_from_url(&ws).is_none());
931            }
932
933            /// Non-github.com URLs return `None`.
934            #[test]
935            fn non_github_returns_none(
936                host in "[a-z]{3,10}\\.(com|org|io)",
937                path in "[a-z]{1,10}/[a-z]{1,10}"
938            ) {
939                // Skip if we accidentally generated github.com
940                let url = format!("https://{host}/{path}");
941                if host != "github.com" {
942                    assert!(github_repo_candidate_from_url(&url).is_none());
943                }
944            }
945
946            /// `full_name` always has format `owner/repo`.
947            #[test]
948            fn full_name_format(
949                owner in "[a-zA-Z0-9]{1,15}",
950                repo in "[a-zA-Z0-9]{1,15}"
951            ) {
952                let r = GitHubRepoRef {
953                    owner: owner.clone(),
954                    repo: repo.clone(),
955                };
956                let full = r.full_name();
957                assert_eq!(full, format!("{owner}/{repo}"));
958                assert!(full.contains('/'));
959            }
960
961            /// `github_repo_guesses_from_slug` never panics.
962            #[test]
963            fn slug_guesses_never_panics(s in ".{0,100}") {
964                let _ = github_repo_guesses_from_slug(&s);
965            }
966
967            /// Slug guesses all have non-empty owner and repo.
968            #[test]
969            fn slug_guesses_fields_nonempty(slug in "[a-zA-Z0-9_-]{1,30}") {
970                for guess in github_repo_guesses_from_slug(&slug) {
971                    assert!(!guess.owner.is_empty());
972                    assert!(!guess.repo.is_empty());
973                }
974            }
975
976            /// Slugs without hyphens produce no guesses.
977            #[test]
978            fn slug_no_hyphen_empty(slug in "[a-zA-Z0-9]{1,20}") {
979                assert!(
980                    github_repo_guesses_from_slug(&slug).is_empty(),
981                    "expected no guesses for hyphenless slug: {slug}"
982                );
983            }
984
985            /// `parse_github_repo_response` preserves numeric fields.
986            #[test]
987            fn github_response_preserves_values(
988                stars in 0u64..10_000_000,
989                forks in 0u64..1_000_000,
990                issues in 0u64..100_000
991            ) {
992                let json = format!(
993                    r#"{{"full_name":"o/r","stargazers_count":{stars},"forks_count":{forks},"open_issues_count":{issues}}}"#
994                );
995                let m = parse_github_repo_response(&json).unwrap();
996                assert_eq!(m.stars, stars);
997                assert_eq!(m.forks, forks);
998                assert_eq!(m.open_issues, issues);
999            }
1000
1001            /// `parse_github_repo_response` fails on invalid JSON.
1002            #[test]
1003            fn github_response_invalid_json(s in "[a-z]{5,20}") {
1004                assert!(parse_github_repo_response(&s).is_err());
1005            }
1006
1007            /// `parse_npm_downloads_response` returns `None` when error field is present.
1008            #[test]
1009            fn npm_downloads_error_returns_none(msg in "[a-z ]{1,30}") {
1010                let json = format!(r#"{{"error":"{msg}","downloads":42}}"#);
1011                assert_eq!(parse_npm_downloads_response(&json).unwrap(), None);
1012            }
1013
1014            /// `parse_npm_downloads_response` preserves download count.
1015            #[test]
1016            fn npm_downloads_value_preserved(n in 0u64..100_000_000) {
1017                let json = format!(r#"{{"downloads":{n}}}"#);
1018                assert_eq!(parse_npm_downloads_response(&json).unwrap(), Some(n));
1019            }
1020
1021            /// `parse_npm_downloads_response` fails on invalid JSON.
1022            #[test]
1023            fn npm_downloads_invalid_json(s in "[a-z]{5,20}") {
1024                assert!(parse_npm_downloads_response(&s).is_err());
1025            }
1026
1027            /// `parse_npm_registry_response` extracts repository URL from string form.
1028            #[test]
1029            fn npm_registry_string_repo_url(url in "https://[a-z]{3,10}\\.com/[a-z]{1,10}") {
1030                let json = format!(r#"{{"repository":"{url}"}}"#);
1031                let meta = parse_npm_registry_response(&json).unwrap();
1032                assert_eq!(meta.repository_url.as_deref(), Some(url.as_str()));
1033            }
1034
1035            /// `parse_npm_registry_response` extracts repository URL from object form.
1036            #[test]
1037            fn npm_registry_object_repo_url(url in "https://[a-z]{3,10}\\.com/[a-z]{1,10}") {
1038                let json = format!(r#"{{"repository":{{"type":"git","url":"{url}"}}}}"#);
1039                let meta = parse_npm_registry_response(&json).unwrap();
1040                assert_eq!(meta.repository_url.as_deref(), Some(url.as_str()));
1041            }
1042
1043            /// `parse_npm_registry_response` fails on invalid JSON.
1044            #[test]
1045            fn npm_registry_invalid_json(s in "[a-z]{5,20}") {
1046                assert!(parse_npm_registry_response(&s).is_err());
1047            }
1048
1049            /// `PopularityEvidence` serde roundtrip.
1050            #[test]
1051            fn popularity_evidence_serde_roundtrip(
1052                stars in prop::option::of(0u64..1_000_000),
1053                forks in prop::option::of(0u64..100_000),
1054                weekly in prop::option::of(0u64..10_000_000)
1055            ) {
1056                let ev = PopularityEvidence {
1057                    github_stars: stars,
1058                    github_forks: forks,
1059                    npm_downloads_weekly: weekly,
1060                    ..PopularityEvidence::default()
1061                };
1062                let json = serde_json::to_string(&ev).unwrap();
1063                let back: PopularityEvidence = serde_json::from_str(&json).unwrap();
1064                assert_eq!(back.github_stars, stars);
1065                assert_eq!(back.github_forks, forks);
1066                assert_eq!(back.npm_downloads_weekly, weekly);
1067            }
1068        }
1069    }
1070}