Skip to main content

pi/
extension_index.rs

1//! Extension discovery index (offline-first).
2//!
3//! This module provides a local, searchable index of available extensions. The index is:
4//! - **Offline-first**: Pi ships a bundled seed index embedded at compile time.
5//! - **Fail-open**: cache load/refresh failures should never break discovery.
6//! - **Host-agnostic**: the index is primarily a data structure; CLI commands live elsewhere.
7
8use crate::config::Config;
9use crate::error::{Error, Result};
10use crate::http::client::Client;
11use chrono::{DateTime, Utc};
12use serde::{Deserialize, Serialize};
13use std::collections::{BTreeMap, BTreeSet};
14use std::io::Write as _;
15use std::path::{Path, PathBuf};
16use std::sync::{Mutex, OnceLock};
17use std::time::Duration;
18use tempfile::NamedTempFile;
19
/// Schema identifier written into new indexes and required by `ExtensionIndex::validate`.
20pub const EXTENSION_INDEX_SCHEMA: &str = "pi.ext.index.v1";
/// Index format version written into new indexes and required by `ExtensionIndex::validate`.
21pub const EXTENSION_INDEX_VERSION: u32 = 1;
/// 24 hours. NOTE(review): presumably the default `max_age` callers pass to the staleness
/// check; it is not referenced in the visible part of this file — confirm at call sites.
22pub const DEFAULT_INDEX_MAX_AGE: Duration = Duration::from_secs(60 * 60 * 24);
/// Value sent as the `text` parameter of the npm registry search request.
23const DEFAULT_NPM_QUERY: &str = "keywords:pi-extension";
/// Value sent as the `q` parameter of the GitHub repository search request.
24const DEFAULT_GITHUB_QUERY: &str = "topic:pi-extension";
/// Result count requested from each remote source; also the upper clamp for that count.
25const DEFAULT_REMOTE_LIMIT: usize = 100;
/// Timeout applied to each remote search request.
26const REMOTE_REQUEST_TIMEOUT: Duration = Duration::from_secs(15);
27
28#[derive(Debug, Clone, Serialize, Deserialize)]
29#[serde(rename_all = "camelCase")]
30pub struct ExtensionIndex {
31    pub schema: String,
32    pub version: u32,
33    #[serde(default, skip_serializing_if = "Option::is_none")]
34    pub generated_at: Option<String>,
35    #[serde(default, skip_serializing_if = "Option::is_none")]
36    pub last_refreshed_at: Option<String>,
37    #[serde(default)]
38    pub entries: Vec<ExtensionIndexEntry>,
39}
40
41impl ExtensionIndex {
42    #[must_use]
43    pub fn new_empty() -> Self {
44        Self {
45            schema: EXTENSION_INDEX_SCHEMA.to_string(),
46            version: EXTENSION_INDEX_VERSION,
47            generated_at: Some(Utc::now().to_rfc3339()),
48            last_refreshed_at: None,
49            entries: Vec::new(),
50        }
51    }
52
53    pub fn validate(&self) -> Result<()> {
54        if self.schema != EXTENSION_INDEX_SCHEMA {
55            return Err(Error::validation(format!(
56                "Unsupported extension index schema: {}",
57                self.schema
58            )));
59        }
60        if self.version != EXTENSION_INDEX_VERSION {
61            return Err(Error::validation(format!(
62                "Unsupported extension index version: {}",
63                self.version
64            )));
65        }
66        Ok(())
67    }
68
69    #[must_use]
70    pub fn is_stale(&self, now: DateTime<Utc>, max_age: Duration) -> bool {
71        let Some(ts) = &self.last_refreshed_at else {
72            return true;
73        };
74        let Ok(parsed) = DateTime::parse_from_rfc3339(ts) else {
75            return true;
76        };
77        let parsed = parsed.with_timezone(&Utc);
78        now.signed_duration_since(parsed)
79            .to_std()
80            .map_or(true, |age| age >= max_age)
81    }
82
83    /// Resolve a unique `installSource` for an id/name, if present.
84    ///
85    /// This is used to support ergonomic forms like `pi install checkpoint-pi` without requiring
86    /// users to spell out `npm:` / `git:` prefixes. If resolution is ambiguous, returns `None`.
87    #[must_use]
88    pub fn resolve_install_source(&self, query: &str) -> Option<String> {
89        let q = query.trim();
90        if q.is_empty() {
91            return None;
92        }
93        let q_lc = q.to_ascii_lowercase();
94
95        let mut sources: BTreeSet<String> = BTreeSet::new();
96        for entry in &self.entries {
97            let Some(install) = &entry.install_source else {
98                continue;
99            };
100
101            if entry.name.eq_ignore_ascii_case(q) || entry.id.eq_ignore_ascii_case(q) {
102                sources.insert(install.clone());
103                continue;
104            }
105
106            // Convenience: `npm/<name>` or `<name>` for npm entries.
107            if let Some(ExtensionIndexSource::Npm { package, .. }) = &entry.source {
108                if package.to_ascii_lowercase() == q_lc {
109                    sources.insert(install.clone());
110                    continue;
111                }
112            }
113
114            if let Some(rest) = entry.id.strip_prefix("npm/") {
115                if rest.eq_ignore_ascii_case(q) {
116                    sources.insert(install.clone());
117                }
118            }
119        }
120
121        if sources.len() == 1 {
122            sources.into_iter().next()
123        } else {
124            None
125        }
126    }
127
128    #[must_use]
129    pub fn search(&self, query: &str, limit: usize) -> Vec<ExtensionSearchHit> {
130        let q = query.trim();
131        if q.is_empty() || limit == 0 {
132            return Vec::new();
133        }
134
135        let tokens = q
136            .split_whitespace()
137            .map(|t| t.trim().to_ascii_lowercase())
138            .filter(|t| !t.is_empty())
139            .collect::<Vec<_>>();
140        if tokens.is_empty() {
141            return Vec::new();
142        }
143
144        let mut hits = self
145            .entries
146            .iter()
147            .filter_map(|entry| {
148                let score = score_entry(entry, &tokens);
149                if score <= 0 {
150                    None
151                } else {
152                    Some(ExtensionSearchHit {
153                        entry: entry.clone(),
154                        score,
155                    })
156                }
157            })
158            .collect::<Vec<_>>();
159
160        hits.sort_by(|a, b| {
161            b.score
162                .cmp(&a.score)
163                .then_with(|| {
164                    b.entry
165                        .install_source
166                        .is_some()
167                        .cmp(&a.entry.install_source.is_some())
168                })
169                .then_with(|| {
170                    a.entry
171                        .name
172                        .to_ascii_lowercase()
173                        .cmp(&b.entry.name.to_ascii_lowercase())
174                })
175                .then_with(|| {
176                    a.entry
177                        .id
178                        .to_ascii_lowercase()
179                        .cmp(&b.entry.id.to_ascii_lowercase())
180                })
181        });
182
183        hits.truncate(limit);
184        hits
185    }
186}
187
/// One extension record in the index, serialized with camelCase field names.
188#[derive(Debug, Clone, Serialize, Deserialize)]
189#[serde(rename_all = "camelCase")]
190pub struct ExtensionIndexEntry {
191    /// Globally unique id within the index (stable key).
192    pub id: String,
193    /// Primary display name (often npm package name or repo name).
194    pub name: String,
    /// Optional description text; matched by search and omitted from output when `None`.
195    #[serde(default, skip_serializing_if = "Option::is_none")]
196    pub description: Option<String>,
    /// Tags matched by search; defaults to empty when missing from the input.
197    #[serde(default)]
198    pub tags: Vec<String>,
    /// Optional license string; omitted from output when `None`.
199    #[serde(default, skip_serializing_if = "Option::is_none")]
200    pub license: Option<String>,
    /// Structured origin of the extension (npm, git or url), when known.
201    #[serde(default, skip_serializing_if = "Option::is_none")]
202    pub source: Option<ExtensionIndexSource>,
203    /// Optional source string compatible with Pi's package manager (e.g. `npm:pkg@ver`).
204    #[serde(default, skip_serializing_if = "Option::is_none")]
205    pub install_source: Option<String>,
206}
207
208#[derive(Debug, Clone, Serialize, Deserialize)]
209#[serde(tag = "type", rename_all = "lowercase")]
210pub enum ExtensionIndexSource {
211    Npm {
212        package: String,
213        #[serde(default, skip_serializing_if = "Option::is_none")]
214        version: Option<String>,
215        #[serde(default, skip_serializing_if = "Option::is_none")]
216        url: Option<String>,
217    },
218    Git {
219        repo: String,
220        #[serde(default, skip_serializing_if = "Option::is_none")]
221        path: Option<String>,
222        #[serde(default, skip_serializing_if = "Option::is_none")]
223        r#ref: Option<String>,
224    },
225    Url {
226        url: String,
227    },
228}
229
/// A single search result: a copy of the matched entry plus its relevance score.
230#[derive(Debug, Clone)]
231pub struct ExtensionSearchHit {
    /// Clone of the index entry that matched the query.
232    pub entry: ExtensionIndexEntry,
    /// Score computed by `score_entry`; `search` only returns hits with a positive score.
233    pub score: i64,
234}
235
/// Counters describing the outcome of one remote refresh attempt.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare outcomes directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ExtensionIndexRefreshStats {
    /// Number of entries returned by the npm search.
    pub npm_entries: usize,
    /// Number of entries returned by the GitHub search.
    pub github_entries: usize,
    /// Total number of entries in the index after merging (zero when nothing was refreshed).
    pub merged_entries: usize,
    /// Whether any remote entries were merged into the index.
    pub refreshed: bool,
}
243
244fn score_entry(entry: &ExtensionIndexEntry, tokens: &[String]) -> i64 {
245    let name = entry.name.to_ascii_lowercase();
246    let id = entry.id.to_ascii_lowercase();
247    let description = entry
248        .description
249        .as_ref()
250        .map(|s| s.to_ascii_lowercase())
251        .unwrap_or_default();
252    let tags = entry
253        .tags
254        .iter()
255        .map(|t| t.to_ascii_lowercase())
256        .collect::<Vec<_>>();
257
258    let mut score: i64 = 0;
259    for token in tokens {
260        if name.contains(token) {
261            score += 300;
262        }
263        if id.contains(token) {
264            score += 120;
265        }
266        if description.contains(token) {
267            score += 60;
268        }
269        if tags.iter().any(|t| t.contains(token)) {
270            score += 180;
271        }
272    }
273
274    score
275}
276
277#[derive(Debug, Clone)]
278pub struct ExtensionIndexStore {
279    path: PathBuf,
280}
281
282impl ExtensionIndexStore {
283    #[must_use]
284    pub const fn new(path: PathBuf) -> Self {
285        Self { path }
286    }
287
288    #[must_use]
289    pub fn default_path() -> PathBuf {
290        Config::extension_index_path()
291    }
292
293    #[must_use]
294    pub fn default_store() -> Self {
295        Self::new(Self::default_path())
296    }
297
298    #[must_use]
299    pub fn path(&self) -> &Path {
300        &self.path
301    }
302
303    pub fn load(&self) -> Result<Option<ExtensionIndex>> {
304        if !self.path.exists() {
305            return Ok(None);
306        }
307        let content = std::fs::read_to_string(&self.path)?;
308        let index: ExtensionIndex = serde_json::from_str(&content)?;
309        index.validate()?;
310        Ok(Some(index))
311    }
312
313    pub fn load_or_seed(&self) -> Result<ExtensionIndex> {
314        match self.load() {
315            Ok(Some(index)) => Ok(index),
316            Ok(None) => seed_index(),
317            Err(err) => {
318                tracing::warn!(
319                    "failed to load extension index cache (falling back to seed): {err}"
320                );
321                seed_index()
322            }
323        }
324    }
325
326    pub fn save(&self, index: &ExtensionIndex) -> Result<()> {
327        index.validate()?;
328        if let Some(parent) = self.path.parent() {
329            std::fs::create_dir_all(parent)?;
330            let mut tmp = NamedTempFile::new_in(parent)?;
331            let encoded = serde_json::to_string_pretty(index)?;
332            tmp.write_all(encoded.as_bytes())?;
333            tmp.flush()?;
334            persist_tempfile_for_cache(tmp, &self.path).map_err(|err| {
335                Error::config(format!(
336                    "Failed to persist extension index to {}: {err}",
337                    self.path.display()
338                ))
339            })
340        } else {
341            Err(Error::config(format!(
342                "Invalid extension index path: {}",
343                self.path.display()
344            )))
345        }
346    }
347
348    pub fn resolve_install_source(&self, query: &str) -> Result<Option<String>> {
349        let index = self.load_or_seed()?;
350        Ok(index.resolve_install_source(query))
351    }
352
353    pub async fn load_or_refresh_best_effort(
354        &self,
355        client: &Client,
356        max_age: Duration,
357    ) -> Result<ExtensionIndex> {
358        let current = self.load_or_seed()?;
359        if current.is_stale(Utc::now(), max_age) {
360            let (refreshed, _) = self.refresh_best_effort(client).await?;
361            return Ok(refreshed);
362        }
363        Ok(current)
364    }
365
366    pub async fn refresh_best_effort(
367        &self,
368        client: &Client,
369    ) -> Result<(ExtensionIndex, ExtensionIndexRefreshStats)> {
370        let mut current = self.load_or_seed()?;
371
372        let npm_entries = match fetch_npm_entries(client, DEFAULT_REMOTE_LIMIT).await {
373            Ok(entries) => entries,
374            Err(err) => {
375                tracing::warn!("npm extension index refresh failed: {err}");
376                Vec::new()
377            }
378        };
379        let github_entries = match fetch_github_entries(client, DEFAULT_REMOTE_LIMIT).await {
380            Ok(entries) => entries,
381            Err(err) => {
382                tracing::warn!("github extension index refresh failed: {err}");
383                Vec::new()
384            }
385        };
386
387        let npm_count = npm_entries.len();
388        let github_count = github_entries.len();
389        if npm_count == 0 && github_count == 0 {
390            return Ok((
391                current,
392                ExtensionIndexRefreshStats {
393                    npm_entries: 0,
394                    github_entries: 0,
395                    merged_entries: 0,
396                    refreshed: false,
397                },
398            ));
399        }
400
401        current.entries = merge_entries(current.entries, npm_entries, github_entries);
402        current.last_refreshed_at = Some(Utc::now().to_rfc3339());
403        if let Err(err) = self.save(&current) {
404            tracing::warn!("failed to persist refreshed extension index cache: {err}");
405        }
406
407        Ok((
408            current.clone(),
409            ExtensionIndexRefreshStats {
410                npm_entries: npm_count,
411                github_entries: github_count,
412                merged_entries: current.entries.len(),
413                refreshed: true,
414            },
415        ))
416    }
417}
418
419fn persist_tempfile_for_cache(tmp: NamedTempFile, path: &Path) -> std::io::Result<()> {
420    let _persist_guard = extension_index_persist_lock()
421        .lock()
422        .unwrap_or_else(std::sync::PoisonError::into_inner);
423    match tmp.persist(path) {
424        Ok(_) => Ok(()),
425        Err(err) => persist_tempfile_for_cache_after_conflict(err, path),
426    }
427}
428
/// Process-wide mutex used to serialize cache persists; created on first use.
fn extension_index_persist_lock() -> &'static Mutex<()> {
    static LOCK_CELL: OnceLock<Mutex<()>> = OnceLock::new();
    LOCK_CELL.get_or_init(Mutex::default)
}
433
/// Windows fallback for a failed persist: when the failure was `AlreadyExists`, remove the
/// destination and persist again; any other failure is returned unchanged.
#[cfg(windows)]
fn persist_tempfile_for_cache_after_conflict(
    err: tempfile::PersistError,
    path: &Path,
) -> std::io::Result<()> {
    use std::io::ErrorKind;

    if !matches!(err.error.kind(), ErrorKind::AlreadyExists) {
        return Err(err.error);
    }

    // Index writes are fail-open cache refreshes, so a destination that refuses to be
    // replaced is cleared out of the way and the persist is attempted a second time
    // instead of reporting a permanent refresh failure.
    if let Err(remove_err) = std::fs::remove_file(path) {
        // A destination that is already gone is as good as a successful removal.
        if remove_err.kind() != ErrorKind::NotFound {
            return Err(remove_err);
        }
    }

    match err.file.persist(path) {
        Ok(_) => Ok(()),
        Err(retry_err) => Err(retry_err.error),
    }
}
458
459#[cfg(not(windows))]
460fn persist_tempfile_for_cache_after_conflict(
461    err: tempfile::PersistError,
462    _path: &Path,
463) -> std::io::Result<()> {
464    Err(err.error)
465}
466
467fn merge_entries(
468    existing: Vec<ExtensionIndexEntry>,
469    npm_entries: Vec<ExtensionIndexEntry>,
470    github_entries: Vec<ExtensionIndexEntry>,
471) -> Vec<ExtensionIndexEntry> {
472    let mut by_id = BTreeMap::<String, ExtensionIndexEntry>::new();
473    for entry in existing {
474        by_id.insert(entry.id.to_ascii_lowercase(), entry);
475    }
476
477    for incoming in npm_entries.into_iter().chain(github_entries) {
478        let key = incoming.id.to_ascii_lowercase();
479        if let Some(entry) = by_id.get_mut(&key) {
480            merge_entry(entry, incoming);
481        } else {
482            by_id.insert(key, incoming);
483        }
484    }
485
486    let mut entries = by_id.into_values().collect::<Vec<_>>();
487    entries.sort_by_key(|entry| entry.id.to_ascii_lowercase());
488    entries
489}
490
491fn merge_entry(existing: &mut ExtensionIndexEntry, incoming: ExtensionIndexEntry) {
492    if !incoming.name.trim().is_empty() {
493        existing.name = incoming.name;
494    }
495    if incoming.description.is_some() {
496        existing.description = incoming.description;
497    }
498    if incoming.license.is_some() {
499        existing.license = incoming.license;
500    }
501    if incoming.source.is_some() {
502        existing.source = incoming.source;
503    }
504    if incoming.install_source.is_some() {
505        existing.install_source = incoming.install_source;
506    }
507    existing.tags = merge_tags(existing.tags.iter().cloned(), incoming.tags);
508}
509
/// Union two tag sequences into a sorted, de-duplicated list.
///
/// Every tag is trimmed; tags that are empty after trimming are dropped. Comparison is
/// exact (case-sensitive) and the result is in ascending string order.
fn merge_tags(
    left: impl IntoIterator<Item = String>,
    right: impl IntoIterator<Item = String>,
) -> Vec<String> {
    let unique: BTreeSet<String> = left
        .into_iter()
        .chain(right)
        .filter_map(|raw| {
            let cleaned = raw.trim();
            (!cleaned.is_empty()).then(|| cleaned.to_owned())
        })
        .collect();
    unique.into_iter().collect()
}
523
524async fn fetch_npm_entries(client: &Client, limit: usize) -> Result<Vec<ExtensionIndexEntry>> {
525    let query =
526        url::form_urlencoded::byte_serialize(DEFAULT_NPM_QUERY.as_bytes()).collect::<String>();
527    let size = limit.clamp(1, DEFAULT_REMOTE_LIMIT);
528    let url = format!("https://registry.npmjs.org/-/v1/search?text={query}&size={size}");
529    let response = client
530        .get(&url)
531        .timeout(REMOTE_REQUEST_TIMEOUT)
532        .send()
533        .await?;
534    let status = response.status();
535    let body = response.text().await?;
536    if status != 200 {
537        return Err(Error::api(format!(
538            "npm extension search failed with status {status}"
539        )));
540    }
541
542    parse_npm_search_entries(&body)
543}
544
545async fn fetch_github_entries(client: &Client, limit: usize) -> Result<Vec<ExtensionIndexEntry>> {
546    let query =
547        url::form_urlencoded::byte_serialize(DEFAULT_GITHUB_QUERY.as_bytes()).collect::<String>();
548    let per_page = limit.clamp(1, DEFAULT_REMOTE_LIMIT);
549    let url = format!(
550        "https://api.github.com/search/repositories?q={query}&sort=updated&order=desc&per_page={per_page}"
551    );
552    let response = client
553        .get(&url)
554        .timeout(REMOTE_REQUEST_TIMEOUT)
555        .header("Accept", "application/vnd.github+json")
556        .send()
557        .await?;
558    let status = response.status();
559    let body = response.text().await?;
560    if status != 200 {
561        return Err(Error::api(format!(
562            "GitHub extension search failed with status {status}"
563        )));
564    }
565
566    parse_github_search_entries(&body)
567}
568
569fn parse_npm_search_entries(body: &str) -> Result<Vec<ExtensionIndexEntry>> {
570    #[derive(Debug, Deserialize)]
571    struct NpmSearchResponse {
572        #[serde(default)]
573        objects: Vec<NpmSearchObject>,
574    }
575
576    #[derive(Debug, Deserialize)]
577    struct NpmSearchObject {
578        package: NpmPackage,
579    }
580
581    #[derive(Debug, Deserialize)]
582    #[serde(rename_all = "camelCase")]
583    struct NpmPackage {
584        name: String,
585        #[serde(default)]
586        version: Option<String>,
587        #[serde(default)]
588        description: Option<String>,
589        #[serde(default)]
590        keywords: Vec<String>,
591        #[serde(default)]
592        license: Option<String>,
593        #[serde(default)]
594        links: NpmLinks,
595    }
596
597    #[derive(Debug, Default, Deserialize)]
598    struct NpmLinks {
599        #[serde(default)]
600        npm: Option<String>,
601    }
602
603    let parsed: NpmSearchResponse = serde_json::from_str(body)
604        .map_err(|err| Error::api(format!("npm search response parse error: {err}")))?;
605
606    let mut entries = Vec::with_capacity(parsed.objects.len());
607    for object in parsed.objects {
608        let package = object.package;
609        let version = package.version.as_deref().and_then(non_empty);
610        let install_spec = version.as_ref().map_or_else(
611            || package.name.clone(),
612            |ver| format!("{}@{ver}", package.name),
613        );
614        let license = normalize_license(package.license.as_deref());
615        let description = package.description.as_deref().and_then(non_empty);
616        let tags = merge_tags(
617            vec!["npm".to_string(), "extension".to_string()],
618            package
619                .keywords
620                .into_iter()
621                .map(|keyword| keyword.to_ascii_lowercase()),
622        );
623
624        entries.push(ExtensionIndexEntry {
625            id: format!("npm/{}", package.name),
626            name: package.name.clone(),
627            description,
628            tags,
629            license,
630            source: Some(ExtensionIndexSource::Npm {
631                package: package.name.clone(),
632                version,
633                url: package.links.npm.clone(),
634            }),
635            install_source: Some(format!("npm:{install_spec}")),
636        });
637    }
638
639    Ok(entries)
640}
641
642fn parse_github_search_entries(body: &str) -> Result<Vec<ExtensionIndexEntry>> {
643    #[derive(Debug, Deserialize)]
644    struct GitHubSearchResponse {
645        #[serde(default)]
646        items: Vec<GitHubRepo>,
647    }
648
649    #[derive(Debug, Deserialize)]
650    struct GitHubRepo {
651        full_name: String,
652        name: String,
653        #[serde(default)]
654        description: Option<String>,
655        #[serde(default)]
656        topics: Vec<String>,
657        #[serde(default)]
658        license: Option<GitHubLicense>,
659    }
660
661    #[derive(Debug, Deserialize)]
662    struct GitHubLicense {
663        #[serde(default)]
664        spdx_id: Option<String>,
665    }
666
667    let parsed: GitHubSearchResponse = serde_json::from_str(body)
668        .map_err(|err| Error::api(format!("GitHub search response parse error: {err}")))?;
669
670    let mut entries = Vec::with_capacity(parsed.items.len());
671    for item in parsed.items {
672        let spdx_id = item.license.and_then(|value| value.spdx_id);
673        let license = spdx_id
674            .as_deref()
675            .and_then(non_empty)
676            .filter(|value| !value.eq_ignore_ascii_case("NOASSERTION"));
677        let tags = merge_tags(
678            vec!["git".to_string(), "extension".to_string()],
679            item.topics
680                .into_iter()
681                .map(|topic| topic.to_ascii_lowercase()),
682        );
683
684        entries.push(ExtensionIndexEntry {
685            id: format!("git/{}", item.full_name),
686            name: item.name,
687            description: item.description.as_deref().and_then(non_empty),
688            tags,
689            license,
690            source: Some(ExtensionIndexSource::Git {
691                repo: item.full_name.clone(),
692                path: None,
693                r#ref: None,
694            }),
695            install_source: Some(format!("git:{}", item.full_name)),
696        });
697    }
698
699    Ok(entries)
700}
701
702fn normalize_license(value: Option<&str>) -> Option<String> {
703    value
704        .and_then(non_empty)
705        .filter(|license| !license.eq_ignore_ascii_case("unknown"))
706}
707
/// Return the trimmed text as an owned string, or `None` when nothing remains after
/// trimming surrounding whitespace.
fn non_empty(value: &str) -> Option<String> {
    Some(value.trim())
        .filter(|trimmed| !trimmed.is_empty())
        .map(str::to_owned)
}
716
717// ============================================================================
718// Seed Index (Bundled)
719// ============================================================================
720
/// Provenance JSON embedded into the binary at compile time; `seed_index` parses it to
/// build the bundled seed index.
721const SEED_ARTIFACT_PROVENANCE_JSON: &str =
722    include_str!("../docs/extension-artifact-provenance.json");
723
/// Deserialization target for the embedded provenance document.
724#[derive(Debug, Deserialize)]
725struct ArtifactProvenance {
    /// Value of the document's `$schema` key; deserialized but not read by the visible code.
726    #[serde(rename = "$schema")]
727    _schema: Option<String>,
    /// Generation timestamp string; `seed_index` copies it into the index's `generated_at`.
728    #[serde(default)]
729    generated: Option<String>,
    /// Provenance records, one per seed entry; a missing array is treated as empty.
730    #[serde(default)]
731    items: Vec<ArtifactProvenanceItem>,
732}
733
/// One record of the provenance document; `seed_index` turns each into an index entry.
734#[derive(Debug, Deserialize)]
735struct ArtifactProvenanceItem {
    /// Becomes the index entry's `id`.
736    id: String,
    /// Becomes the index entry's `name`.
737    name: String,
    /// Optional license string; `seed_index` discards blank values and `unknown`.
738    #[serde(default)]
739    license: Option<String>,
    /// Where the artifact comes from (git, npm or url).
740    source: ArtifactProvenanceSource,
741}
742
/// Origin of a provenance record, internally tagged by a lowercase `type` field
/// (`git`, `npm` or `url`).
743#[derive(Debug, Deserialize)]
744#[serde(tag = "type", rename_all = "lowercase")]
745enum ArtifactProvenanceSource {
    /// Git-hosted artifact. `seed_index` emits a `git:<repo>` install source only when
    /// `path` is absent.
746    Git {
747        repo: String,
748        #[serde(default)]
749        path: Option<String>,
750    },
    /// npm-hosted artifact. `seed_index` emits an `npm:<package>[@<version>]` install source.
751    Npm {
752        package: String,
753        #[serde(default)]
754        version: Option<String>,
755        #[serde(default)]
756        url: Option<String>,
757    },
    /// Artifact referenced by URL. `seed_index` emits no install source for these.
758    Url {
759        url: String,
760    },
761}
762
763pub fn seed_index() -> Result<ExtensionIndex> {
764    let provenance: ArtifactProvenance = serde_json::from_str(SEED_ARTIFACT_PROVENANCE_JSON)?;
765    let generated_at = provenance.generated;
766
767    let mut entries = Vec::with_capacity(provenance.items.len());
768    for item in provenance.items {
769        let license = item
770            .license
771            .clone()
772            .filter(|value| !value.trim().is_empty() && !value.eq_ignore_ascii_case("unknown"));
773
774        let (source, install_source, tags) = match &item.source {
775            ArtifactProvenanceSource::Npm {
776                package,
777                version,
778                url,
779            } => {
780                let spec = version
781                    .as_ref()
782                    .map_or_else(|| package.clone(), |v| format!("{}@{}", package, v.trim()));
783                (
784                    Some(ExtensionIndexSource::Npm {
785                        package: package.clone(),
786                        version: version.clone(),
787                        url: url.clone(),
788                    }),
789                    Some(format!("npm:{spec}")),
790                    vec!["npm".to_string(), "extension".to_string()],
791                )
792            }
793            ArtifactProvenanceSource::Git { repo, path } => {
794                let install_source = path.as_ref().map_or_else(
795                    || Some(format!("git:{repo}")),
796                    |_| None, // deep path entries typically require a package filter
797                );
798                (
799                    Some(ExtensionIndexSource::Git {
800                        repo: repo.clone(),
801                        path: path.clone(),
802                        r#ref: None,
803                    }),
804                    install_source,
805                    vec!["git".to_string(), "extension".to_string()],
806                )
807            }
808            ArtifactProvenanceSource::Url { url } => (
809                Some(ExtensionIndexSource::Url { url: url.clone() }),
810                None,
811                vec!["url".to_string(), "extension".to_string()],
812            ),
813        };
814
815        entries.push(ExtensionIndexEntry {
816            id: item.id,
817            name: item.name,
818            description: None,
819            tags,
820            license,
821            source,
822            install_source,
823        });
824    }
825
826    entries.sort_by_key(|entry| entry.id.to_ascii_lowercase());
827
828    Ok(ExtensionIndex {
829        schema: EXTENSION_INDEX_SCHEMA.to_string(),
830        version: EXTENSION_INDEX_VERSION,
831        generated_at,
832        last_refreshed_at: None,
833        entries,
834    })
835}
836
837#[cfg(test)]
838mod tests {
839    use super::{
840        EXTENSION_INDEX_SCHEMA, EXTENSION_INDEX_VERSION, ExtensionIndex, ExtensionIndexEntry,
841        ExtensionIndexSource, ExtensionIndexStore, merge_entries, merge_tags, non_empty,
842        normalize_license, parse_github_search_entries, parse_npm_search_entries, score_entry,
843        seed_index,
844    };
845    use chrono::{Duration as ChronoDuration, Utc};
846    use std::time::Duration;
847
848    #[test]
849    fn seed_index_parses_and_has_entries() {
850        let index = seed_index().expect("seed index");
851        assert!(index.entries.len() > 10);
852    }
853
854    #[test]
855    fn seed_index_uses_npm_package_for_install_source() {
856        let index = seed_index().expect("seed index");
857        let entry = index
858            .entries
859            .iter()
860            .find(|entry| {
861                matches!(
862                    &entry.source,
863                    Some(ExtensionIndexSource::Npm { package, .. }) if package != &entry.name
864                )
865            })
866            .expect("seed should include an npm package whose display name differs from package");
867
868        let Some(ExtensionIndexSource::Npm {
869            package, version, ..
870        }) = &entry.source
871        else {
872            unreachable!("entry source should be npm");
873        };
874
875        let expected_install = version.as_ref().map_or_else(
876            || format!("npm:{package}"),
877            |version| format!("npm:{package}@{version}"),
878        );
879        assert_eq!(
880            entry.install_source.as_deref(),
881            Some(expected_install.as_str())
882        );
883    }
884
885    #[test]
886    fn resolve_install_source_requires_unique_match() {
887        let index = ExtensionIndex {
888            schema: super::EXTENSION_INDEX_SCHEMA.to_string(),
889            version: super::EXTENSION_INDEX_VERSION,
890            generated_at: None,
891            last_refreshed_at: None,
892            entries: vec![
893                ExtensionIndexEntry {
894                    id: "npm/foo".to_string(),
895                    name: "foo".to_string(),
896                    description: None,
897                    tags: Vec::new(),
898                    license: None,
899                    source: None,
900                    install_source: Some("npm:foo@1.0.0".to_string()),
901                },
902                ExtensionIndexEntry {
903                    id: "npm/foo-alt".to_string(),
904                    name: "foo".to_string(),
905                    description: None,
906                    tags: Vec::new(),
907                    license: None,
908                    source: None,
909                    install_source: Some("npm:foo@2.0.0".to_string()),
910                },
911            ],
912        };
913
914        assert_eq!(index.resolve_install_source("foo"), None);
915        assert_eq!(
916            index.resolve_install_source("npm/foo"),
917            Some("npm:foo@1.0.0".to_string())
918        );
919    }
920
921    #[test]
922    fn store_resolve_install_source_falls_back_to_seed() {
923        let store = ExtensionIndexStore::new(std::path::PathBuf::from("this-file-does-not-exist"));
924        let resolved = store.resolve_install_source("checkpoint-pi");
925        // The exact seed contents can change; the important part is "no error".
926        assert!(resolved.is_ok());
927    }
928
929    #[test]
930    fn parse_npm_search_entries_maps_install_sources() {
931        let body = r#"{
932          "objects": [
933            {
934              "package": {
935                "name": "checkpoint-pi",
936                "version": "1.2.3",
937                "description": "checkpoint helper",
938                "keywords": ["pi-extension", "checkpoint"],
939                "license": "MIT",
940                "links": { "npm": "https://www.npmjs.com/package/checkpoint-pi" }
941              }
942            }
943          ]
944        }"#;
945
946        let entries = parse_npm_search_entries(body).expect("parse npm search");
947        assert_eq!(entries.len(), 1);
948        let entry = &entries[0];
949        assert_eq!(entry.id, "npm/checkpoint-pi");
950        assert_eq!(
951            entry.install_source.as_deref(),
952            Some("npm:checkpoint-pi@1.2.3")
953        );
954        assert!(entry.tags.iter().any(|tag| tag == "checkpoint"));
955    }
956
957    #[test]
958    fn parse_github_search_entries_maps_git_install_sources() {
959        let body = r#"{
960          "items": [
961            {
962              "full_name": "org/pi-cool-ext",
963              "name": "pi-cool-ext",
964              "description": "cool extension",
965              "topics": ["pi-extension", "automation"],
966              "license": { "spdx_id": "Apache-2.0" }
967            }
968          ]
969        }"#;
970
971        let entries = parse_github_search_entries(body).expect("parse github search");
972        assert_eq!(entries.len(), 1);
973        let entry = &entries[0];
974        assert_eq!(entry.id, "git/org/pi-cool-ext");
975        assert_eq!(entry.install_source.as_deref(), Some("git:org/pi-cool-ext"));
976        assert!(entry.tags.iter().any(|tag| tag == "automation"));
977        assert!(matches!(
978            entry.source,
979            Some(ExtensionIndexSource::Git { .. })
980        ));
981    }
982
983    #[test]
984    fn merge_entries_preserves_existing_fields_when_incoming_missing() {
985        let existing = vec![ExtensionIndexEntry {
986            id: "npm/checkpoint-pi".to_string(),
987            name: "checkpoint-pi".to_string(),
988            description: Some("existing description".to_string()),
989            tags: vec!["npm".to_string()],
990            license: Some("MIT".to_string()),
991            source: Some(ExtensionIndexSource::Npm {
992                package: "checkpoint-pi".to_string(),
993                version: Some("1.0.0".to_string()),
994                url: None,
995            }),
996            install_source: Some("npm:checkpoint-pi@1.0.0".to_string()),
997        }];
998        let incoming = vec![ExtensionIndexEntry {
999            id: "npm/checkpoint-pi".to_string(),
1000            name: "checkpoint-pi".to_string(),
1001            description: None,
1002            tags: vec!["extension".to_string()],
1003            license: None,
1004            source: None,
1005            install_source: None,
1006        }];
1007
1008        let merged = merge_entries(existing, incoming, Vec::new());
1009        assert_eq!(merged.len(), 1);
1010        let entry = &merged[0];
1011        assert_eq!(entry.description.as_deref(), Some("existing description"));
1012        assert_eq!(
1013            entry.install_source.as_deref(),
1014            Some("npm:checkpoint-pi@1.0.0")
1015        );
1016        assert!(entry.tags.iter().any(|tag| tag == "npm"));
1017        assert!(entry.tags.iter().any(|tag| tag == "extension"));
1018    }
1019
1020    // ── new_empty ──────────────────────────────────────────────────────
1021
1022    #[test]
1023    fn new_empty_has_correct_schema_and_version() {
1024        let index = ExtensionIndex::new_empty();
1025        assert_eq!(index.schema, EXTENSION_INDEX_SCHEMA);
1026        assert_eq!(index.version, EXTENSION_INDEX_VERSION);
1027        assert!(index.generated_at.is_some());
1028        assert!(index.last_refreshed_at.is_none());
1029        assert!(index.entries.is_empty());
1030    }
1031
1032    // ── validate ───────────────────────────────────────────────────────
1033
1034    #[test]
1035    fn validate_accepts_correct_schema_and_version() {
1036        let index = ExtensionIndex::new_empty();
1037        assert!(index.validate().is_ok());
1038    }
1039
1040    #[test]
1041    fn validate_rejects_wrong_schema() {
1042        let mut index = ExtensionIndex::new_empty();
1043        index.schema = "wrong.schema".to_string();
1044        let err = index.validate().unwrap_err();
1045        assert!(
1046            err.to_string()
1047                .contains("Unsupported extension index schema")
1048        );
1049    }
1050
1051    #[test]
1052    fn validate_rejects_wrong_version() {
1053        let mut index = ExtensionIndex::new_empty();
1054        index.version = 999;
1055        let err = index.validate().unwrap_err();
1056        assert!(
1057            err.to_string()
1058                .contains("Unsupported extension index version")
1059        );
1060    }
1061
1062    // ── is_stale ───────────────────────────────────────────────────────
1063
1064    #[test]
1065    fn is_stale_true_when_no_timestamp() {
1066        let index = ExtensionIndex::new_empty();
1067        assert!(index.is_stale(Utc::now(), Duration::from_secs(3600)));
1068    }
1069
1070    #[test]
1071    fn is_stale_true_when_invalid_timestamp() {
1072        let mut index = ExtensionIndex::new_empty();
1073        index.last_refreshed_at = Some("not-a-date".to_string());
1074        assert!(index.is_stale(Utc::now(), Duration::from_secs(3600)));
1075    }
1076
1077    #[test]
1078    fn is_stale_false_when_fresh() {
1079        let mut index = ExtensionIndex::new_empty();
1080        index.last_refreshed_at = Some(Utc::now().to_rfc3339());
1081        assert!(!index.is_stale(Utc::now(), Duration::from_secs(3600)));
1082    }
1083
1084    #[test]
1085    fn is_stale_true_when_expired() {
1086        let mut index = ExtensionIndex::new_empty();
1087        let old = Utc::now() - ChronoDuration::hours(2);
1088        index.last_refreshed_at = Some(old.to_rfc3339());
1089        assert!(index.is_stale(Utc::now(), Duration::from_secs(3600)));
1090    }
1091
1092    #[test]
1093    fn is_stale_true_at_exact_max_age_boundary() {
1094        let now = Utc::now();
1095        let mut index = ExtensionIndex::new_empty();
1096        index.last_refreshed_at = Some((now - ChronoDuration::hours(1)).to_rfc3339());
1097        assert!(index.is_stale(now, Duration::from_secs(3600)));
1098    }
1099
1100    // ── search ─────────────────────────────────────────────────────────
1101
1102    fn test_entry(id: &str, name: &str, desc: Option<&str>, tags: &[&str]) -> ExtensionIndexEntry {
1103        ExtensionIndexEntry {
1104            id: id.to_string(),
1105            name: name.to_string(),
1106            description: desc.map(std::string::ToString::to_string),
1107            tags: tags.iter().map(std::string::ToString::to_string).collect(),
1108            license: None,
1109            source: None,
1110            install_source: Some(format!("npm:{name}")),
1111        }
1112    }
1113
1114    fn test_index(entries: Vec<ExtensionIndexEntry>) -> ExtensionIndex {
1115        ExtensionIndex {
1116            schema: EXTENSION_INDEX_SCHEMA.to_string(),
1117            version: EXTENSION_INDEX_VERSION,
1118            generated_at: None,
1119            last_refreshed_at: None,
1120            entries,
1121        }
1122    }
1123
1124    #[test]
1125    fn search_empty_query_returns_nothing() {
1126        let index = test_index(vec![test_entry("npm/foo", "foo", None, &[])]);
1127        assert!(index.search("", 10).is_empty());
1128        assert!(index.search("   ", 10).is_empty());
1129    }
1130
1131    #[test]
1132    fn search_zero_limit_returns_nothing() {
1133        let index = test_index(vec![test_entry("npm/foo", "foo", None, &[])]);
1134        assert!(index.search("foo", 0).is_empty());
1135    }
1136
1137    #[test]
1138    fn search_matches_by_name() {
1139        let index = test_index(vec![
1140            test_entry("npm/alpha", "alpha", None, &[]),
1141            test_entry("npm/beta", "beta", None, &[]),
1142        ]);
1143        let hits = index.search("alpha", 10);
1144        assert_eq!(hits.len(), 1);
1145        assert_eq!(hits[0].entry.name, "alpha");
1146    }
1147
1148    #[test]
1149    fn search_matches_by_description() {
1150        let index = test_index(vec![test_entry(
1151            "npm/foo",
1152            "foo",
1153            Some("checkpoint helper"),
1154            &[],
1155        )]);
1156        let hits = index.search("checkpoint", 10);
1157        assert_eq!(hits.len(), 1);
1158    }
1159
1160    #[test]
1161    fn search_matches_by_tag() {
1162        let index = test_index(vec![test_entry("npm/foo", "foo", None, &["automation"])]);
1163        let hits = index.search("automation", 10);
1164        assert_eq!(hits.len(), 1);
1165    }
1166
1167    #[test]
1168    fn search_respects_limit() {
1169        let index = test_index(vec![
1170            test_entry("npm/foo-a", "foo-a", None, &[]),
1171            test_entry("npm/foo-b", "foo-b", None, &[]),
1172            test_entry("npm/foo-c", "foo-c", None, &[]),
1173        ]);
1174        let hits = index.search("foo", 2);
1175        assert_eq!(hits.len(), 2);
1176    }
1177
1178    #[test]
1179    fn search_ranks_name_higher_than_description() {
1180        let index = test_index(vec![
1181            test_entry("npm/other", "other", Some("checkpoint tool"), &[]),
1182            test_entry("npm/checkpoint", "checkpoint", None, &[]),
1183        ]);
1184        let hits = index.search("checkpoint", 10);
1185        assert_eq!(hits.len(), 2);
1186        // Name match (300) beats description match (60)
1187        assert_eq!(hits[0].entry.name, "checkpoint");
1188    }
1189
1190    // ── score_entry ────────────────────────────────────────────────────
1191
1192    #[test]
1193    fn score_entry_name_match_highest() {
1194        let entry = test_entry("npm/foo", "foo", Some("bar"), &["baz"]);
1195        assert_eq!(score_entry(&entry, &["foo".to_string()]), 300 + 120);
1196        // name(300) + id contains "foo" too (120)
1197    }
1198
1199    #[test]
1200    fn score_entry_no_match_returns_zero() {
1201        let entry = test_entry("npm/foo", "foo", None, &[]);
1202        assert_eq!(score_entry(&entry, &["zzz".to_string()]), 0);
1203    }
1204
1205    #[test]
1206    fn score_entry_tag_match() {
1207        let entry = test_entry("npm/bar", "bar", None, &["automation"]);
1208        let score = score_entry(&entry, &["automation".to_string()]);
1209        assert_eq!(score, 180);
1210    }
1211
1212    #[test]
1213    fn score_entry_multiple_tokens_accumulate() {
1214        let entry = test_entry("npm/foo", "foo", Some("great tool"), &["utility"]);
1215        let score = score_entry(&entry, &["foo".to_string(), "great".to_string()]);
1216        // "foo": name(300) + id(120) = 420
1217        // "great": description(60) = 60
1218        assert_eq!(score, 480);
1219    }
1220
1221    // ── merge_tags ─────────────────────────────────────────────────────
1222
1223    #[test]
1224    fn merge_tags_deduplicates() {
1225        let result = merge_tags(
1226            vec!["a".to_string(), "b".to_string()],
1227            vec!["b".to_string(), "c".to_string()],
1228        );
1229        assert_eq!(result, vec!["a", "b", "c"]);
1230    }
1231
1232    #[test]
1233    fn merge_tags_trims_and_skips_empty() {
1234        let result = merge_tags(
1235            vec!["  a  ".to_string(), String::new()],
1236            vec!["  ".to_string(), "b".to_string()],
1237        );
1238        assert_eq!(result, vec!["a", "b"]);
1239    }
1240
1241    // ── normalize_license ──────────────────────────────────────────────
1242
1243    #[test]
1244    fn normalize_license_returns_none_for_none() {
1245        assert_eq!(normalize_license(None), None);
1246    }
1247
1248    #[test]
1249    fn normalize_license_returns_none_for_empty() {
1250        assert_eq!(normalize_license(Some("")), None);
1251        assert_eq!(normalize_license(Some("  ")), None);
1252    }
1253
1254    #[test]
1255    fn normalize_license_returns_none_for_unknown() {
1256        assert_eq!(normalize_license(Some("unknown")), None);
1257        assert_eq!(normalize_license(Some("UNKNOWN")), None);
1258    }
1259
1260    #[test]
1261    fn normalize_license_returns_value_for_valid() {
1262        assert_eq!(normalize_license(Some("MIT")), Some("MIT".to_string()));
1263        assert_eq!(
1264            normalize_license(Some("Apache-2.0")),
1265            Some("Apache-2.0".to_string())
1266        );
1267    }
1268
1269    // ── non_empty ──────────────────────────────────────────────────────
1270
1271    #[test]
1272    fn non_empty_returns_none_for_empty_and_whitespace() {
1273        assert_eq!(non_empty(""), None);
1274        assert_eq!(non_empty("   "), None);
1275    }
1276
1277    #[test]
1278    fn non_empty_trims_and_returns() {
1279        assert_eq!(non_empty("  hello  "), Some("hello".to_string()));
1280    }
1281
1282    // ── resolve_install_source edge cases ──────────────────────────────
1283
1284    #[test]
1285    fn resolve_install_source_empty_query_returns_none() {
1286        let index = test_index(vec![test_entry("npm/foo", "foo", None, &[])]);
1287        assert_eq!(index.resolve_install_source(""), None);
1288        assert_eq!(index.resolve_install_source("   "), None);
1289    }
1290
1291    #[test]
1292    fn resolve_install_source_case_insensitive() {
1293        let index = test_index(vec![ExtensionIndexEntry {
1294            id: "npm/Foo".to_string(),
1295            name: "Foo".to_string(),
1296            description: None,
1297            tags: Vec::new(),
1298            license: None,
1299            source: None,
1300            install_source: Some("npm:Foo".to_string()),
1301        }]);
1302        assert_eq!(
1303            index.resolve_install_source("foo"),
1304            Some("npm:Foo".to_string())
1305        );
1306    }
1307
1308    #[test]
1309    fn resolve_install_source_npm_package_name() {
1310        let index = test_index(vec![ExtensionIndexEntry {
1311            id: "npm/my-ext".to_string(),
1312            name: "my-ext".to_string(),
1313            description: None,
1314            tags: Vec::new(),
1315            license: None,
1316            source: Some(ExtensionIndexSource::Npm {
1317                package: "my-ext".to_string(),
1318                version: Some("1.0.0".to_string()),
1319                url: None,
1320            }),
1321            install_source: Some("npm:my-ext@1.0.0".to_string()),
1322        }]);
1323        assert_eq!(
1324            index.resolve_install_source("my-ext"),
1325            Some("npm:my-ext@1.0.0".to_string())
1326        );
1327    }
1328
1329    #[test]
1330    fn resolve_install_source_no_install_source_returns_none() {
1331        let index = test_index(vec![ExtensionIndexEntry {
1332            id: "npm/foo".to_string(),
1333            name: "foo".to_string(),
1334            description: None,
1335            tags: Vec::new(),
1336            license: None,
1337            source: None,
1338            install_source: None,
1339        }]);
1340        assert_eq!(index.resolve_install_source("foo"), None);
1341    }
1342
1343    // ── ExtensionIndexStore save/load roundtrip ────────────────────────
1344
1345    #[test]
1346    fn store_save_load_roundtrip() {
1347        let temp_dir = tempfile::tempdir().expect("tempdir");
1348        let path = temp_dir.path().join("index.json");
1349        let store = ExtensionIndexStore::new(path);
1350
1351        let mut index = ExtensionIndex::new_empty();
1352        index
1353            .entries
1354            .push(test_entry("npm/rt", "rt", Some("roundtrip"), &["test"]));
1355        store.save(&index).expect("save");
1356
1357        let loaded = store.load().expect("load").expect("some");
1358        assert_eq!(loaded.entries.len(), 1);
1359        assert_eq!(loaded.entries[0].name, "rt");
1360        assert_eq!(loaded.entries[0].description.as_deref(), Some("roundtrip"));
1361    }
1362
1363    #[test]
1364    fn store_save_overwrites_existing_file() {
1365        let temp_dir = tempfile::tempdir().expect("tempdir");
1366        let path = temp_dir.path().join("index.json");
1367        let store = ExtensionIndexStore::new(path);
1368
1369        let mut first = ExtensionIndex::new_empty();
1370        first.entries.push(test_entry(
1371            "npm/first",
1372            "first",
1373            Some("first version"),
1374            &["test"],
1375        ));
1376        store.save(&first).expect("save first");
1377
1378        let mut second = ExtensionIndex::new_empty();
1379        second.generated_at = Some("2026-03-09T00:00:00Z".to_string());
1380        second.last_refreshed_at = Some("2026-03-09T01:00:00Z".to_string());
1381        second.entries.push(test_entry(
1382            "npm/second",
1383            "second",
1384            Some("second version"),
1385            &["fresh"],
1386        ));
1387        store.save(&second).expect("overwrite existing cache");
1388
1389        let loaded = store.load().expect("load").expect("some");
1390        assert_eq!(loaded.entries.len(), 1);
1391        assert_eq!(loaded.entries[0].name, "second");
1392        assert_eq!(
1393            loaded.entries[0].description.as_deref(),
1394            Some("second version")
1395        );
1396        assert_eq!(
1397            loaded.last_refreshed_at.as_deref(),
1398            Some("2026-03-09T01:00:00Z")
1399        );
1400    }
1401
1402    #[test]
1403    fn store_load_nonexistent_returns_none() {
1404        let store = ExtensionIndexStore::new(std::path::PathBuf::from("/nonexistent/path.json"));
1405        assert!(store.load().expect("load").is_none());
1406    }
1407
1408    #[test]
1409    fn store_load_or_seed_falls_back_on_missing() {
1410        let store = ExtensionIndexStore::new(std::path::PathBuf::from("/nonexistent/path.json"));
1411        let index = store.load_or_seed().expect("load_or_seed");
1412        assert!(!index.entries.is_empty());
1413    }
1414
1415    // ── parse edge cases ───────────────────────────────────────────────
1416
1417    #[test]
1418    fn parse_npm_no_version_omits_at_in_install_source() {
1419        let body = r#"{
1420          "objects": [{
1421            "package": {
1422              "name": "bare-ext",
1423              "keywords": [],
1424              "links": {}
1425            }
1426          }]
1427        }"#;
1428        let entries = parse_npm_search_entries(body).expect("parse");
1429        assert_eq!(entries[0].install_source.as_deref(), Some("npm:bare-ext"));
1430    }
1431
1432    #[test]
1433    fn parse_npm_empty_objects_returns_empty() {
1434        let body = r#"{ "objects": [] }"#;
1435        let entries = parse_npm_search_entries(body).expect("parse");
1436        assert!(entries.is_empty());
1437    }
1438
1439    #[test]
1440    fn parse_github_noassertion_license_filtered_out() {
1441        let body = r#"{
1442          "items": [{
1443            "full_name": "org/ext",
1444            "name": "ext",
1445            "topics": [],
1446            "license": { "spdx_id": "NOASSERTION" }
1447          }]
1448        }"#;
1449        let entries = parse_github_search_entries(body).expect("parse");
1450        assert!(entries[0].license.is_none());
1451    }
1452
1453    #[test]
1454    fn parse_github_null_license_ok() {
1455        let body = r#"{
1456          "items": [{
1457            "full_name": "org/ext2",
1458            "name": "ext2",
1459            "topics": []
1460          }]
1461        }"#;
1462        let entries = parse_github_search_entries(body).expect("parse");
1463        assert!(entries[0].license.is_none());
1464    }
1465
1466    // ── merge_entries adds new entries ──────────────────────────────────
1467
1468    #[test]
1469    fn merge_entries_adds_new_and_deduplicates() {
1470        let existing = vec![test_entry("npm/a", "a", None, &[])];
1471        let npm = vec![test_entry("npm/b", "b", None, &[])];
1472        let git = vec![test_entry("git/c", "c", None, &[])];
1473        let merged = merge_entries(existing, npm, git);
1474        assert_eq!(merged.len(), 3);
1475        // Sorted by id
1476        assert_eq!(merged[0].id, "git/c");
1477        assert_eq!(merged[1].id, "npm/a");
1478        assert_eq!(merged[2].id, "npm/b");
1479    }
1480
1481    #[test]
1482    fn merge_entries_case_insensitive_dedup() {
1483        let existing = vec![test_entry("npm/Foo", "Foo", Some("old"), &[])];
1484        let npm = vec![test_entry("npm/foo", "foo", Some("new"), &[])];
1485        let merged = merge_entries(existing, npm, Vec::new());
1486        assert_eq!(merged.len(), 1);
1487        // Incoming overwrites description
1488        assert_eq!(merged[0].description.as_deref(), Some("new"));
1489    }
1490
1491    // ── serde roundtrip ────────────────────────────────────────────────
1492
1493    #[test]
1494    fn extension_index_serde_roundtrip() {
1495        let index = test_index(vec![test_entry("npm/x", "x", Some("desc"), &["tag1"])]);
1496        let json = serde_json::to_string(&index).expect("serialize");
1497        let deserialized: ExtensionIndex = serde_json::from_str(&json).expect("deserialize");
1498        assert_eq!(deserialized.entries.len(), 1);
1499        assert_eq!(deserialized.entries[0].name, "x");
1500    }
1501
1502    #[test]
1503    fn extension_index_entry_source_variants_serialize() {
1504        let npm = ExtensionIndexSource::Npm {
1505            package: "p".to_string(),
1506            version: Some("1.0".to_string()),
1507            url: None,
1508        };
1509        let git = ExtensionIndexSource::Git {
1510            repo: "org/r".to_string(),
1511            path: None,
1512            r#ref: None,
1513        };
1514        let url = ExtensionIndexSource::Url {
1515            url: "https://example.com".to_string(),
1516        };
1517
1518        for source in [npm, git, url] {
1519            let json = serde_json::to_string(&source).expect("serialize");
1520            let _: ExtensionIndexSource = serde_json::from_str(&json).expect("deserialize");
1521        }
1522    }
1523
1524    // ── ExtensionIndexRefreshStats default ─────────────────────────────
1525
1526    #[test]
1527    fn refresh_stats_default_all_zero() {
1528        let stats = super::ExtensionIndexRefreshStats::default();
1529        assert_eq!(stats.npm_entries, 0);
1530        assert_eq!(stats.github_entries, 0);
1531        assert_eq!(stats.merged_entries, 0);
1532        assert!(!stats.refreshed);
1533    }
1534
1535    // ── store path accessor ────────────────────────────────────────────
1536
1537    #[test]
1538    fn store_path_returns_configured_path() {
1539        let store = ExtensionIndexStore::new(std::path::PathBuf::from("/custom/path.json"));
1540        assert_eq!(store.path().to_str().unwrap(), "/custom/path.json");
1541    }
1542
1543    mod proptest_extension_index {
1544        use super::*;
1545        use proptest::prelude::*;
1546
1547        fn make_entry(id: &str, name: &str) -> ExtensionIndexEntry {
1548            ExtensionIndexEntry {
1549                id: id.to_string(),
1550                name: name.to_string(),
1551                description: None,
1552                tags: Vec::new(),
1553                license: None,
1554                source: None,
1555                install_source: None,
1556            }
1557        }
1558
1559        proptest! {
1560            /// `non_empty` returns None for whitespace-only strings.
1561            #[test]
1562            fn non_empty_whitespace(ws in "[ \\t\\n]{0,10}") {
1563                assert!(non_empty(&ws).is_none());
1564            }
1565
1566            /// `non_empty` returns trimmed value for non-empty strings.
1567            #[test]
1568            fn non_empty_trims(s in "[a-z]{1,10}", ws in "[ \\t]{0,3}") {
1569                let padded = format!("{ws}{s}{ws}");
1570                let result = non_empty(&padded).unwrap();
1571                assert_eq!(result, s);
1572            }
1573
1574            /// `normalize_license` filters "unknown" (case-insensitive).
1575            #[test]
1576            fn normalize_license_filters_unknown(
1577                case_idx in 0..3usize
1578            ) {
1579                let variants = ["unknown", "UNKNOWN", "Unknown"];
1580                assert!(normalize_license(Some(variants[case_idx])).is_none());
1581            }
1582
1583            /// `normalize_license(None)` returns None.
1584            #[test]
1585            fn normalize_license_none(_dummy in 0..1u8) {
1586                assert!(normalize_license(None).is_none());
1587            }
1588
1589            /// `normalize_license` passes through valid licenses.
1590            #[test]
1591            fn normalize_license_passthrough(s in "[A-Z]{3,10}") {
1592                if !s.eq_ignore_ascii_case("unknown") {
1593                    assert!(normalize_license(Some(&s)).is_some());
1594                }
1595            }
1596
1597            /// `score_entry` is zero for empty token list.
1598            #[test]
1599            fn score_empty_tokens(name in "[a-z]{1,10}") {
1600                let entry = make_entry("id", &name);
1601                assert_eq!(score_entry(&entry, &[]), 0);
1602            }
1603
1604            /// `score_entry` is non-negative.
1605            #[test]
1606            fn score_non_negative(
1607                name in "[a-z]{1,10}",
1608                token in "[a-z]{1,5}"
1609            ) {
1610                let entry = make_entry("id", &name);
1611                assert!(score_entry(&entry, &[token]) >= 0);
1612            }
1613
1614            /// `score_entry` is case-insensitive.
1615            #[test]
1616            fn score_case_insensitive(name in "[a-z]{1,10}") {
1617                // score_entry expects pre-lowered tokens (search() lowercases them).
1618                // The case-insensitivity is on the *entry* fields, not tokens.
1619                let lower_entry = make_entry("id", &name);
1620                let upper_entry = make_entry("id", &name.to_uppercase());
1621                let search_token = vec![name];
1622                assert_eq!(score_entry(&lower_entry, &search_token), score_entry(&upper_entry, &search_token));
1623            }
1624
1625            /// Name match gives 300 points per token.
1626            #[test]
1627            fn score_name_match(name in "[a-z]{3,8}") {
1628                let entry = make_entry("different-id", &name);
1629                let score = score_entry(&entry, &[name]);
1630                // At minimum 300 for name match (might also match id/description/tags)
1631                assert!(score >= 300);
1632            }
1633
1634            /// `merge_tags` deduplicates.
1635            #[test]
1636            fn merge_tags_dedup(tag in "[a-z]{1,10}") {
1637                let result = merge_tags(
1638                    vec![tag.clone(), tag.clone()],
1639                    vec![tag.clone()],
1640                );
1641                assert_eq!(result.len(), 1);
1642                assert_eq!(result[0], tag);
1643            }
1644
1645            /// `merge_tags` filters empty/whitespace.
1646            #[test]
1647            fn merge_tags_filters_empty(tag in "[a-z]{1,10}") {
1648                let result = merge_tags(
1649                    vec![tag, String::new(), "  ".to_string()],
1650                    vec![],
1651                );
1652                assert_eq!(result.len(), 1);
1653            }
1654
1655            /// `merge_tags` result is sorted (BTreeSet).
1656            #[test]
1657            fn merge_tags_sorted(
1658                a in "[a-z]{1,5}",
1659                b in "[a-z]{1,5}",
1660                c in "[a-z]{1,5}"
1661            ) {
1662                let result = merge_tags(vec![c, a], vec![b]);
1663                for w in result.windows(2) {
1664                    assert!(w[0] <= w[1]);
1665                }
1666            }
1667
1668            /// `merge_tags` preserves all unique tags from both sides.
1669            #[test]
1670            fn merge_tags_preserves(
1671                left in prop::collection::vec("[a-z]{1,5}", 0..5),
1672                right in prop::collection::vec("[a-z]{1,5}", 0..5)
1673            ) {
1674                let result = merge_tags(left.clone(), right.clone());
1675                // Every non-empty tag from either side should be in result
1676                for tag in left.iter().chain(right.iter()) {
1677                    let trimmed = tag.trim();
1678                    if !trimmed.is_empty() {
1679                        assert!(
1680                            result.contains(&trimmed.to_string()),
1681                            "missing tag: {trimmed}"
1682                        );
1683                    }
1684                }
1685            }
1686
1687            /// `merge_entries` keeps casefolded ids unique and sorted.
1688            #[test]
1689            fn merge_entries_unique_sorted_casefold_ids(
1690                existing in prop::collection::vec(("[A-Za-z]{1,8}", "[a-z]{1,8}"), 0..10),
1691                npm in prop::collection::vec(("[A-Za-z]{1,8}", "[a-z]{1,8}"), 0..10),
1692                git in prop::collection::vec(("[A-Za-z]{1,8}", "[a-z]{1,8}"), 0..10)
1693            ) {
1694                let to_entries = |rows: Vec<(String, String)>, prefix: &str| {
1695                    rows.into_iter()
1696                        .map(|(id, name)| make_entry(&format!("{prefix}/{id}"), &name))
1697                        .collect::<Vec<_>>()
1698                };
1699                let merged = merge_entries(
1700                    to_entries(existing, "npm"),
1701                    to_entries(npm, "npm"),
1702                    to_entries(git, "git"),
1703                );
1704
1705                let lower_ids = merged
1706                    .iter()
1707                    .map(|entry| entry.id.to_ascii_lowercase())
1708                    .collect::<Vec<_>>();
1709                let mut sorted = lower_ids.clone();
1710                sorted.sort();
1711                assert_eq!(lower_ids, sorted);
1712
1713                let unique = lower_ids.iter().cloned().collect::<std::collections::BTreeSet<_>>();
1714                assert_eq!(unique.len(), lower_ids.len());
1715            }
1716
1717            /// `search` output is bounded by limit and sorted by non-increasing score.
1718            #[test]
1719            fn search_bounded_and_score_sorted(
1720                rows in prop::collection::vec(("[a-z]{1,8}", "[a-z]{1,8}", prop::option::of("[a-z ]{1,20}")), 0..16),
1721                query in "[a-z]{1,6}",
1722                limit in 0usize..16usize
1723            ) {
1724                let entries = rows
1725                    .into_iter()
1726                    .map(|(id, name, description)| ExtensionIndexEntry {
1727                        id: format!("npm/{id}"),
1728                        name,
1729                        description: description.map(|s| s.trim().to_string()).filter(|s| !s.is_empty()),
1730                        tags: vec!["tag".to_string()],
1731                        license: None,
1732                        source: None,
1733                        install_source: Some(format!("npm:{id}")),
1734                    })
1735                    .collect::<Vec<_>>();
1736                let index = ExtensionIndex {
1737                    schema: EXTENSION_INDEX_SCHEMA.to_string(),
1738                    version: EXTENSION_INDEX_VERSION,
1739                    generated_at: None,
1740                    last_refreshed_at: None,
1741                    entries,
1742                };
1743
1744                let hits = index.search(&query, limit);
1745                assert!(hits.len() <= limit);
1746                assert!(hits.windows(2).all(|pair| pair[0].score >= pair[1].score));
1747                assert!(hits.iter().all(|hit| hit.score > 0));
1748            }
1749
1750            /// Name ambiguity must fail-open to `None`; exact id remains resolvable.
1751            #[test]
1752            fn resolve_install_source_ambiguous_name_none_exact_id_some(
1753                name in "[a-z]{1,10}",
1754                left in "[a-z]{1,8}",
1755                right in "[a-z]{1,8}"
1756            ) {
1757                prop_assume!(!left.eq_ignore_ascii_case(&right));
1758
1759                let left_id = format!("npm/{left}");
1760                let right_id = format!("npm/{right}");
1761                let left_install = format!("npm:{left}@1.0.0");
1762                let right_install = format!("npm:{right}@2.0.0");
1763
1764                let index = ExtensionIndex {
1765                    schema: EXTENSION_INDEX_SCHEMA.to_string(),
1766                    version: EXTENSION_INDEX_VERSION,
1767                    generated_at: None,
1768                    last_refreshed_at: None,
1769                    entries: vec![
1770                        ExtensionIndexEntry {
1771                            id: left_id.clone(),
1772                            name: name.clone(),
1773                            description: None,
1774                            tags: Vec::new(),
1775                            license: None,
1776                            source: Some(ExtensionIndexSource::Npm {
1777                                package: left,
1778                                version: Some("1.0.0".to_string()),
1779                                url: None,
1780                            }),
1781                            install_source: Some(left_install.clone()),
1782                        },
1783                        ExtensionIndexEntry {
1784                            id: right_id.clone(),
1785                            name: name.clone(),
1786                            description: None,
1787                            tags: Vec::new(),
1788                            license: None,
1789                            source: Some(ExtensionIndexSource::Npm {
1790                                package: right,
1791                                version: Some("2.0.0".to_string()),
1792                                url: None,
1793                            }),
1794                            install_source: Some(right_install.clone()),
1795                        },
1796                    ],
1797                };
1798
1799                assert_eq!(index.resolve_install_source(&name), None);
1800                assert_eq!(index.resolve_install_source(&left_id), Some(left_install));
1801                assert_eq!(index.resolve_install_source(&right_id), Some(right_install));
1802            }
1803
1804            /// `ExtensionIndexSource` serde roundtrip for Npm variant.
1805            #[test]
1806            fn source_npm_serde(pkg in "[a-z]{1,10}", ver in "[0-9]\\.[0-9]\\.[0-9]") {
1807                let source = ExtensionIndexSource::Npm {
1808                    package: pkg,
1809                    version: Some(ver),
1810                    url: None,
1811                };
1812                let json = serde_json::to_string(&source).unwrap();
1813                let _: ExtensionIndexSource = serde_json::from_str(&json).unwrap();
1814            }
1815
1816            /// `ExtensionIndexSource` serde roundtrip for Git variant.
1817            #[test]
1818            fn source_git_serde(repo in "[a-z]{1,10}/[a-z]{1,10}") {
1819                let source = ExtensionIndexSource::Git {
1820                    repo,
1821                    path: None,
1822                    r#ref: None,
1823                };
1824                let json = serde_json::to_string(&source).unwrap();
1825                let _: ExtensionIndexSource = serde_json::from_str(&json).unwrap();
1826            }
1827
1828            /// `ExtensionIndexEntry` serde roundtrip.
1829            #[test]
1830            fn entry_serde_roundtrip(
1831                id in "[a-z]{1,10}",
1832                name in "[a-z]{1,10}"
1833            ) {
1834                let entry = make_entry(&id, &name);
1835                let json = serde_json::to_string(&entry).unwrap();
1836                let back: ExtensionIndexEntry = serde_json::from_str(&json).unwrap();
1837                assert_eq!(back.id, id);
1838                assert_eq!(back.name, name);
1839            }
1840        }
1841    }
1842}