// algocline_app/service/hub.rs

1//! Hub — package discovery, search, and index management.
2//!
3//! The Hub is algocline's package registry layer.  It aggregates remote
4//! index data with local install state so that users (via AI) can
5//! **discover** packages they haven't installed yet, and **inspect**
6//! installed packages with full Card and eval statistics.
7//!
8//! ## Staged design
9//!
10//! | Stage | Scope | Status |
11//! |-------|-------|--------|
12//! | **1** | Card Collection install, Pkg-bundled cards | Done |
13//! | **2** | Hub MCP tools (`hub_search`, `hub_info`, `hub_reindex`), local index | Done |
14//! | **3** | Aggregated remote collection index, `hub_publish`, LP | Planned |
15//!
16//! ## MCP tools
17//!
18//! | Tool | Description |
19//! |------|-------------|
20//! | `alc_hub_search` | Discover packages across remote + local indices |
21//! | `alc_hub_info` | Detailed single-package view (meta + cards + aliases + stats) |
22//! | `alc_hub_reindex` | Rebuild index from local packages or a repo checkout |
23//!
24//! ## Index schema (`hub_index/v0`)
25//!
26//! ```json
27//! {
28//!   "schema_version": "hub_index/v0",
29//!   "updated_at": "2026-04-12T10:00:00Z",
30//!   "packages": [{
31//!     "name": "cot",
32//!     "version": "0.1.0",
33//!     "description": "Chain-of-Thought prompting",
34//!     "category": "reasoning",
35//!     "source": "https://github.com/...",
36//!     "card_count": 3,
37//!     "best_card": { "card_id": "...", "model": "...", "pass_rate": 0.82, "scenario": "..." }
38//!   }]
39//! }
40//! ```
41//!
42//! Index generation uses `init.lua` M.meta parsing only — no Lua VM
43//! required.  This keeps the index buildable in CI environments.
44//!
45//! ## Index URL discovery (4-tier)
46//!
47//! Sources are checked in priority order; URLs are deduplicated:
48//!
49//!   0. **Collection URL** — `[hub].collection_url` in `~/.algocline/config.toml`.
50//!      Aggregated index containing all known packages (Stage 3).
51//!   1. **Hub registries** — `~/.algocline/hub_registries.json`, auto-populated
52//!      by `pkg_install` and `card_install`.
53//!   2. **Installed manifest** — `~/.algocline/installed.json`, fallback for
54//!      sources registered before registries existed.
55//!   3. **Compiled-in seeds** — bundled-packages source for first-run bootstrap.
56//!
57//! GitHub repo URLs are transformed to raw index URLs:
58//!
59//! ```text
60//! https://github.com/{owner}/{repo}
61//!   → https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
62//! ```
63//!
64//! ## Caching
65//!
66//! Remote indices are cached per-source at
67//! `~/.algocline/hub_cache/{hash}.json` where hash is FNV-1a of the
68//! URL.  TTL is 1 hour.
69//!
70//! ## Registry persistence
71//!
72//! `~/.algocline/hub_registries.json` records source URLs from
73//! `pkg_install` and `card_install`.  Written atomically (tempfile +
74//! rename) to avoid corruption on interruption.
75
76use std::collections::{HashMap, HashSet};
77use std::path::PathBuf;
78
79use serde::{Deserialize, Serialize};
80
81use algocline_core::{AppDir, PkgEntity, PkgType};
82
83use super::list_opts::{
84    apply_sort_by_value, matches_filter, parse_sort, project_fields, resolve_fields, ListOpts,
85    HUB_SEARCH_FULL, HUB_SEARCH_SUMMARY,
86};
87use super::manifest;
88use super::resolve::{AUTO_INSTALL_SOURCES, LUA_TYPE_AUTODETECT};
89use super::source::PackageSource;
90use super::AppService;
91use super::HubRegistriesError;
92
93// ─── Constants ─────────────────────────────────────────────────
94
/// Cache TTL in seconds (1 hour). A cached index older than this is stale.
const CACHE_TTL_SECS: u64 = 60 * 60;
97
/// Guard against names that cannot be safely interpolated into a Lua `require()`
/// call.
///
/// Returns `true` only when `name` is non-empty and every byte is an ASCII
/// alphanumeric, an underscore, or a hyphen. Any non-ASCII input is rejected
/// because the check runs byte-wise.
/// Mirrors the same check in `pkg/list.rs`, `pkg/repair.rs`, etc.
fn is_safe_pkg_name(name: &str) -> bool {
    if name.is_empty() {
        return false;
    }
    name.bytes()
        .all(|b| matches!(b, b'a'..=b'z' | b'A'..=b'Z' | b'0'..=b'9' | b'_' | b'-'))
}
107
/// HTTP request timeout (30 seconds), applied as the agent's global timeout.
const HTTP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
110
111// ─── Index schema ──────────────────────────────────────────────
112
113/// Remote index — same shape as the local index so merge is trivial.
114#[derive(Debug, Clone, Serialize, Deserialize)]
115pub(crate) struct HubIndex {
116    pub schema_version: String,
117    #[serde(default)]
118    pub updated_at: String,
119    #[serde(default)]
120    pub packages: Vec<IndexEntry>,
121}
122
123/// One package in the index.
124///
125/// `entity` carries the canonical Lua `M.meta` projection (name, version,
126/// description, category, docstring) via `#[serde(flatten)]` so the wire
127/// shape is identical to the pre-refactor flat-object layout. `source`
128/// is the typed package source; `card_count` / `best_card` are hub-side
129/// enrichments computed at index-build time.
130#[derive(Debug, Clone, Serialize, Deserialize)]
131pub(crate) struct IndexEntry {
132    #[serde(flatten)]
133    pub entity: PkgEntity,
134    /// How this package was obtained. Typed on write; legacy bare strings
135    /// in pre-migration `hub_index.json` deserialize via the serde shim
136    /// on `PackageSource` (see `service::source`).
137    #[serde(default)]
138    pub source: PackageSource,
139    #[serde(default)]
140    pub card_count: usize,
141    #[serde(default)]
142    pub best_card: Option<BestCard>,
143}
144
145/// Best card summary within a package.
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub(crate) struct BestCard {
148    pub card_id: String,
149    #[serde(default)]
150    pub model: String,
151    #[serde(default)]
152    pub pass_rate: f64,
153    #[serde(default)]
154    pub scenario: String,
155}
156
157/// Search result — index entry enriched with local install state.
158///
159/// `entity.docstring` is `skip_serializing` (via the `skip_docstring`
160/// custom serializer on the flattened struct) so the default serde output
161/// never exposes the docstring field — docstrings can be large and
162/// dominate payload size. The `hub_search` projection path re-attaches
163/// the docstring to the output object when the resolved field set
164/// contains `"docstring"`, via
165/// [`SearchResult::to_value_with_optional_docstring`].
166///
167/// `docstring_matched` is a query-time signal: it is `Some(true)` only
168/// when the query hit docstring and none of {name, description, category}.
169/// Otherwise (no query, or query hit any of the other fields) it is
170/// `None` and omitted from the output.
171///
172/// Because `#[serde(flatten)]` composes poorly with field-level
173/// `skip_serializing`, we carry the non-docstring part of `PkgEntity`
174/// via a custom `serialize_entity_without_docstring` path rather than a
175/// bare `#[serde(flatten)]`. The struct still holds a full `PkgEntity`
176/// internally for consistency with `IndexEntry`.
177#[derive(Debug, Clone, Serialize)]
178struct SearchResult {
179    #[serde(flatten, serialize_with = "serialize_entity_without_docstring")]
180    entity: PkgEntity,
181    /// Typed source (mirrors `IndexEntry.source`).
182    source: PackageSource,
183    installed: bool,
184    card_count: usize,
185    best_card: Option<BestCard>,
186    #[serde(skip_serializing_if = "Option::is_none")]
187    docstring_matched: Option<bool>,
188}
189
190/// Serialize a `PkgEntity` as a flat JSON object, intentionally dropping
191/// the `docstring` field so large docstrings do not dominate `hub_search`
192/// payloads. The projection path re-attaches docstring via
193/// [`SearchResult::to_value_with_optional_docstring`].
194fn serialize_entity_without_docstring<S>(entity: &PkgEntity, ser: S) -> Result<S::Ok, S::Error>
195where
196    S: serde::Serializer,
197{
198    use serde::ser::SerializeMap;
199    let mut map = ser.serialize_map(Some(6))?;
200    map.serialize_entry("name", &entity.name)?;
201    map.serialize_entry("version", &entity.version)?;
202    map.serialize_entry("description", &entity.description)?;
203    map.serialize_entry("category", &entity.category)?;
204    map.serialize_entry("tags", &entity.tags)?;
205    map.serialize_entry("type", &entity.pkg_type)?;
206    map.end()
207}
208
209impl SearchResult {
210    /// Serialize `self` to a JSON `Value`, optionally re-attaching
211    /// `docstring` to the resulting object.
212    ///
213    /// `skip_serializing` removes `docstring` from every serde output
214    /// path. When projection selects `docstring` as an output field, we
215    /// need to put it back — this helper bridges that gap by inserting
216    /// the field manually into the resulting `Value::Object`.
217    ///
218    /// Returns the original `Value` unchanged if serialization produced
219    /// a non-object (should not happen for `SearchResult`, but we stay
220    /// defensive because the downstream `project_fields` contract
221    /// tolerates non-objects).
222    fn to_value_with_optional_docstring(&self, include_docstring: bool) -> serde_json::Value {
223        let mut v = serde_json::to_value(self).unwrap_or(serde_json::Value::Null);
224        if include_docstring {
225            if let serde_json::Value::Object(ref mut map) = v {
226                let doc = self.entity.docstring.clone().unwrap_or_default();
227                map.insert("docstring".to_string(), serde_json::Value::String(doc));
228            }
229        }
230        v
231    }
232}
233
234// ─── Hub registries ───────────────────────────────────────────
235//
236// Persistent file (`~/.algocline/hub_registries.json`) that records
237// source URLs from `pkg_install` and `card_install`.  This is the
238// primary source for Hub index URL discovery — the manifest and the
239// bundled-packages seed serve as fallback sources.
240
241/// One entry in `hub_registries.json`.
242#[derive(Debug, Clone, Serialize, Deserialize)]
243pub(crate) struct RegistryEntry {
244    /// Original source URL (Git repo or local path).
245    pub source: String,
246    /// How it was registered: "pkg_install" or "card_install".
247    pub origin: String,
248    /// ISO 8601 timestamp of when the entry was added.
249    pub added_at: String,
250}
251
252/// Top-level registries file.
253#[derive(Debug, Clone, Serialize, Deserialize, Default)]
254pub(crate) struct HubRegistries {
255    pub registries: Vec<RegistryEntry>,
256}
257
258fn registries_path(app_dir: &AppDir) -> PathBuf {
259    app_dir.hub_registries_json()
260}
261
262/// Load registries from disk.
263///
264/// Returns `Ok(HubRegistries::default())` when the file does not yet exist —
265/// the file is created lazily on first `register_source` call. Returns `Err`
266/// when the file exists but cannot be read (I/O error) or parsed (corrupt
267/// JSON), so callers can surface the failure instead of silently degrading hub
268/// discovery.
269fn load_registries(app_dir: &AppDir) -> Result<HubRegistries, HubRegistriesError> {
270    let path = registries_path(app_dir);
271    if !path.exists() {
272        return Ok(HubRegistries::default());
273    }
274    let content = std::fs::read_to_string(&path).map_err(|e| {
275        HubRegistriesError::Parse(format!(
276            "failed to read hub_registries.json at {}: {e}",
277            path.display()
278        ))
279    })?;
280    serde_json::from_str::<HubRegistries>(&content).map_err(|e| {
281        HubRegistriesError::Parse(format!(
282            "failed to parse hub_registries.json at {}: {e}",
283            path.display()
284        ))
285    })
286}
287
288/// Register a source URL.  Deduplicates by normalized URL.
289///
290/// Returns `Ok(())` on success or when the input is skipped (empty /
291/// local path / already registered). Filesystem failures are returned
292/// as `Err(String)` so callers can surface them on the MCP wire
293/// response — the registry is best-effort relative to the `pkg_install`
294/// itself, but the caller still needs to know when it silently failed
295/// (otherwise hub discovery degrades without any signal).
296///
297/// Uses atomic write (tempfile + rename) to avoid partial writes if
298/// the process is interrupted. Read-modify-write is not locked across
299/// processes, but MCP servers are single-process so this is safe in
300/// practice.
301pub(crate) fn register_source(app_dir: &AppDir, source: &str, origin: &str) -> Result<(), String> {
302    let normalized = source.trim_end_matches('/').to_string();
303    if normalized.is_empty() {
304        return Ok(());
305    }
306    // Skip local paths — they can't host a remote index
307    if normalized.starts_with('/') || normalized.starts_with('.') {
308        return Ok(());
309    }
310
311    let path = registries_path(app_dir);
312    if let Some(parent) = path.parent() {
313        std::fs::create_dir_all(parent).map_err(|e| {
314            format!(
315                "failed to create hub registries dir {}: {e}",
316                parent.display()
317            )
318        })?;
319    }
320
321    // Re-read from disk right before write to minimize TOCTOU window.
322    // Parse failure is propagated — a corrupt registries file means we
323    // cannot safely read-modify-write without risking data loss.
324    let mut reg = load_registries(app_dir).map_err(|e| format!("cannot register source: {e}"))?;
325
326    // Already registered?
327    if reg
328        .registries
329        .iter()
330        .any(|e| e.source.trim_end_matches('/') == normalized)
331    {
332        return Ok(());
333    }
334
335    reg.registries.push(RegistryEntry {
336        source: normalized,
337        origin: origin.to_string(),
338        added_at: manifest::now_iso8601(),
339    });
340
341    // Atomic write: write to temp file, then rename
342    let json = serde_json::to_string_pretty(&reg)
343        .map_err(|e| format!("failed to serialize hub registries: {e}"))?;
344    let tmp_path = path.with_extension("json.tmp");
345    std::fs::write(&tmp_path, &json).map_err(|e| {
346        format!(
347            "failed to write hub registries tmp {}: {e}",
348            tmp_path.display()
349        )
350    })?;
351    std::fs::rename(&tmp_path, &path).map_err(|e| {
352        // Best-effort cleanup of the stale tmp file on rename failure.
353        let _ = std::fs::remove_file(&tmp_path);
354        format!(
355            "failed to atomically rename hub registries onto {}: {e}",
356            path.display()
357        )
358    })
359}
360
361// ─── Hub config ──────────────────────────────────────────────
362//
363// Optional `[hub]` section in `~/.algocline/config.toml`:
364//
365//   [hub]
366//   collection_url = "https://raw.githubusercontent.com/.../hub_index.json"
367//
368// When set, this is fetched as Tier 0 (the aggregated collection
369// index containing all known packages, including uninstalled ones).
370
371/// Read the `[hub].collection_url` from `~/.algocline/config.toml`.
372///
373/// Returns:
374/// - `Ok(Some(url))` — file exists, parses cleanly, `[hub].collection_url` present and non-empty.
375/// - `Ok(None)` — file absent (normal: config is optional) or `[hub].collection_url` not set.
376/// - `Err(msg)` — file exists but TOML parse fails (corruption); caller should surface as warning.
377fn collection_url_from_config(app_dir: &AppDir) -> Result<Option<String>, String> {
378    let path = app_dir.config_toml();
379    let content = match std::fs::read_to_string(&path) {
380        Ok(c) => c,
381        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
382        Err(_) => return Ok(None), // permission errors etc. treated as absent
383    };
384    let doc: toml_edit::DocumentMut = content
385        .parse()
386        .map_err(|e| format!("config.toml parse: {e}"))?;
387    let url = match doc
388        .get("hub")
389        .and_then(|h| h.get("collection_url"))
390        .and_then(|v| v.as_str())
391    {
392        Some(s) => s.trim().to_string(),
393        None => return Ok(None),
394    };
395    if url.is_empty() {
396        Ok(None)
397    } else {
398        Ok(Some(url))
399    }
400}
401
402// ─── Index URL discovery ──────────────────────────────────────
403//
404// Derives remote index URLs from:
405//   0. Hub Collection URL (from config.toml) — aggregated index
406//   1. Hub registries (`hub_registries.json`) — primary source
407//   2. Unique `source` fields in the installed-packages manifest
408//   3. Bundled-packages seed (for first-run bootstrap)
409//
410// GitHub repos are transformed:
411//   https://github.com/{owner}/{repo}  →
412//   https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
413
/// Convert a GitHub repo URL to a raw `hub_index.json` URL.
///
/// Trailing slashes and a trailing `.git` are ignored. For
/// `https://github.com/{owner}/{repo}[/...]` the result points at
/// `hub_index.json` on the `main` branch of that repo; anything after the
/// repo segment is discarded, and a `.git` suffix on the repo segment is
/// stripped even when a sub-path follows it. A GitHub URL with an empty
/// owner or repo segment is not transformed.
///
/// Any other input is passed through unchanged when it already ends in
/// `.json` (treated as a direct index URL); otherwise `None` is returned
/// (future: support other hosts).
fn repo_to_index_url(repo_url: &str) -> Option<String> {
    let trimmed = repo_url.trim_end_matches('/').trim_end_matches(".git");
    if let Some(path) = trimmed.strip_prefix("https://github.com/") {
        // path = "owner/repo[/rest]"
        let mut segments = path.splitn(3, '/');
        if let (Some(owner), Some(repo)) = (segments.next(), segments.next()) {
            let repo = repo.strip_suffix(".git").unwrap_or(repo);
            if !owner.is_empty() && !repo.is_empty() {
                return Some(format!(
                    "https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json"
                ));
            }
        }
    }
    // Not a usable GitHub repo URL: accept it only if it already looks like
    // a direct index URL.
    trimmed.ends_with(".json").then(|| trimmed.to_string())
}
435
436/// Collect unique index URLs from config + registries + manifest + bundled seeds.
437///
438/// Returns `Err` if the installed manifest cannot be read (corrupt JSON /
439/// permission denied). The function intentionally surfaces manifest-read
440/// failures rather than silently skipping — callers feed these URLs into
441/// hub resolution, and a partial URL set is indistinguishable from a
442/// corrupt manifest without the signal.
443///
444/// `warnings` collects non-fatal issues (e.g. config.toml TOML parse failure)
445/// that the caller should surface on the MCP wire response.
446fn discover_index_urls(
447    app_dir: &AppDir,
448    warnings: &mut Vec<String>,
449) -> Result<Vec<String>, String> {
450    let mut index_urls: Vec<String> = Vec::new();
451
452    // 0. From config.toml [hub].collection_url (Tier 0 — aggregated collection).
453    // Parse failures (corrupted config) are collected as warnings so the
454    // rest of discovery proceeds — the file is optional, but corruption
455    // is distinguishable from absence and must be surfaced to the caller.
456    match collection_url_from_config(app_dir) {
457        Ok(Some(url)) => index_urls.push(url),
458        Ok(None) => {}
459        Err(e) => warnings.push(format!("config.toml hub.collection_url: {e}")),
460    }
461
462    let mut repo_urls: HashSet<String> = HashSet::new();
463
464    // 1. From hub registries (primary). Parse failure is propagated so
465    // callers know the registry is degraded — a partial URL set from a
466    // corrupt file is indistinguishable from intentionally empty.
467    // `HubRegistriesError` is converted to `String` at the wire boundary
468    // (`discover_index_urls` still returns `Result<_, String>`).
469    let reg = load_registries(app_dir).map_err(|e| e.to_string())?;
470    for entry in &reg.registries {
471        let normalized = entry.source.trim_end_matches('/').to_string();
472        if !normalized.is_empty() {
473            repo_urls.insert(normalized);
474        }
475    }
476
477    // 2. From manifest (catch sources registered before hub_registries existed).
478    // Only Git-variant sources can host a remote hub_index.json; other variants
479    // (Path / Installed / Bundled / Unknown) are skipped by `git_url()` returning None.
480    let m = manifest::load_manifest(app_dir)?;
481    for entry in m.packages.values() {
482        if let Some(url) = entry.source.git_url() {
483            let normalized = url.trim_end_matches('/').to_string();
484            if !normalized.is_empty() {
485                repo_urls.insert(normalized);
486            }
487        }
488    }
489
490    // 3. Fallback: bundled sources (ensures at least these are checked)
491    for url in AUTO_INSTALL_SOURCES {
492        repo_urls.insert(url.to_string());
493    }
494
495    // 4. Transform repo URLs → index URLs, dedup against Tier 0
496    let existing: HashSet<String> = index_urls.iter().cloned().collect();
497    let mut derived: Vec<String> = repo_urls
498        .iter()
499        .filter_map(|url| repo_to_index_url(url))
500        .filter(|url| !existing.contains(url))
501        .collect();
502    derived.sort();
503    derived.dedup();
504    index_urls.extend(derived);
505
506    Ok(index_urls)
507}
508
509// ─── Per-source cache ─────────────────────────────────────────
510//
511// Each remote index is cached separately at
512// `~/.algocline/hub_cache/{hash}.json` where hash is derived from
513// the index URL. This avoids mixing data from different registries
514// and allows per-source TTL validation.
515
516fn cache_dir(app_dir: &AppDir) -> PathBuf {
517    app_dir.hub_cache_dir()
518}
519
/// Derive a stable cache file stem from an index URL.
///
/// Computes the 64-bit FNV-1a hash of the URL's bytes and renders it as
/// 16 zero-padded lowercase hex digits. Hand-rolled to avoid pulling in a
/// hash crate — good enough for cache file naming.
fn cache_key(url: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    // FNV-1a: XOR the byte in first, then multiply by the prime.
    let hash = url.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{hash:016x}")
}
530
531/// Result of a cache lookup distinguishing absent, stale, fresh, and corrupt.
532///
533/// Used by `load_cached_full` (called from `aggregate_index`) to allow
534/// stale data to be merged into the aggregate while a warning is emitted.
535/// `load_cached` (used by `fetch_one`) maps both `NotPresent` and `Stale`
536/// to `Ok(None)` for backward compat.
537enum CacheLookup {
538    /// File absent.
539    NotPresent,
540    /// File present but older than `CACHE_TTL_SECS`; contains the stale data.
541    Stale(HubIndex),
542    /// File present, within TTL, parsed cleanly.
543    Fresh(HubIndex),
544    /// File present (within TTL) but JSON parse failed.
545    Corrupt(String),
546}
547
548/// Full cache lookup that distinguishes stale from absent.
549///
550/// Used by `aggregate_index` so stale data can still be merged with a
551/// warning, rather than being silently discarded.
552fn load_cached_full(app_dir: &AppDir, url: &str) -> CacheLookup {
553    let dir = cache_dir(app_dir);
554    let path = dir.join(format!("{}.json", cache_key(url)));
555    if !path.exists() {
556        return CacheLookup::NotPresent;
557    }
558    let metadata = match std::fs::metadata(&path) {
559        Ok(m) => m,
560        Err(_) => return CacheLookup::NotPresent,
561    };
562    let age = match metadata.modified().ok().and_then(|t| t.elapsed().ok()) {
563        Some(a) => a,
564        None => return CacheLookup::NotPresent,
565    };
566    let content = match std::fs::read_to_string(&path) {
567        Ok(c) => c,
568        Err(e) => return CacheLookup::Corrupt(format!("hub cache read {}: {e}", path.display())),
569    };
570    match serde_json::from_str::<HubIndex>(&content) {
571        Ok(index) => {
572            if age.as_secs() > CACHE_TTL_SECS {
573                CacheLookup::Stale(index)
574            } else {
575                CacheLookup::Fresh(index)
576            }
577        }
578        Err(e) => CacheLookup::Corrupt(format!("hub cache parse {}: {e}", path.display())),
579    }
580}
581
582/// Load cached remote index for a specific URL if fresh (within TTL).
583///
584/// Returns:
585/// - `Ok(Some(index))` — cache hit: file exists, within TTL, parses cleanly.
586/// - `Ok(None)` — cache miss: file absent, expired, or metadata unreadable (treat as miss).
587/// - `Err(msg)` — file exists and is within TTL but JSON parse fails (corruption);
588///   caller should surface as warning and fall back to a network fetch.
589fn load_cached(app_dir: &AppDir, url: &str) -> Result<Option<HubIndex>, String> {
590    match load_cached_full(app_dir, url) {
591        CacheLookup::Fresh(index) => Ok(Some(index)),
592        CacheLookup::NotPresent | CacheLookup::Stale(_) => Ok(None),
593        CacheLookup::Corrupt(msg) => Err(msg),
594    }
595}
596
597/// Save remote index to per-source cache file.
598///
599/// Returns `Ok(())` on success. Cache write failures are returned as
600/// `Err(String)`; the caller (`fetch_one`) carries them out of band so
601/// hub fetch still completes (the index is in memory) but the warning
602/// surfaces to the MCP wire response via the existing `warnings` channel.
603fn save_cached(app_dir: &AppDir, url: &str, index: &HubIndex) -> Result<(), String> {
604    let dir = cache_dir(app_dir);
605    std::fs::create_dir_all(&dir)
606        .map_err(|e| format!("failed to create hub cache dir {}: {e}", dir.display()))?;
607    let path = dir.join(format!("{}.json", cache_key(url)));
608    let json = serde_json::to_string_pretty(index)
609        .map_err(|e| format!("failed to serialize hub cache: {e}"))?;
610    std::fs::write(&path, json)
611        .map_err(|e| format!("failed to write hub cache {}: {e}", path.display()))
612}
613
614// ─── Remote fetch ──────────────────────────────────────────────
615
616/// Fetch a single remote index by URL, using per-source cache.
617///
618/// Returns the index plus an optional cache-related warning. The warning
619/// is non-None when either:
620/// - The network fetch succeeded but persisting the cache to disk failed.
621/// - The cache file was present and within TTL but failed to parse
622///   (corruption); in that case the function falls back to a network
623///   fetch and includes the parse-failure in the warning so the operator
624///   can investigate the on-disk state.
625fn fetch_one(app_dir: &AppDir, url: &str) -> Result<(HubIndex, Option<String>), String> {
626    // Distinguish cache corruption (Err) from cache miss (Ok(None)).
627    match load_cached(app_dir, url) {
628        Ok(Some(cached)) => return Ok((cached, None)),
629        Ok(None) => {} // cache miss — proceed to network fetch
630        Err(e) => {
631            // Cache file is corrupt. Fall through to network fetch and
632            // carry the corruption warning so the caller can surface it.
633            // We don't return Err here because the network path may still succeed.
634            let warn = format!("hub cache corrupted for {url}: {e}; falling back to network");
635            // Attempt network fetch; on success, attach the cache-corruption warning.
636            return fetch_one_from_network(app_dir, url)
637                .map(|(idx, save_warn)| {
638                    // Prefer the corruption warning; save_warn is secondary.
639                    let combined = Some(match save_warn {
640                        Some(sw) => format!("{warn}; {sw}"),
641                        None => warn.clone(),
642                    });
643                    (idx, combined)
644                })
645                .map_err(|fetch_err| format!("{warn}; network fetch also failed: {fetch_err}"));
646        }
647    }
648
649    fetch_one_from_network(app_dir, url)
650}
651
652/// Network-only path for fetching a remote index (no cache read).
653///
654/// On success returns `(index, Option<cache_write_warning>)`.
655fn fetch_one_from_network(
656    app_dir: &AppDir,
657    url: &str,
658) -> Result<(HubIndex, Option<String>), String> {
659    let agent = ureq::Agent::new_with_config(
660        ureq::config::Config::builder()
661            .timeout_global(Some(HTTP_TIMEOUT))
662            .build(),
663    );
664    let body: String = agent
665        .get(url)
666        .call()
667        .map_err(|e| format!("Failed to fetch {url}: {e}"))?
668        .body_mut()
669        .read_to_string()
670        .map_err(|e| format!("Failed to read response from {url}: {e}"))?;
671
672    let index: HubIndex = serde_json::from_str(&body)
673        .map_err(|e| format!("Failed to parse index from {url}: {e}"))?;
674
675    let cache_warning = save_cached(app_dir, url, &index)
676        .err()
677        .map(|e| format!("hub cache write for {url}: {e}"));
678    Ok((index, cache_warning))
679}
680
681/// Fetch all discovered remote indices and merge into one.
682/// Falls back gracefully: failed sources are skipped with warnings.
683fn fetch_remote_indices(app_dir: &AppDir) -> Result<(HubIndex, Vec<String>), String> {
684    let mut warnings: Vec<String> = Vec::new();
685    let urls = discover_index_urls(app_dir, &mut warnings)?;
686    let mut all_packages: Vec<IndexEntry> = Vec::new();
687    let mut seen_names: HashSet<String> = HashSet::new();
688
689    for url in &urls {
690        match fetch_one(app_dir, url) {
691            Ok((index, cache_warning)) => {
692                for entry in index.packages {
693                    if seen_names.insert(entry.entity.name.clone()) {
694                        all_packages.push(entry);
695                    }
696                    // If duplicate name across sources, first wins
697                }
698                if let Some(w) = cache_warning {
699                    warnings.push(w);
700                }
701            }
702            Err(e) => {
703                warnings.push(e);
704            }
705        }
706    }
707
708    if all_packages.is_empty() && !warnings.is_empty() {
709        warnings.insert(
710            0,
711            "all remote indices unavailable, showing local packages only".to_string(),
712        );
713    }
714
715    let merged = HubIndex {
716        schema_version: "hub_index/v0".into(),
717        updated_at: String::new(),
718        packages: all_packages,
719    };
720    Ok((merged, warnings))
721}
722
723// ─── Local state ───────────────────────────────────────────────
724
725/// Build a set of locally installed package names from `installed.json`
726/// and the `~/.algocline/packages/` directory.
727fn installed_packages(app_dir: &AppDir) -> Result<HashMap<String, Option<String>>, String> {
728    let mut map = HashMap::new();
729
730    // From manifest (has version info)
731    let m = manifest::load_manifest(app_dir)?;
732    for (name, entry) in &m.packages {
733        map.insert(name.clone(), entry.version.clone());
734    }
735
736    // Also scan packages/ dir in case manifest is stale
737    let pkg_dir = app_dir.packages_dir();
738    if let Ok(entries) = std::fs::read_dir(&pkg_dir) {
739        for entry in entries.flatten() {
740            if entry.path().is_dir() {
741                if let Some(name) = entry.file_name().to_str() {
742                    map.entry(name.to_string()).or_insert(None);
743                }
744            }
745        }
746    }
747
748    Ok(map)
749}
750
751/// Count local cards per package from `{app_dir}/cards/{pkg}/`.
752fn local_card_counts(app_dir: &AppDir) -> HashMap<String, usize> {
753    let mut map = HashMap::new();
754    let cards_dir = app_dir.cards_dir();
755    let entries = match std::fs::read_dir(&cards_dir) {
756        Ok(e) => e,
757        Err(_) => return map,
758    };
759    for entry in entries.flatten() {
760        if !entry.path().is_dir() {
761            continue;
762        }
763        let pkg = match entry.file_name().to_str() {
764            Some(n) => n.to_string(),
765            None => continue,
766        };
767        let count = std::fs::read_dir(entry.path())
768            .map(|es| {
769                es.flatten()
770                    .filter(|e| e.path().extension().is_some_and(|ext| ext == "toml"))
771                    .count()
772            })
773            .unwrap_or(0);
774        if count > 0 {
775            map.insert(pkg, count);
776        }
777    }
778    map
779}
780
781/// Count eval results for a specific package by scanning `{app_dir}/evals/`.
782///
783/// Reads only `.meta.json` files (lightweight) to check the strategy field.
784/// Falls back to reading full eval JSON if meta is missing.
785///
786/// `warnings` receives per-file corruption messages (read or parse failures).
787/// I/O errors on the directory itself return 0 silently (evals dir absent is
788/// a legitimate "no evals yet" state). Per-file errors that indicate corruption
789/// (file exists but is unreadable or unparseable) are pushed to `warnings` so
790/// the caller can surface them on the MCP wire response.
791fn count_evals_for_pkg(app_dir: &AppDir, pkg: &str, warnings: &mut Vec<String>) -> usize {
792    let evals_dir = app_dir.evals_dir();
793    let entries = match std::fs::read_dir(&evals_dir) {
794        Ok(e) => e,
795        Err(_) => return 0,
796    };
797
798    // Collect all filenames first so ordering doesn't matter.
799    // We track stems that have a .meta.json to avoid reading the full eval JSON.
800    let mut meta_stems: HashSet<String> = HashSet::new();
801    let mut meta_matches: usize = 0;
802    let mut non_meta_paths: Vec<(PathBuf, String)> = Vec::new(); // (path, stem)
803
804    for entry in entries.flatten() {
805        let path = entry.path();
806        let name = match path.file_name().and_then(|n| n.to_str()) {
807            Some(n) => n.to_string(),
808            None => continue,
809        };
810
811        if name.ends_with(".meta.json") {
812            let stem = name.trim_end_matches(".meta.json").to_string();
813            meta_stems.insert(stem.clone());
814            // Distinguish I/O failure from parse failure so corruption is visible.
815            match std::fs::read_to_string(&path) {
816                Ok(content) => match serde_json::from_str::<serde_json::Value>(&content) {
817                    Ok(val) => {
818                        if val.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
819                            meta_matches += 1;
820                        }
821                    }
822                    Err(e) => warnings.push(format!("eval meta parse {}: {e}", path.display())),
823                },
824                Err(e) => warnings.push(format!("eval meta read {}: {e}", path.display())),
825            }
826            continue;
827        }
828
829        // Skip non-json or comparison files
830        if !name.ends_with(".json") || name.starts_with("compare_") {
831            continue;
832        }
833
834        let stem = path
835            .file_stem()
836            .and_then(|s| s.to_str())
837            .unwrap_or("")
838            .to_string();
839        non_meta_paths.push((path, stem));
840    }
841
842    // Only read full eval JSON for entries without a .meta.json.
843    // Distinguish I/O and parse failures; both are surfaced as warnings.
844    let mut fallback_matches: usize = 0;
845    for (path, stem) in &non_meta_paths {
846        if meta_stems.contains(stem) {
847            continue;
848        }
849        match std::fs::read_to_string(path) {
850            Ok(c) => match serde_json::from_str::<serde_json::Value>(&c) {
851                Ok(v) => {
852                    if v.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
853                        fallback_matches += 1;
854                    }
855                }
856                Err(e) => warnings.push(format!("eval result parse {}: {e}", path.display())),
857            },
858            Err(e) => warnings.push(format!("eval result read {}: {e}", path.display())),
859        }
860    }
861
862    meta_matches + fallback_matches
863}
864
865// ─── Merge ─────────────────────────────────────────────────────
866
867/// Merge remote index with local install state.
868///
869/// When a package is installed locally and the remote index lacks a
870/// docstring (pre-v0.21 indices), the docstring is extracted from the
871/// local `init.lua` so that full-text search works immediately.
872fn merge(app_dir: &AppDir, remote: &HubIndex) -> Result<Vec<SearchResult>, String> {
873    let installed = installed_packages(app_dir)?;
874    let card_counts = local_card_counts(app_dir);
875    let pkg_dir: Option<PathBuf> = Some(app_dir.packages_dir());
876
877    let mut seen: HashSet<String> = HashSet::new();
878    let mut results: Vec<SearchResult> = Vec::new();
879
880    for entry in &remote.packages {
881        let pkg_name = &entry.entity.name;
882        let is_installed = installed.contains_key(pkg_name);
883        let local_cards = card_counts.get(pkg_name).copied().unwrap_or(0);
884
885        // Supplement empty docstring from local init.lua when installed.
886        // Re-parse via `PkgEntity` so the supplementation path stays
887        // consistent with `build_index`.
888        let docstring = if entry.entity.docstring.as_deref().unwrap_or("").is_empty()
889            && is_installed
890        {
891            pkg_dir
892                .as_ref()
893                .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(pkg_name).join("init.lua")))
894                .and_then(|e| e.docstring)
895        } else {
896            entry.entity.docstring.clone()
897        };
898
899        seen.insert(pkg_name.clone());
900        let mut merged_entity = entry.entity.clone();
901        merged_entity.docstring = docstring;
902        merged_entity.pkg_type = merged_entity.pkg_type.or(Some(PkgType::Runnable));
903        results.push(SearchResult {
904            entity: merged_entity,
905            source: entry.source.clone(),
906            installed: is_installed,
907            card_count: if is_installed && local_cards > entry.card_count {
908                local_cards
909            } else {
910                entry.card_count
911            },
912            best_card: entry.best_card.clone(),
913            docstring_matched: None,
914        });
915    }
916
917    // Add local-only packages (not in remote index).
918    for (name, version) in &installed {
919        if seen.contains(name) {
920            continue;
921        }
922        // Pull full `PkgEntity` from local init.lua when available (keeps the
923        // wire shape consistent with remote entries). When the package does
924        // not parse as a `PkgEntity` (missing `M.meta.name`), fall back to
925        // a minimal entity with just the directory name and the manifest
926        // version — the entry still appears in local-only listings, but the
927        // richer projection fields are simply absent.
928        let parsed_entity = pkg_dir
929            .as_ref()
930            .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(name).join("init.lua")));
931        let entity = parsed_entity.unwrap_or(PkgEntity {
932            name: name.clone(),
933            version: version.clone(),
934            description: None,
935            category: None,
936            docstring: None,
937            tags: None,
938            pkg_type: Some(PkgType::Runnable),
939            type_source: None,
940        });
941        results.push(SearchResult {
942            entity,
943            source: PackageSource::Unknown,
944            installed: true,
945            card_count: card_counts.get(name).copied().unwrap_or(0),
946            best_card: None,
947            docstring_matched: None,
948        });
949    }
950
951    Ok(results)
952}
953
954// ─── Search (filtering) ───────────────────────────────────────
955
956fn matches_query(result: &SearchResult, query: &str) -> bool {
957    let q = query.to_lowercase();
958    let pkg = &result.entity;
959    let empty = String::new();
960    pkg.name.to_lowercase().contains(&q)
961        || pkg
962            .description
963            .as_ref()
964            .unwrap_or(&empty)
965            .to_lowercase()
966            .contains(&q)
967        || pkg
968            .category
969            .as_ref()
970            .unwrap_or(&empty)
971            .to_lowercase()
972            .contains(&q)
973        || pkg
974            .docstring
975            .as_ref()
976            .unwrap_or(&empty)
977            .to_lowercase()
978            .contains(&q)
979        || pkg
980            .tags
981            .as_ref()
982            .is_some_and(|tags| tags.iter().any(|tag| tag.to_lowercase().contains(&q)))
983}
984
985// ─── Index generation (reindex) ───────────────────────────────
986//
987// The non-Lua-VM parser that used to live here
988// (`parse_meta_from_init_lua` / `extract_docstring`) has moved into
989// `algocline_core::PkgEntity::parse_from_init_lua`, where it is shared
990// with the manifest / lockfile wire format. The parsing tests migrated
991// with it; `hub.rs` now just consumes the typed `PkgEntity` projection.
992
993/// Build a hub index by scanning a packages directory.
994///
995/// When `source_dir` is provided, scans that directory directly
996/// (for generating an index from a repo checkout).  Metadata comes
997/// only from `init.lua` — no manifest lookup, no card counts.
998///
999/// When `source_dir` is `None`, scans `~/.algocline/packages/` and
1000/// enriches entries with manifest source and local card counts.
1001async fn build_index(
1002    app_dir: &AppDir,
1003    source_dir: Option<&std::path::Path>,
1004    executor: &std::sync::Arc<algocline_engine::Executor>,
1005) -> Result<HubIndex, String> {
1006    let empty = || HubIndex {
1007        schema_version: "hub_index/v0".into(),
1008        updated_at: super::manifest::now_iso8601(),
1009        packages: Vec::new(),
1010    };
1011
1012    let pkg_dir = match source_dir {
1013        Some(d) => d.to_path_buf(),
1014        None => app_dir.packages_dir(),
1015    };
1016
1017    let use_local_state = source_dir.is_none();
1018    let card_counts = if use_local_state {
1019        local_card_counts(app_dir)
1020    } else {
1021        HashMap::new()
1022    };
1023    // Manifest read errors surface as `Err` rather than degrading to an
1024    // empty manifest — when building the local hub index, a corrupt
1025    // `installed.json` silently turning all package sources into
1026    // `PackageSource::Unknown` would be indistinguishable from the
1027    // legitimate "no source recorded" state, and would ship into
1028    // generated `hub_index.json` files verbatim.
1029    let manifest = if use_local_state {
1030        manifest::load_manifest(app_dir)?
1031    } else {
1032        manifest::Manifest::default()
1033    };
1034
1035    let mut entries = Vec::new();
1036
1037    // Missing / unreadable `pkg_dir` is a legitimate "no packages yet"
1038    // state on a fresh install — distinct from manifest corruption
1039    // above, and safe to surface as an empty index.
1040    let dir_entries = match std::fs::read_dir(&pkg_dir) {
1041        Ok(e) => e,
1042        Err(_) => return Ok(empty()),
1043    };
1044
1045    for entry in dir_entries.flatten() {
1046        if !entry.path().is_dir() {
1047            continue;
1048        }
1049        let dir_name = match entry.file_name().to_str() {
1050            Some(n) if !n.starts_with('.') && !n.starts_with('_') => n.to_string(),
1051            _ => continue,
1052        };
1053
1054        let init_lua = entry.path().join("init.lua");
1055        if !init_lua.exists() {
1056            continue;
1057        }
1058
1059        // Silent-exclude gate: `PkgEntity::parse_from_init_lua` returns `None`
1060        // when `M.meta` is absent or `M.meta.name` is empty. Directories that
1061        // happen to contain an `init.lua` but aren't algocline packages
1062        // (e.g. `alc_shapes/`, a type DSL library) are dropped from the index
1063        // rather than falling through with a placeholder name — that would
1064        // pollute hub_search.
1065        let Some(mut entity) = PkgEntity::parse_from_init_lua(&init_lua) else {
1066            continue;
1067        };
1068
1069        // Resolve pkg_type via VM eval (LUA_TYPE_AUTODETECT) — single source of
1070        // truth for type detection. Unsafe names cannot be interpolated into
1071        // require() so they degrade to pkg_type: None. eval failures also degrade
1072        // to None (best-effort: hub index is a display mirror, not the gate that
1073        // rejects library pkgs at run/eval time).
1074        entity.pkg_type = if is_safe_pkg_name(&dir_name) {
1075            let code = format!(
1076                r#"package.loaded["{name}"] = nil
1077local pkg = require("{name}")
1078local meta = pkg.meta or {{ name = "{name}" }}
1079{LUA_TYPE_AUTODETECT}
1080return meta"#,
1081                name = dir_name,
1082                LUA_TYPE_AUTODETECT = LUA_TYPE_AUTODETECT,
1083            );
1084            let eval_result = if source_dir.is_some() {
1085                // source_dir mode: pkg is not in ~/.algocline, pass the pkg
1086                // directory as an extra lib path so require() resolves.
1087                executor
1088                    .eval_simple_with_paths(code, vec![pkg_dir.clone()], vec![])
1089                    .await
1090            } else {
1091                executor.eval_simple(code).await
1092            };
1093            match eval_result {
1094                Ok(meta) => meta
1095                    .get("type")
1096                    .and_then(|v| v.as_str())
1097                    .and_then(|s| s.parse::<algocline_core::PkgType>().ok()),
1098                Err(e) => {
1099                    tracing::warn!("hub: build_index VM eval failed for {dir_name}: {e}");
1100                    None
1101                }
1102            }
1103        } else {
1104            None
1105        };
1106
1107        // Use manifest source only for local-state mode. When the manifest
1108        // has no record for this directory, default to `PackageSource::Unknown`
1109        // (via `Default`) — hub consumers see it as "source not recorded".
1110        let source = manifest
1111            .packages
1112            .get(&dir_name)
1113            .map(|e| e.source.clone())
1114            .unwrap_or_default();
1115
1116        entries.push(IndexEntry {
1117            entity,
1118            source,
1119            card_count: card_counts.get(&dir_name).copied().unwrap_or(0),
1120            best_card: None,
1121        });
1122    }
1123
1124    entries.sort_by(|a, b| a.entity.name.cmp(&b.entity.name));
1125
1126    Ok(HubIndex {
1127        schema_version: "hub_index/v0".into(),
1128        updated_at: super::manifest::now_iso8601(),
1129        packages: entries,
1130    })
1131}
1132
1133// ─── Public API ────────────────────────────────────────────────
1134
1135impl AppService {
1136    /// Generate a hub index from a packages directory.
1137    ///
1138    /// When `source_dir` is provided, scans that directory (e.g. a
1139    /// repo checkout) — pure metadata extraction, no manifest or card
1140    /// data mixed in.  When omitted, scans `~/.algocline/packages/`.
1141    ///
1142    /// Writes the index to `output_path` (for CI / publishing).
1143    /// Does NOT touch the remote search cache.
1144    pub async fn hub_reindex(
1145        &self,
1146        output_path: Option<&str>,
1147        source_dir: Option<&str>,
1148    ) -> Result<String, String> {
1149        let src = source_dir.map(std::path::Path::new);
1150        if let Some(d) = src {
1151            if !d.is_dir() {
1152                return Err(format!("source_dir '{}' is not a directory", d.display()));
1153            }
1154        }
1155        let app_dir = self.log_config.app_dir();
1156        let index = build_index(&app_dir, src, &self.executor).await?;
1157
1158        let written_path = if let Some(path) = output_path {
1159            let json = serde_json::to_string_pretty(&index)
1160                .map_err(|e| format!("Failed to serialize index: {e}"))?;
1161            std::fs::write(path, &json)
1162                .map_err(|e| format!("Failed to write index to {path}: {e}"))?;
1163            Some(path.to_string())
1164        } else {
1165            None
1166        };
1167
1168        let response = serde_json::json!({
1169            "package_count": index.packages.len(),
1170            "updated_at": index.updated_at,
1171            "output_path": written_path,
1172            "source_dir": source_dir,
1173        });
1174        Ok(response.to_string())
1175    }
1176
1177    /// Show detailed information for a single package.
1178    ///
1179    /// Aggregates package metadata (from index or local `init.lua`),
1180    /// all Cards, aliases, and eval stats into one response.
1181    pub fn hub_info(&self, pkg: &str) -> Result<String, String> {
1182        use algocline_engine::card;
1183
1184        // Guard against path traversal
1185        if pkg.contains("..") || pkg.contains('/') || pkg.contains('\\') {
1186            return Err(format!("Invalid package name: '{pkg}'"));
1187        }
1188
1189        // Package metadata: try remote index first, fall back to local
1190        let app_dir = self.log_config.app_dir();
1191        let installed = installed_packages(&app_dir)?;
1192        let is_installed = installed.contains_key(pkg);
1193
1194        // Resolve package metadata: try remote index first, fall back to
1195        // local init.lua. `version` / `description` / `category` are modelled
1196        // as `Option<String>` at the `PkgEntity` layer; at this API surface
1197        // we flatten `None` to empty string so the wire shape (non-null
1198        // JSON string fields) stays unchanged for existing consumers.
1199        let (version, description, category, source) = {
1200            let (remote, _) = fetch_remote_indices(&app_dir)?;
1201            if let Some(entry) = remote.packages.iter().find(|e| e.entity.name == pkg) {
1202                (
1203                    entry.entity.version.clone().unwrap_or_default(),
1204                    entry.entity.description.clone().unwrap_or_default(),
1205                    entry.entity.category.clone().unwrap_or_default(),
1206                    entry.source.clone(),
1207                )
1208            } else if is_installed {
1209                // Fall back to local init.lua parse via `PkgEntity`. When
1210                // the file is not a valid package (no `M.meta.name`), we
1211                // degrade gracefully by returning the manifest-recorded
1212                // version and empty string fields — mirroring the pre-typed
1213                // behaviour.
1214                let init_lua = app_dir.packages_dir().join(pkg).join("init.lua");
1215                let entity = PkgEntity::parse_from_init_lua(&init_lua);
1216                let manifest_source = manifest::load_manifest(&app_dir)?
1217                    .packages
1218                    .get(pkg)
1219                    .map(|e| e.source.clone())
1220                    .unwrap_or_default();
1221                match entity {
1222                    Some(e) => (
1223                        e.version.unwrap_or_default(),
1224                        e.description.unwrap_or_default(),
1225                        e.category.unwrap_or_default(),
1226                        manifest_source,
1227                    ),
1228                    None => (
1229                        installed.get(pkg).cloned().flatten().unwrap_or_default(),
1230                        String::new(),
1231                        String::new(),
1232                        manifest_source,
1233                    ),
1234                }
1235            } else {
1236                return Err(format!(
1237                    "Package '{pkg}' not found in remote indices or locally installed packages"
1238                ));
1239            }
1240        };
1241
1242        // Collect warnings additively; surfaced in response JSON so MCP callers
1243        // (Claude Code UI) observe degraded data instead of silent loss.
1244        // See CLAUDE.md §Service 層の Error 伝播規律 — tracing alone is not enough.
1245        let mut warnings: Vec<String> = Vec::new();
1246
1247        // Cards for this package (single call, reused for stats)
1248        let card_rows = match self.card_store.list(Some(pkg)) {
1249            Ok(rows) => rows,
1250            Err(e) => {
1251                let msg = format!("card store list for '{pkg}': {e}");
1252                tracing::warn!("{}", msg);
1253                warnings.push(msg);
1254                vec![]
1255            }
1256        };
1257        let cards_json = card::summaries_to_json(&card_rows);
1258
1259        // Aliases for this package
1260        let aliases_json = match self.card_store.alias_list(Some(pkg)) {
1261            Ok(rows) => card::aliases_to_json(&rows),
1262            Err(e) => {
1263                let msg = format!("card store alias_list for '{pkg}': {e}");
1264                tracing::warn!("{}", msg);
1265                warnings.push(msg);
1266                serde_json::json!([])
1267            }
1268        };
1269
1270        // Stats: card count, best pass_rate, eval count
1271        let card_count = card_rows.len();
1272        let best_pass_rate = card_rows
1273            .iter()
1274            .filter_map(|c| c.pass_rate)
1275            .fold(f64::NEG_INFINITY, f64::max);
1276        let best_pass_rate = if best_pass_rate.is_finite() {
1277            Some(best_pass_rate)
1278        } else {
1279            None
1280        };
1281
1282        // Eval count from evals directory; corruption warnings surfaced additively.
1283        let eval_count = count_evals_for_pkg(&app_dir, pkg, &mut warnings);
1284
1285        let mut response = serde_json::json!({
1286            "pkg": {
1287                "name": pkg,
1288                "version": version,
1289                "description": description,
1290                "category": category,
1291                "source": source,
1292                "installed": is_installed,
1293            },
1294            "cards": cards_json,
1295            "aliases": aliases_json,
1296            "stats": {
1297                "card_count": card_count,
1298                "eval_count": eval_count,
1299                "best_pass_rate": best_pass_rate,
1300            },
1301        });
1302        if !warnings.is_empty() {
1303            response["warnings"] = serde_json::json!(warnings);
1304        }
1305        Ok(response.to_string())
1306    }
1307
1308    /// Search packages across remote indices + local state.
1309    ///
1310    /// Index URLs are discovered from hub registries, manifest sources,
1311    /// and `AUTO_INSTALL_SOURCES`. Each source is cached independently.
1312    ///
1313    /// ## List-tool options (`opts`)
1314    ///
1315    /// The `opts` parameter carries the list-tool primitives
1316    /// (`limit / sort / filter / fields / verbose`) shared with other
1317    /// list-style MCP tools. Defaults:
1318    ///
1319    /// - `limit` — 50 when `None`. `Some(0)` means **no limit** (return
1320    ///   all matching entries — empty-means-all idiom).
1321    /// - `sort` — `"-installed,name"` when `None` (installed first, then
1322    ///   ascending by name).
1323    /// - `filter` — no additional filter. Legacy `category` /
1324    ///   `installed_only` parameters are merged into the filter map when
1325    ///   `filter` does not already contain those keys (explicit
1326    ///   `filter` wins on conflict).
1327    /// - `fields` / `verbose` — projection is applied to every entry in
1328    ///   the `results` array (see
1329    ///   [`super::list_opts::resolve_fields`]). Top-level keys
1330    ///   (`total`, `sources`, `warnings`) are never projected away.
1331    ///
1332    /// ## docstring handling
1333    ///
1334    /// [`SearchResult::docstring`] is `skip_serializing`, so it is
1335    /// absent from the default serialized view. When the resolved
1336    /// projection contains `"docstring"`, it is re-injected into the
1337    /// per-entry JSON via
1338    /// [`SearchResult::to_value_with_optional_docstring`].
1339    pub(crate) fn hub_search(
1340        &self,
1341        query: Option<&str>,
1342        category: Option<&str>,
1343        installed_only: Option<bool>,
1344        opts: ListOpts,
1345        local_indices: Option<Vec<String>>,
1346    ) -> Result<String, String> {
1347        let app_dir = self.log_config.app_dir();
1348        let (mut remote, mut warnings) = fetch_remote_indices(&app_dir)?;
1349
1350        // Merge local index files (pre-push verification / air-gapped use)
1351        // BEFORE the main `merge` step so that installed packages whose
1352        // metadata appears in a local index are surfaced with their full
1353        // entry (version / source / category) instead of the `Unknown`
1354        // stub produced by `merge`'s local-only fallback path. Each path
1355        // is read and deserialized as a HubIndex; failures go to warnings
1356        // and do not abort the search (partial results > hard failure for
1357        // local verification workflows). Collection results from
1358        // `fetch_remote_indices` take priority on name collisions.
1359        let local_index_paths: Vec<String> = local_indices.clone().unwrap_or_default();
1360        if let Some(paths) = local_indices {
1361            let mut existing: HashSet<String> = remote
1362                .packages
1363                .iter()
1364                .map(|p| p.entity.name.clone())
1365                .collect();
1366            for path in &paths {
1367                match std::fs::read_to_string(path) {
1368                    Err(e) => {
1369                        warnings.push(format!("Failed to read local index {path}: {e}"));
1370                    }
1371                    Ok(raw) => match serde_json::from_str::<HubIndex>(&raw) {
1372                        Err(e) => {
1373                            warnings.push(format!("Failed to parse local index {path}: {e}"));
1374                        }
1375                        Ok(idx) => {
1376                            for entry in idx.packages {
1377                                if existing.insert(entry.entity.name.clone()) {
1378                                    remote.packages.push(entry);
1379                                }
1380                            }
1381                        }
1382                    },
1383                }
1384            }
1385        }
1386
1387        let mut results = merge(&app_dir, &remote)?;
1388
1389        // Filter by query (internal signal covers name/description/
1390        // category/docstring — `matches_query` unchanged).
1391        let query_lower = query.filter(|q| !q.is_empty()).map(|q| q.to_lowercase());
1392        if let Some(ref ql) = query_lower {
1393            results.retain(|r| matches_query(r, ql));
1394        }
1395
1396        // Compute docstring_matched per remaining hit: Some(true) only
1397        // when the query matched docstring and none of {name,
1398        // description, category}; otherwise None.
1399        if let Some(ref ql) = query_lower {
1400            for r in &mut results {
1401                let empty = String::new();
1402                let pkg = &r.entity;
1403                let other_hit = pkg.name.to_lowercase().contains(ql)
1404                    || pkg
1405                        .description
1406                        .as_ref()
1407                        .unwrap_or(&empty)
1408                        .to_lowercase()
1409                        .contains(ql)
1410                    || pkg
1411                        .category
1412                        .as_ref()
1413                        .unwrap_or(&empty)
1414                        .to_lowercase()
1415                        .contains(ql);
1416                let doc_hit = pkg
1417                    .docstring
1418                    .as_ref()
1419                    .unwrap_or(&empty)
1420                    .to_lowercase()
1421                    .contains(ql);
1422                r.docstring_matched = if !other_hit && doc_hit {
1423                    Some(true)
1424                } else {
1425                    None
1426                };
1427            }
1428        }
1429
1430        // Build the effective filter map: start from explicit `opts.filter`,
1431        // then fold legacy `category` / `installed_only` in only if the
1432        // corresponding key is not already set (explicit filter wins).
1433        let mut filter_map: std::collections::HashMap<String, serde_json::Value> =
1434            opts.filter.unwrap_or_default();
1435        if let Some(cat) = category {
1436            filter_map
1437                .entry("category".to_string())
1438                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1439        }
1440        if let Some(only) = installed_only {
1441            // Preserve prior semantic: `installed_only=Some(false)` was a
1442            // no-op (it did not force `installed=false`). Only fold when
1443            // explicitly true.
1444            if only {
1445                filter_map
1446                    .entry("installed".to_string())
1447                    .or_insert(serde_json::Value::Bool(true));
1448            }
1449        }
1450
1451        // Resolve sort keys up-front so an invalid sort string errors out
1452        // before we touch results.
1453        let sort_str = opts.sort.as_deref().unwrap_or("-installed,name");
1454        let sort_keys = parse_sort(sort_str)?;
1455
1456        // Resolve projection fields; this also rejects unknown `verbose`
1457        // values before any heavy work.
1458        let fields = resolve_fields(
1459            opts.verbose.as_deref(),
1460            opts.fields.as_deref(),
1461            HUB_SEARCH_SUMMARY,
1462            HUB_SEARCH_FULL,
1463        )?;
1464        let include_docstring = fields.iter().any(|f| f == "docstring");
1465
1466        // Serialize each result to a Value (docstring optionally attached)
1467        // so filter/sort/projection work uniformly on JSON values.
1468        let mut items: Vec<serde_json::Value> = results
1469            .iter()
1470            .map(|r| r.to_value_with_optional_docstring(include_docstring))
1471            .collect();
1472
1473        // Filter AFTER serialization so filter keys can reference
1474        // projection-level shape (e.g. `category`, `installed`).
1475        if !filter_map.is_empty() {
1476            items.retain(|v| matches_filter(v, &filter_map));
1477        }
1478
1479        // Sort.
1480        apply_sort_by_value(&mut items, &sort_keys);
1481
1482        // Limit. `limit = Some(0)` means "no limit" (return all results)
1483        // — mirrors the `empty=all & some=filter` idiom used across the
1484        // list-tool contract. `None` falls back to the default cap (50).
1485        let total = items.len();
1486        let limit = opts.limit.unwrap_or(50);
1487        if limit > 0 {
1488            items.truncate(limit);
1489        }
1490
1491        // Projection (after truncation — unselected fields are stripped
1492        // from the kept entries only).
1493        let projected: Vec<serde_json::Value> = items
1494            .into_iter()
1495            .map(|v| project_fields(v, &fields))
1496            .collect();
1497
1498        // Collect discovered sources for transparency.
1499        // Warnings from this call (e.g. config.toml parse failure) are
1500        // already present in `warnings` from `fetch_remote_indices` above;
1501        // use a throwaway buffer here to avoid duplicating them.
1502        let mut _src_warnings: Vec<String> = Vec::new();
1503        let mut sources = discover_index_urls(&app_dir, &mut _src_warnings)?;
1504        // Surface local_indices paths in `sources` so callers can see
1505        // what was actually consulted (transparency / debug aid).
1506        sources.extend(local_index_paths);
1507
1508        let mut json = serde_json::json!({
1509            "results": projected,
1510            "total": total,
1511            "sources": sources,
1512        });
1513        if !warnings.is_empty() {
1514            json["warnings"] = serde_json::json!(warnings);
1515        }
1516        Ok(json.to_string())
1517    }
1518
1519    /// Aggregate hub index across all discovered cache sources.
1520    ///
1521    /// Reads the cached `hub_index.json` for each registered source URL
1522    /// (cache-only, no network fetch). Sources that are missing from cache
1523    /// or whose cache file is corrupt are skipped and a warning is collected;
1524    /// the aggregate still succeeds with the remaining sources.
1525    ///
1526    /// Registry-load failures (corrupt `hub_registries.json`) are also
1527    /// demoted to warnings rather than hard errors. Any warnings accumulated
1528    /// before the failure are preserved in the returned `warnings` vec so
1529    /// they reach the MCP wire response.
1530    ///
1531    /// # Returns
1532    /// `Ok((merged_index, warnings))` — always Ok; `warnings` contains any
1533    /// per-source failure messages including registry-load failures.
1534    pub(crate) fn aggregate_index(
1535        &self,
1536    ) -> Result<(HubIndex, Vec<String>), super::error::ServiceError> {
1537        let app_dir = self.log_config.app_dir();
1538        let mut warnings: Vec<String> = Vec::new();
1539
1540        // Discover source URLs (registries + manifest + seeds).
1541        // On failure, demote the error to a warning and return a degraded
1542        // (empty) response. Preserves any warnings already collected
1543        // (e.g. config.toml parse warning) before the failure.
1544        let urls = match discover_index_urls(&app_dir, &mut warnings) {
1545            Ok(u) => u,
1546            Err(e) => {
1547                warnings.push(format!("hub registry discovery failed: {e}"));
1548                return Ok((
1549                    HubIndex {
1550                        schema_version: "hub_index/v0".into(),
1551                        updated_at: String::new(),
1552                        packages: Vec::new(),
1553                    },
1554                    warnings,
1555                ));
1556            }
1557        };
1558
1559        // Empty URL list: return empty index (not an error — fresh install).
1560        if urls.is_empty() {
1561            return Ok((
1562                HubIndex {
1563                    schema_version: "hub_index/v0".into(),
1564                    updated_at: String::new(),
1565                    packages: Vec::new(),
1566                },
1567                warnings,
1568            ));
1569        }
1570
1571        // Load each source from cache. Network fetches are intentionally
1572        // avoided here: resource reads happen synchronously in the MCP
1573        // request path and should not block on network I/O. The cache
1574        // is populated by hub_reindex / hub_search (which do fetch).
1575        // Per-source load failures are best-effort: collect as warnings
1576        // and continue with remaining sources.
1577        let mut all_packages: Vec<IndexEntry> = Vec::new();
1578        let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
1579
1580        for url in &urls {
1581            let merge_packages =
1582                |packages: Vec<IndexEntry>,
1583                 all: &mut Vec<IndexEntry>,
1584                 seen: &mut std::collections::HashSet<String>| {
1585                    for entry in packages {
1586                        if seen.insert(entry.entity.name.clone()) {
1587                            all.push(entry);
1588                        }
1589                    }
1590                };
1591            match load_cached_full(&app_dir, url) {
1592                CacheLookup::Fresh(index) => {
1593                    merge_packages(index.packages, &mut all_packages, &mut seen_names);
1594                }
1595                CacheLookup::Stale(index) => {
1596                    // Stale but not absent: merge the data and emit a warning so
1597                    // the caller knows the catalog may be outdated.
1598                    warnings.push(format!(
1599                        "hub cache stale (>{CACHE_TTL_SECS}s) for {url}; run alc_hub_search to refresh"
1600                    ));
1601                    merge_packages(index.packages, &mut all_packages, &mut seen_names);
1602                }
1603                CacheLookup::NotPresent => {
1604                    // Cache file absent — not an error, just skip.
1605                }
1606                CacheLookup::Corrupt(e) => {
1607                    // Cache corruption: surface as warning, continue aggregate.
1608                    warnings.push(format!("hub cache read failed for {url}: {e}"));
1609                }
1610            }
1611        }
1612
1613        Ok((
1614            HubIndex {
1615                schema_version: "hub_index/v0".into(),
1616                updated_at: String::new(),
1617                packages: all_packages,
1618            },
1619            warnings,
1620        ))
1621    }
1622}
1623
1624#[cfg(test)]
1625mod tests {
1626    use super::*;
1627
#[test]
fn repo_to_index_url_github() {
    // A plain GitHub repo URL maps to the raw `hub_index.json` on `main`.
    let resolved = repo_to_index_url("https://github.com/ynishi/algocline-bundled-packages");
    assert_eq!(
        resolved.as_deref(),
        Some(
            "https://raw.githubusercontent.com/ynishi/algocline-bundled-packages/main/hub_index.json"
        )
    );
}
1638
#[test]
fn repo_to_index_url_github_trailing_slash() {
    // A trailing slash on the repo URL must not leak into the raw URL.
    let expected = String::from("https://raw.githubusercontent.com/user/repo/main/hub_index.json");
    let resolved = repo_to_index_url("https://github.com/user/repo/");
    assert_eq!(resolved, Some(expected));
}
1646
#[test]
fn repo_to_index_url_github_dot_git() {
    // A `.git` suffix is dropped before the raw URL is built.
    let resolved = repo_to_index_url("https://github.com/user/repo.git");
    assert_eq!(
        resolved.as_deref(),
        Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json")
    );
}
1654
#[test]
fn repo_to_index_url_direct_json() {
    // A URL that already points at a `.json` file is passed through unchanged.
    let direct = "https://example.com/my_index.json";
    assert_eq!(repo_to_index_url(direct), Some(direct.to_string()));
}
1662
#[test]
fn repo_to_index_url_unknown_host_no_json() {
    // Non-GitHub host without a `.json` path: no index URL can be derived.
    let resolved = repo_to_index_url("https://example.com/some-repo");
    assert_eq!(resolved, None);
}
1667
#[test]
fn repo_to_index_url_local_path() {
    // A filesystem path is not a remote source, so it yields no index URL.
    let resolved = repo_to_index_url("/home/user/my-pkg");
    assert_eq!(resolved, None);
}
1672
#[test]
fn cache_key_stable() {
    // Hashing the same URL twice must give the same key.
    let url = "https://example.com/index.json";
    let (first, second) = (cache_key(url), cache_key(url));
    assert_eq!(first, second);
    // The key is expected to be 16 hex characters long.
    assert_eq!(first.len(), 16);
}
1680
#[test]
fn cache_key_different_urls() {
    // Distinct URLs must not share a cache key.
    assert_ne!(
        cache_key("https://a.com/index.json"),
        cache_key("https://b.com/index.json")
    );
}
1687
1688    // NOTE: The init.lua meta / docstring parsing tests have moved to
1689    // `algocline_core::pkg::tests` along with the parser itself. The
1690    // `hub.rs` call-path tests now exercise the typed `PkgEntity` via
1691    // `build_index` / `merge` only.
1692
#[test]
fn merge_dedup_uses_hashset() {
    // Merge one remote-only entry against an empty app dir. The entry must
    // survive the merge, and because it carries no `pkg_type` it must come
    // out as `Runnable`.
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    let remote_entry = IndexEntry {
        entity: PkgEntity {
            name: "remote_only".into(),
            version: Some("1.0".into()),
            description: Some("from remote".into()),
            category: Some("test".into()),
            docstring: None,
            tags: None,
            pkg_type: None,
            type_source: None,
        },
        source: PackageSource::Unknown,
        card_count: 0,
        best_card: None,
    };
    let remote = HubIndex {
        schema_version: "hub_index/v0".into(),
        updated_at: String::new(),
        packages: vec![remote_entry],
    };

    let merged = merge(&app_dir, &remote).expect("merge over empty app_dir should succeed");

    // The result may also contain locally installed packages; only the
    // remote entry is inspected.
    let hit = merged.iter().find(|r| r.entity.name == "remote_only");
    assert!(hit.is_some());
    assert_eq!(
        hit.unwrap().entity.pkg_type,
        Some(PkgType::Runnable),
        "pre-type-system index entry must default to Runnable"
    );
}
1732
#[test]
fn matches_query_searches_docstring() {
    let entity = PkgEntity {
        name: "cascade".into(),
        version: Some("0.1.0".into()),
        description: Some("Multi-level routing".into()),
        category: Some("meta".into()),
        docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
        tags: None,
        pkg_type: None,
        type_source: None,
    };
    let result = SearchResult {
        entity,
        source: PackageSource::Unknown,
        installed: true,
        card_count: 0,
        best_card: None,
        docstring_matched: None,
    };

    // (query, expected outcome, failure label)
    let cases = [
        ("thompson", true, "docstring match"),
        ("FrugalGPT", true, "docstring match case"),
        ("routing", true, "description match"),
        ("bayesian", false, "no match"),
    ];
    for (query, expected, label) in cases {
        assert!(matches_query(&result, query) == expected, "{label}");
    }
}
1758
1759    // ─── SearchResult::to_value_with_optional_docstring ────────────
1760    //
1761    // `docstring` is not emitted by the default serde path (via the
1762    // `serialize_entity_without_docstring` custom serializer) and is
1763    // re-attached only when the projection path says so. These tests
1764    // pin the two branches of that helper — they are the hinge that
1765    // `verbose="full"` / `fields=["docstring"]` rely on.
1766
1767    fn sample_search_result() -> SearchResult {
1768        SearchResult {
1769            entity: PkgEntity {
1770                name: "cascade".into(),
1771                version: Some("0.1.0".into()),
1772                description: Some("Multi-level routing".into()),
1773                category: Some("reasoning".into()),
1774                docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
1775                tags: None,
1776                pkg_type: None,
1777                type_source: None,
1778            },
1779            source: PackageSource::Git {
1780                url: "https://example.com/cascade".into(),
1781                rev: None,
1782            },
1783            installed: true,
1784            card_count: 3,
1785            best_card: None,
1786            docstring_matched: None,
1787        }
1788    }
1789
#[test]
fn to_value_default_omits_docstring() {
    let value = sample_search_result().to_value_with_optional_docstring(false);
    let fields = value.as_object().expect("object");

    assert!(
        fields.get("docstring").is_none(),
        "default summary must not leak docstring"
    );
    assert_eq!(
        fields.get("name").and_then(serde_json::Value::as_str),
        Some("cascade")
    );
    // The fixture leaves `docstring_matched` as `None`, so the key must be
    // absent from the serialized object.
    assert!(
        fields.get("docstring_matched").is_none(),
        "docstring_matched=None must be omitted"
    );
}
1807
#[test]
fn to_value_include_reattaches_docstring() {
    // With the flag set, the docstring must be present in the output.
    let value = sample_search_result().to_value_with_optional_docstring(true);
    let fields = value.as_object().expect("object");
    let docstring = fields.get("docstring").and_then(serde_json::Value::as_str);
    assert_eq!(
        docstring,
        Some("Based on FrugalGPT. Uses Thompson Sampling.")
    );
}
1818
#[test]
fn to_value_serializes_docstring_matched_when_set() {
    // Once the flag is `Some(true)` it must appear in the serialized object.
    let flagged = SearchResult {
        docstring_matched: Some(true),
        ..sample_search_result()
    };
    let value = flagged.to_value_with_optional_docstring(false);
    let fields = value.as_object().expect("object");
    let matched = fields
        .get("docstring_matched")
        .and_then(serde_json::Value::as_bool);
    assert_eq!(matched, Some(true));
}
1830
1831    // ─── projection glue ──────────────────────────────────────────
1832    //
1833    // These tests exercise the projection path that `hub_search` uses to
1834    // shape output: `resolve_fields` + `project_fields` applied to a
1835    // `to_value_with_optional_docstring`-serialized entry. They pin the
1836    // wf-sim-verbose contract: `fields` wins over `verbose`, default
1837    // summary preset excludes docstring, `full` preset includes
1838    // docstring, unknown keys silently skipped.
1839
#[test]
fn hub_search_default_summary_excludes_docstring() {
    // No `verbose`, no `fields`: the summary preset applies.
    let fields = resolve_fields(None, None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
    let wants_docstring = fields.iter().any(|f| f == "docstring");

    let serialized = sample_search_result().to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(serialized, &fields);
    let obj = projected.as_object().expect("object");

    assert!(
        !obj.contains_key("docstring"),
        "summary preset must omit docstring"
    );
    // Summary-preset keys that the fixture actually populates.
    ["name", "version", "description", "category", "installed"]
        .iter()
        .for_each(|key| assert!(obj.contains_key(*key), "summary preset key {key} missing"));
}
1859
#[test]
fn hub_search_verbose_full_includes_docstring() {
    // `verbose = "full"` selects the full preset.
    let fields =
        resolve_fields(Some("full"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
    let wants_docstring = fields.iter().any(|f| f == "docstring");

    let serialized = sample_search_result().to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(serialized, &fields);
    let obj = projected.as_object().expect("object");

    let docstring = obj.get("docstring").and_then(serde_json::Value::as_str);
    assert_eq!(
        docstring,
        Some("Based on FrugalGPT. Uses Thompson Sampling.")
    );
    // Keys that only the full preset carries.
    ["source", "card_count"]
        .iter()
        .for_each(|key| assert!(obj.contains_key(*key), "full preset key {key} missing"));
}
1880
#[test]
fn hub_search_fields_beats_verbose() {
    // `verbose = "summary"` alone would drop the docstring; an explicit
    // `fields` list has to take precedence over it.
    let explicit: Vec<String> = ["name", "docstring"].map(String::from).to_vec();
    let fields = resolve_fields(
        Some("summary"),
        Some(&explicit),
        HUB_SEARCH_SUMMARY,
        HUB_SEARCH_FULL,
    )
    .unwrap();
    let wants_docstring = fields.iter().any(|f| f == "docstring");

    let serialized = sample_search_result().to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(serialized, &fields);
    let obj = projected.as_object().expect("object");

    assert_eq!(obj.len(), 2, "only the two requested fields");
    for key in ["name", "docstring"] {
        assert!(obj.contains_key(key));
    }
}
1904
#[test]
fn hub_search_fields_unknown_key_silently_skipped() {
    // A requested key that the entry does not have is dropped without error.
    let explicit: Vec<String> = ["name", "bogus"].map(String::from).to_vec();
    let fields =
        resolve_fields(None, Some(&explicit), HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();

    let serialized = sample_search_result().to_value_with_optional_docstring(false);
    let projected = project_fields(serialized, &fields);
    let obj = projected.as_object().expect("object");

    assert_eq!(obj.len(), 1, "bogus must not appear");
    assert!(obj.contains_key("name"));
}
1916
#[test]
fn hub_search_invalid_verbose_errors() {
    // An unrecognised `verbose` value is rejected and named in the error.
    let outcome = resolve_fields(Some("fat"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL);
    let err = outcome.unwrap_err();
    assert!(
        err.contains("fat"),
        "error must mention the offending value"
    );
}
1926
1927    // ─── docstring_matched classification ─────────────────────────
1928    //
1929    // The query-time classification rule: `docstring_matched = Some(true)`
1930    // only when the query hit docstring AND missed name/description/
1931    // category; otherwise `None` (and therefore omitted from output).
1932    // The logic lives inline in `hub_search`; we re-create it here over a
1933    // tiny local helper so the three cases stay pinned as a contract.
1934
1935    fn classify(r: &SearchResult, query: &str) -> Option<bool> {
1936        let ql = query.to_lowercase();
1937        if query.is_empty() {
1938            return None;
1939        }
1940        let empty = String::new();
1941        let pkg = &r.entity;
1942        let other_hit = pkg.name.to_lowercase().contains(&ql)
1943            || pkg
1944                .description
1945                .as_ref()
1946                .unwrap_or(&empty)
1947                .to_lowercase()
1948                .contains(&ql)
1949            || pkg
1950                .category
1951                .as_ref()
1952                .unwrap_or(&empty)
1953                .to_lowercase()
1954                .contains(&ql);
1955        let doc_hit = pkg
1956            .docstring
1957            .as_ref()
1958            .unwrap_or(&empty)
1959            .to_lowercase()
1960            .contains(&ql);
1961        if !other_hit && doc_hit {
1962            Some(true)
1963        } else {
1964            None
1965        }
1966    }
1967
#[test]
fn docstring_matched_true_when_only_docstring_hits() {
    // In the fixture, "Thompson" occurs in the docstring and nowhere else.
    let verdict = classify(&sample_search_result(), "thompson");
    assert_eq!(verdict, Some(true));
}
1974
#[test]
fn docstring_matched_none_when_name_also_hits() {
    // "cascade" is the package name, so this is not a docstring-only hit.
    let verdict = classify(&sample_search_result(), "cascade");
    assert_eq!(verdict, None);
}
1981
#[test]
fn docstring_matched_none_when_description_hits() {
    // "routing" is in the description, so the flag stays unset.
    let verdict = classify(&sample_search_result(), "routing");
    assert_eq!(verdict, None);
}
1988
#[test]
fn docstring_matched_none_when_query_empty() {
    // An empty query never classifies as a docstring-only hit.
    let verdict = classify(&sample_search_result(), "");
    assert_eq!(verdict, None);
}
1994
1995    // ─── filter fold (legacy params → filter map) ─────────────────
1996    //
1997    // Behavioural rule: legacy `category` / `installed_only=true` fold
1998    // into the filter map only when the corresponding key is not
1999    // already set (explicit `filter` wins). `installed_only=false` is a
2000    // no-op (preserves prior semantics).
2001
2002    fn build_filter_map(
2003        category: Option<&str>,
2004        installed_only: Option<bool>,
2005        explicit: Option<HashMap<String, serde_json::Value>>,
2006    ) -> HashMap<String, serde_json::Value> {
2007        let mut filter_map = explicit.unwrap_or_default();
2008        if let Some(cat) = category {
2009            filter_map
2010                .entry("category".to_string())
2011                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
2012        }
2013        if let Some(only) = installed_only {
2014            if only {
2015                filter_map
2016                    .entry("installed".to_string())
2017                    .or_insert(serde_json::Value::Bool(true));
2018            }
2019        }
2020        filter_map
2021    }
2022
#[test]
fn filter_by_category_via_legacy_param() {
    // The legacy `category` parameter alone ends up as a `category` filter.
    let folded = build_filter_map(Some("reasoning"), None, None);
    let category = folded.get("category").and_then(serde_json::Value::as_str);
    assert_eq!(category, Some("reasoning"));
}
2031
#[test]
fn filter_by_installed_only_via_legacy_param() {
    // `installed_only = true` becomes an `installed: true` filter.
    let folded = build_filter_map(None, Some(true), None);
    let installed = folded.get("installed").and_then(serde_json::Value::as_bool);
    assert_eq!(installed, Some(true));
}
2037
#[test]
fn filter_installed_only_false_is_noop() {
    // `installed_only = false` must not add an `installed` filter.
    let folded = build_filter_map(None, Some(false), None);
    assert!(
        folded.get("installed").is_none(),
        "installed_only=false should not fold in"
    );
}
2046
#[test]
fn filter_beats_legacy_param_on_conflict() {
    // Both inputs name a category (explicit: meta, legacy: reasoning);
    // the explicit filter must survive.
    let explicit = HashMap::from([("category".to_string(), serde_json::json!("meta"))]);
    let folded = build_filter_map(Some("reasoning"), None, Some(explicit));
    let category = folded.get("category").and_then(serde_json::Value::as_str);
    assert_eq!(category, Some("meta"));
}
2062
#[test]
fn filter_merges_legacy_when_no_conflict() {
    // The explicit filter touches a different key, so the legacy category
    // is added alongside it.
    let explicit = HashMap::from([("installed".to_string(), serde_json::json!(true))]);
    let folded = build_filter_map(Some("reasoning"), None, Some(explicit));

    let category = folded.get("category").and_then(serde_json::Value::as_str);
    assert_eq!(category, Some("reasoning"));
    let installed = folded.get("installed").and_then(serde_json::Value::as_bool);
    assert_eq!(installed, Some(true));
}
2076
2077    // ─── load_registries: file-absent vs. corrupt JSON ────────────
2078
#[test]
fn load_registries_missing_file_returns_default() {
    // Fresh temp dir: hub_registries.json does not exist, which must load
    // as an empty registry list rather than an error.
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    let outcome = load_registries(&app_dir);
    assert!(outcome.is_ok(), "missing file should be Ok: {outcome:?}");
    let loaded = outcome.unwrap();
    assert!(loaded.registries.is_empty());
}
2088
#[test]
fn load_registries_corrupt_json_returns_err() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    // Put unparseable content where the registries file is expected.
    let registries_path = app_dir.hub_registries_json();
    let parent = registries_path.parent().unwrap();
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(&registries_path, b"not valid json {{{").unwrap();

    let outcome = load_registries(&app_dir);
    assert!(outcome.is_err(), "corrupt JSON must propagate Err");
    let msg = outcome.unwrap_err().to_string();
    assert!(
        msg.contains("parse"),
        "error message should mention parse: {msg}"
    );
}
2105
#[test]
fn load_registries_valid_file_deserializes() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    // One well-formed registry entry on disk.
    let registries_path = app_dir.hub_registries_json();
    let parent = registries_path.parent().unwrap();
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(
        &registries_path,
        r#"{"registries":[{"source":"https://github.com/user/repo","origin":"pkg_install","added_at":"2026-01-01T00:00:00Z"}]}"#,
    )
    .unwrap();

    let outcome = load_registries(&app_dir);
    assert!(outcome.is_ok(), "valid JSON must parse Ok: {outcome:?}");
    let loaded = outcome.unwrap();
    assert_eq!(loaded.registries.len(), 1);
    assert_eq!(loaded.registries[0].source, "https://github.com/user/repo");
}
2120
2121    // ─── default sort verification ────────────────────────────────
2122
#[test]
fn default_sort_is_minus_installed_name() {
    // Parsing: two keys, `installed` descending then `name` ascending.
    let keys = parse_sort("-installed,name").unwrap();
    assert_eq!(keys.len(), 2);
    let (primary, secondary) = (&keys[0], &keys[1]);
    assert_eq!(primary.key, "installed");
    assert!(primary.desc, "installed must sort desc (true first)");
    assert_eq!(secondary.key, "name");
    assert!(!secondary.desc);

    // Applying: installed entries first, each group ordered by name.
    let mut items = [
        ("zeta", false),
        ("mu", true),
        ("alpha", false),
        ("beta", true),
    ]
    .map(|(name, installed)| serde_json::json!({"installed": installed, "name": name}))
    .to_vec();
    apply_sort_by_value(&mut items, &keys);

    let names: Vec<&str> = items
        .iter()
        .map(|item| item["name"].as_str().unwrap_or(""))
        .collect();
    assert_eq!(names, ["beta", "mu", "alpha", "zeta"]);
}
2146
2147    // ─── Phase 3 MED batch: error-propagation tests ───────────────
2148
2149    // Site 1: collection_url_from_config
2150
#[test]
fn collection_url_from_config_absent_returns_ok_none() {
    // Fresh temp dir: there is no config.toml, which must read as "no URL
    // configured" rather than as an error.
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    let outcome = collection_url_from_config(&app_dir);
    assert!(
        matches!(outcome, Ok(None)),
        "absent config.toml must return Ok(None), got {outcome:?}"
    );
}
2162
#[test]
fn collection_url_from_config_corrupt_toml_returns_err() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    // Malformed TOML: unterminated table header plus garbage value.
    let config_path = app_dir.config_toml();
    let parent = config_path.parent().unwrap();
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(&config_path, b"[hub\ncollection_url = broken{{{{").unwrap();

    let outcome = collection_url_from_config(&app_dir);
    assert!(
        outcome.is_err(),
        "corrupt TOML must return Err, got {outcome:?}"
    );
}
2176
#[test]
fn collection_url_from_config_valid_returns_url() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    // A config with `[hub].collection_url` set.
    let config_path = app_dir.config_toml();
    let parent = config_path.parent().unwrap();
    std::fs::create_dir_all(parent).unwrap();
    let toml = b"[hub]\ncollection_url = \"https://example.com/hub_index.json\"\n";
    std::fs::write(&config_path, toml).unwrap();

    let configured = collection_url_from_config(&app_dir).unwrap();
    assert_eq!(
        configured.as_deref(),
        Some("https://example.com/hub_index.json")
    );
}
2194
#[test]
fn collection_url_from_config_no_hub_section_returns_none() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    // Valid TOML, but without a `[hub]` table.
    let config_path = app_dir.config_toml();
    let parent = config_path.parent().unwrap();
    std::fs::create_dir_all(parent).unwrap();
    std::fs::write(&config_path, b"[some_other_section]\nfoo = \"bar\"\n").unwrap();

    let outcome = collection_url_from_config(&app_dir);
    assert!(
        matches!(outcome, Ok(None)),
        "config without [hub] must return Ok(None), got {outcome:?}"
    );
}
2208
2209    // Site 2: load_cached
2210
#[test]
fn load_cached_absent_returns_ok_none() {
    // Nothing has been cached for this URL, so the lookup is `Ok(None)`.
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());

    let outcome = load_cached(&app_dir, "https://example.com/index.json");
    assert!(
        matches!(outcome, Ok(None)),
        "absent cache file must return Ok(None), got {outcome:?}"
    );
}
2221
#[test]
fn load_cached_corrupt_json_within_ttl_returns_err() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());
    let url = "https://example.com/index.json";

    // Drop garbage at the cache location for `url`. The file is written
    // just now, so it is still within the TTL.
    let cache_root = cache_dir(&app_dir);
    std::fs::create_dir_all(&cache_root).unwrap();
    let file_name = format!("{}.json", cache_key(url));
    std::fs::write(cache_root.join(file_name), b"not valid json {{{{").unwrap();

    let outcome = load_cached(&app_dir, url);
    assert!(
        outcome.is_err(),
        "corrupt JSON within TTL must return Err, got {outcome:?}"
    );
}
2238
#[test]
fn load_cached_valid_json_within_ttl_returns_index() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());
    let url = "https://example.com/index.json";

    // A minimal, well-formed index written just now (so within the TTL).
    let cache_root = cache_dir(&app_dir);
    std::fs::create_dir_all(&cache_root).unwrap();
    let file_name = format!("{}.json", cache_key(url));
    std::fs::write(
        cache_root.join(file_name),
        r#"{"schema_version":"hub_index/v0","updated_at":"2026-01-01T00:00:00Z","packages":[]}"#,
    )
    .unwrap();

    let outcome = load_cached(&app_dir, url);
    assert!(
        matches!(outcome, Ok(Some(_))),
        "valid JSON within TTL must return Ok(Some(_)), got {outcome:?}"
    );
}
2255
/// Test helper: move a file's access and modification times `secs` seconds
/// into the past, so that age-based checks see it as old.
///
/// Panics if the file cannot be opened for writing or its times cannot be
/// updated.
fn backdate_file(path: &std::path::Path, secs: u64) {
    use std::time::{Duration, SystemTime};

    let stamp = SystemTime::now() - Duration::from_secs(secs);
    let handle = std::fs::File::options()
        .write(true)
        .open(path)
        .expect("open for backdate");
    handle
        .set_times(
            std::fs::FileTimes::new()
                .set_accessed(stamp)
                .set_modified(stamp),
        )
        .expect("set_times");
}
2268
// L-1: an outdated cache file must be reported as `Stale`, not `NotPresent`.
#[test]
fn load_cached_full_stale_file_returns_stale_variant() {
    let scratch = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(scratch.path().to_path_buf());
    let url = "https://stale.example.com/index.json";

    // Go through the cache-writing helper so the on-disk format is right.
    let index = make_index(vec![("stale_pkg", "0.1.0")]);
    write_cache_for_url(&app_dir, url, &index);

    // Age the file to twice the TTL so it is unambiguously stale.
    let file_name = format!("{}.json", cache_key(url));
    let cache_path = cache_dir(&app_dir).join(file_name);
    backdate_file(&cache_path, CACHE_TTL_SECS * 2);

    let lookup = load_cached_full(&app_dir, url);
    assert!(
        matches!(lookup, CacheLookup::Stale(_)),
        "backdated cache must return Stale variant"
    );
}
2286
2287    // L-1: aggregate_index with stale cache returns data AND emits warning.
2288    #[tokio::test]
2289    async fn aggregate_index_stale_cache_returns_data_and_warning() {
2290        let tmp = tempfile::tempdir().unwrap();
2291        let app_dir_root = tmp.path().to_path_buf();
2292        let app_dir = AppDir::new(app_dir_root.clone());
2293        let url = "https://stale-agg.example.com/index.json";
2294
2295        // Write a valid cache file with one package.
2296        write_cache_for_url(&app_dir, url, &make_index(vec![("stale_pkg", "0.1.0")]));
2297        // Backdate the cache file so it's stale.
2298        let cache_path = cache_dir(&app_dir).join(format!("{}.json", cache_key(url)));
2299        backdate_file(&cache_path, CACHE_TTL_SECS * 2);
2300
2301        // Register the URL in hub_registries.
2302        let reg_path = app_dir.hub_registries_json();
2303        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2304        let reg_json = serde_json::json!({
2305            "registries": [{"source": url, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2306        });
2307        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2308
2309        let svc = super::super::test_support::make_app_service_at(app_dir_root).await;
2310        let (index, warnings) = AppService::aggregate_index(&svc).unwrap();
2311
2312        // Data from stale cache must still be present.
2313        assert!(
2314            index.packages.iter().any(|p| p.entity.name == "stale_pkg"),
2315            "stale package must be included in aggregate, got: {:?}",
2316            index
2317                .packages
2318                .iter()
2319                .map(|p| &p.entity.name)
2320                .collect::<Vec<_>>()
2321        );
2322        // A stale warning must be emitted.
2323        assert!(
2324            warnings
2325                .iter()
2326                .any(|w| w.contains("stale") && w.contains(url)),
2327            "stale cache must emit a warning mentioning the URL, got: {warnings:?}"
2328        );
2329    }
2330
2331    // Site 3: count_evals_for_pkg
2332
2333    #[test]
2334    fn count_evals_for_pkg_absent_dir_returns_zero_no_warnings() {
2335        let tmp = tempfile::tempdir().unwrap();
2336        let app_dir = AppDir::new(tmp.path().to_path_buf());
2337        let mut warnings: Vec<String> = Vec::new();
2338        let count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2339        assert_eq!(count, 0, "absent evals dir must return 0");
2340        assert!(
2341            warnings.is_empty(),
2342            "absent evals dir must produce no warnings, got {warnings:?}"
2343        );
2344    }
2345
2346    #[test]
2347    fn count_evals_for_pkg_corrupt_meta_surfaces_warning() {
2348        let tmp = tempfile::tempdir().unwrap();
2349        let app_dir = AppDir::new(tmp.path().to_path_buf());
2350        let evals_dir = app_dir.evals_dir();
2351        std::fs::create_dir_all(&evals_dir).unwrap();
2352
2353        // Write a result JSON stub so the file is scanned.
2354        std::fs::write(evals_dir.join("cot_9999.json"), b"{}").unwrap();
2355        // Write a corrupt meta.json for the same stem.
2356        std::fs::write(evals_dir.join("cot_9999.meta.json"), b"not json {{{{").unwrap();
2357
2358        let mut warnings: Vec<String> = Vec::new();
2359        let _count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2360        assert!(
2361            !warnings.is_empty(),
2362            "corrupt meta.json must produce at least one warning, got {warnings:?}"
2363        );
2364        assert!(
2365            warnings[0].contains("parse"),
2366            "warning must mention parse: {}",
2367            warnings[0]
2368        );
2369    }
2370
2371    #[test]
2372    fn count_evals_for_pkg_valid_meta_counts_correctly() {
2373        let tmp = tempfile::tempdir().unwrap();
2374        let app_dir = AppDir::new(tmp.path().to_path_buf());
2375        let evals_dir = app_dir.evals_dir();
2376        std::fs::create_dir_all(&evals_dir).unwrap();
2377
2378        // Write a result JSON + valid meta for strategy "cot".
2379        let meta = r#"{"eval_id":"cot_1","strategy":"cot","timestamp":1}"#;
2380        std::fs::write(evals_dir.join("cot_1.json"), b"{}").unwrap();
2381        std::fs::write(evals_dir.join("cot_1.meta.json"), meta).unwrap();
2382
2383        let mut warnings: Vec<String> = Vec::new();
2384        let count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2385        assert_eq!(count, 1, "should count 1 valid eval");
2386        assert!(warnings.is_empty(), "no warnings expected: {warnings:?}");
2387    }
2388
2389    // ─── aggregate_index unit tests ───────────────────────────────
2390
2391    /// Write a minimal HubIndex JSON to the per-source cache for a URL.
2392    fn write_cache_for_url(app_dir: &AppDir, url: &str, index: &HubIndex) {
2393        let dir = cache_dir(app_dir);
2394        std::fs::create_dir_all(&dir).unwrap();
2395        let path = dir.join(format!("{}.json", cache_key(url)));
2396        // justification: test helper, panicking on failure is acceptable in tests
2397        std::fs::write(&path, serde_json::to_string_pretty(index).unwrap()).unwrap();
2398    }
2399
2400    fn make_index(packages: Vec<(&str, &str)>) -> HubIndex {
2401        HubIndex {
2402            schema_version: "hub_index/v0".into(),
2403            updated_at: String::new(),
2404            packages: packages
2405                .into_iter()
2406                .map(|(name, version)| IndexEntry {
2407                    entity: PkgEntity {
2408                        name: name.to_string(),
2409                        version: Some(version.to_string()),
2410                        description: None,
2411                        category: None,
2412                        docstring: None,
2413                        tags: None,
2414                        pkg_type: None,
2415                        type_source: None,
2416                    },
2417                    source: PackageSource::Unknown,
2418                    card_count: 0,
2419                    best_card: None,
2420                })
2421                .collect(),
2422        }
2423    }
2424
2425    // T1: empty sources → empty index, no warnings
2426    #[test]
2427    fn aggregate_index_empty_sources_returns_empty() {
2428        let tmp = tempfile::tempdir().unwrap();
2429        let app_dir = AppDir::new(tmp.path().to_path_buf());
2430        // No registries, no manifest, no seeds in cache → no URLs → empty index.
2431        // discover_index_urls will still produce AUTO_INSTALL_SOURCES seeds,
2432        // but their cache files don't exist → Ok(None) for each → empty result.
2433        let (index, warnings) = {
2434            // Build a minimal AppService-like test by calling the free functions
2435            // and replicating the aggregate_index logic directly.
2436            let mut w: Vec<String> = Vec::new();
2437            let urls = discover_index_urls(&app_dir, &mut w).unwrap();
2438            let mut packages: Vec<IndexEntry> = Vec::new();
2439            let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2440            for url in &urls {
2441                if let Ok(Some(idx)) = load_cached(&app_dir, url) {
2442                    for e in idx.packages {
2443                        if seen.insert(e.entity.name.clone()) {
2444                            packages.push(e);
2445                        }
2446                    }
2447                }
2448            }
2449            (
2450                HubIndex {
2451                    schema_version: "hub_index/v0".into(),
2452                    updated_at: String::new(),
2453                    packages,
2454                },
2455                w,
2456            )
2457        };
2458        assert!(
2459            index.packages.is_empty(),
2460            "no cached sources should produce empty packages"
2461        );
2462        assert!(warnings.is_empty(), "no warnings expected for cache misses");
2463    }
2464
2465    // T1: one source in cache → packages returned
2466    #[test]
2467    fn aggregate_index_one_source_returns_packages() {
2468        let tmp = tempfile::tempdir().unwrap();
2469        let app_dir = AppDir::new(tmp.path().to_path_buf());
2470        let url = "https://example.com/test_index.json";
2471        let source_index = make_index(vec![("cot", "0.1.0"), ("ucb", "0.2.0")]);
2472        write_cache_for_url(&app_dir, url, &source_index);
2473
2474        // Register the URL in hub_registries so discover_index_urls finds it.
2475        let reg_path = app_dir.hub_registries_json();
2476        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2477        let reg_json = serde_json::json!({
2478            "registries": [{"source": url, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2479        });
2480        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2481
2482        let mut warnings: Vec<String> = Vec::new();
2483        let urls = discover_index_urls(&app_dir, &mut warnings).unwrap();
2484        let mut packages: Vec<IndexEntry> = Vec::new();
2485        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2486        for u in &urls {
2487            if let Ok(Some(idx)) = load_cached(&app_dir, u) {
2488                for e in idx.packages {
2489                    if seen.insert(e.entity.name.clone()) {
2490                        packages.push(e);
2491                    }
2492                }
2493            }
2494        }
2495
2496        assert!(
2497            packages.iter().any(|p| p.entity.name == "cot"),
2498            "cot expected"
2499        );
2500        assert!(
2501            packages.iter().any(|p| p.entity.name == "ucb"),
2502            "ucb expected"
2503        );
2504    }
2505
2506    // T2: duplicate package across two sources → first source wins
2507    #[test]
2508    fn aggregate_index_deduplicate_by_name_first_wins() {
2509        let tmp = tempfile::tempdir().unwrap();
2510        let app_dir = AppDir::new(tmp.path().to_path_buf());
2511        let url_a = "https://a.example.com/index.json";
2512        let url_b = "https://b.example.com/index.json";
2513
2514        // Both sources have "cot" but different versions.
2515        let idx_a = make_index(vec![("cot", "1.0.0")]);
2516        let idx_b = make_index(vec![("cot", "2.0.0"), ("ucb", "0.1.0")]);
2517        write_cache_for_url(&app_dir, url_a, &idx_a);
2518        write_cache_for_url(&app_dir, url_b, &idx_b);
2519
2520        let reg_path = app_dir.hub_registries_json();
2521        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2522        let reg_json = serde_json::json!({
2523            "registries": [
2524                {"source": url_a, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"},
2525                {"source": url_b, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}
2526            ]
2527        });
2528        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2529
2530        let mut warnings: Vec<String> = Vec::new();
2531        let urls = {
2532            let mut raw = discover_index_urls(&app_dir, &mut warnings).unwrap();
2533            // Restrict to only our two test URLs so seed URLs don't interfere.
2534            raw.retain(|u| u == url_a || u == url_b);
2535            raw
2536        };
2537
2538        let mut packages: Vec<IndexEntry> = Vec::new();
2539        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2540        for u in &urls {
2541            if let Ok(Some(idx)) = load_cached(&app_dir, u) {
2542                for e in idx.packages {
2543                    if seen.insert(e.entity.name.clone()) {
2544                        packages.push(e);
2545                    }
2546                }
2547            }
2548        }
2549
2550        let cot_count = packages.iter().filter(|p| p.entity.name == "cot").count();
2551        assert_eq!(cot_count, 1, "dedup: cot must appear exactly once");
2552        let ucb_count = packages.iter().filter(|p| p.entity.name == "ucb").count();
2553        assert_eq!(ucb_count, 1, "ucb from second source must appear");
2554    }
2555
2556    // T3: corrupt cache file → warning collected, other sources unaffected
2557    #[test]
2558    fn aggregate_index_corrupt_cache_collects_warning() {
2559        let tmp = tempfile::tempdir().unwrap();
2560        let app_dir = AppDir::new(tmp.path().to_path_buf());
2561        let url_corrupt = "https://corrupt.example.com/index.json";
2562
2563        // Write corrupt JSON to the cache slot.
2564        let dir = cache_dir(&app_dir);
2565        std::fs::create_dir_all(&dir).unwrap();
2566        let path = dir.join(format!("{}.json", cache_key(url_corrupt)));
2567        std::fs::write(&path, b"{{{{ not valid json").unwrap();
2568
2569        let reg_path = app_dir.hub_registries_json();
2570        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2571        let reg_json = serde_json::json!({
2572            "registries": [{"source": url_corrupt, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2573        });
2574        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2575
2576        let mut warnings: Vec<String> = Vec::new();
2577        let urls = discover_index_urls(&app_dir, &mut warnings).unwrap();
2578        let mut packages: Vec<IndexEntry> = Vec::new();
2579        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2580        let mut extra_warnings: Vec<String> = Vec::new();
2581        for u in &urls {
2582            match load_cached(&app_dir, u) {
2583                Ok(Some(idx)) => {
2584                    for e in idx.packages {
2585                        if seen.insert(e.entity.name.clone()) {
2586                            packages.push(e);
2587                        }
2588                    }
2589                }
2590                Ok(None) => {}
2591                Err(e) => extra_warnings.push(format!("hub cache read failed for {u}: {e}")),
2592            }
2593        }
2594
2595        assert!(
2596            !extra_warnings.is_empty(),
2597            "corrupt cache must produce a warning"
2598        );
2599        assert!(
2600            extra_warnings[0].contains("hub cache read failed"),
2601            "warning text mismatch: {}",
2602            extra_warnings[0]
2603        );
2604        assert!(packages.is_empty(), "no packages from corrupt source");
2605    }
2606
2607    // M-2: registry-load failure is demoted to a warning; accumulated
2608    // warnings before the failure are preserved in the returned vec.
2609    #[tokio::test]
2610    async fn aggregate_index_registry_failure_returns_ok_with_warning() {
2611        let tmp = tempfile::tempdir().unwrap();
2612        let app_dir_root = tmp.path().to_path_buf();
2613
2614        // Write corrupt hub_registries.json so load_registries fails.
2615        let reg_path = AppDir::new(app_dir_root.clone()).hub_registries_json();
2616        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2617        std::fs::write(&reg_path, b"{{{{ not valid json").unwrap();
2618
2619        // Also write a corrupt config.toml to generate a pre-registry warning.
2620        // (config.toml hub.collection_url parse warns before the registry step.)
2621        // We skip this to keep the test minimal — just verify registry failure
2622        // demotes to warning and result is Ok.
2623
2624        let svc = super::super::test_support::make_app_service_at(app_dir_root).await;
2625        let result = AppService::aggregate_index(&svc);
2626        assert!(
2627            result.is_ok(),
2628            "aggregate_index must return Ok even on registry-load failure, got: {result:?}"
2629        );
2630        let (index, warnings) = result.unwrap();
2631        assert!(
2632            index.packages.is_empty(),
2633            "degraded response must have empty packages"
2634        );
2635        assert!(
2636            !warnings.is_empty(),
2637            "registry-load failure must produce a warning"
2638        );
2639        assert!(
2640            warnings
2641                .iter()
2642                .any(|w| w.contains("hub registry discovery failed")),
2643            "warning must mention registry discovery failure, got: {warnings:?}"
2644        );
2645    }
2646}