Skip to main content

algocline_app/service/
hub.rs

1//! Hub — package discovery, search, and index management.
2//!
3//! The Hub is algocline's package registry layer.  It aggregates remote
4//! index data with local install state so that users (via AI) can
5//! **discover** packages they haven't installed yet, and **inspect**
6//! installed packages with full Card and eval statistics.
7//!
8//! ## Staged design
9//!
10//! | Stage | Scope | Status |
11//! |-------|-------|--------|
12//! | **1** | Card Collection install, Pkg-bundled cards | Done |
13//! | **2** | Hub MCP tools (`hub_search`, `hub_info`, `hub_reindex`), local index | Done |
14//! | **3** | Aggregated remote collection index, `hub_publish`, LP | Planned |
15//!
16//! ## MCP tools
17//!
18//! | Tool | Description |
19//! |------|-------------|
20//! | `alc_hub_search` | Discover packages across remote + local indices |
21//! | `alc_hub_info` | Detailed single-package view (meta + cards + aliases + stats) |
22//! | `alc_hub_reindex` | Rebuild index from local packages or a repo checkout |
23//!
24//! ## Index schema (`hub_index/v0`)
25//!
26//! ```json
27//! {
28//!   "schema_version": "hub_index/v0",
29//!   "updated_at": "2026-04-12T10:00:00Z",
30//!   "packages": [{
31//!     "name": "cot",
32//!     "version": "0.1.0",
33//!     "description": "Chain-of-Thought prompting",
34//!     "category": "reasoning",
35//!     "source": "https://github.com/...",
36//!     "card_count": 3,
37//!     "best_card": { "card_id": "...", "model": "...", "pass_rate": 0.82, "scenario": "..." }
38//!   }]
39//! }
40//! ```
41//!
42//! Index generation uses `init.lua` M.meta parsing only — no Lua VM
43//! required.  This keeps the index buildable in CI environments.
44//!
45//! ## Index URL discovery (4-tier)
46//!
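//! Sources are gathered from the tiers below and URLs are deduplicated.
//! The Tier 0 collection URL always comes first; index URLs derived from
//! Tiers 1–3 are merged and then sorted lexicographically: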
48//!
49//!   0. **Collection URL** — `[hub].collection_url` in `~/.algocline/config.toml`.
50//!      Aggregated index containing all known packages (Stage 3).
51//!   1. **Hub registries** — `~/.algocline/hub_registries.json`, auto-populated
52//!      by `pkg_install` and `card_install`.
53//!   2. **Installed manifest** — `~/.algocline/installed.json`, fallback for
54//!      sources registered before registries existed.
55//!   3. **Compiled-in seeds** — bundled-packages source for first-run bootstrap.
56//!
57//! GitHub repo URLs are transformed to raw index URLs:
58//!
59//! ```text
60//! https://github.com/{owner}/{repo}
61//!   → https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
62//! ```
63//!
64//! ## Caching
65//!
66//! Remote indices are cached per-source at
67//! `~/.algocline/hub_cache/{hash}.json` where hash is FNV-1a of the
68//! URL.  TTL is 1 hour.
69//!
70//! ## Registry persistence
71//!
72//! `~/.algocline/hub_registries.json` records source URLs from
73//! `pkg_install` and `card_install`.  Written atomically (tempfile +
74//! rename) to avoid corruption on interruption.
75
76use std::collections::{HashMap, HashSet};
77use std::path::PathBuf;
78
79use serde::{Deserialize, Serialize};
80
81use algocline_core::{AppDir, PkgEntity};
82
83use super::list_opts::{
84    apply_sort_by_value, matches_filter, parse_sort, project_fields, resolve_fields, ListOpts,
85    HUB_SEARCH_FULL, HUB_SEARCH_SUMMARY,
86};
87use super::manifest;
88use super::resolve::AUTO_INSTALL_SOURCES;
89use super::source::PackageSource;
90use super::AppService;
91use super::HubRegistriesError;
92
93// ─── Constants ─────────────────────────────────────────────────
94
/// How long a cached remote index counts as fresh: one hour, in seconds.
const CACHE_TTL_SECS: u64 = 60 * 60;

/// Overall time budget for a single HTTP request: 30 seconds.
const HTTP_TIMEOUT: std::time::Duration = std::time::Duration::new(30, 0);
100
101// ─── Index schema ──────────────────────────────────────────────
102
/// Remote index — same shape as the local index so merge is trivial.
///
/// Top-level object of a `hub_index/v0` document.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct HubIndex {
    /// Schema identifier (e.g. `"hub_index/v0"`). Has no serde default, so
    /// deserialization fails when the key is missing.
    pub schema_version: String,
    /// Timestamp string of the index; empty when the key is missing.
    #[serde(default)]
    pub updated_at: String,
    /// Package entries; empty when the key is missing.
    #[serde(default)]
    pub packages: Vec<IndexEntry>,
}
112
/// One package in the index.
///
/// `entity` carries the canonical Lua `M.meta` projection (name, version,
/// description, category, docstring) via `#[serde(flatten)]` so the wire
/// shape is identical to the pre-refactor flat-object layout. `source`
/// is the typed package source; `card_count` / `best_card` are hub-side
/// enrichments computed at index-build time.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct IndexEntry {
    /// Package metadata; its fields appear inline in the JSON object
    /// rather than under an `entity` key.
    #[serde(flatten)]
    pub entity: PkgEntity,
    /// How this package was obtained. Typed on write; legacy bare strings
    /// in pre-migration `hub_index.json` deserialize via the serde shim
    /// on `PackageSource` (see `service::source`).
    #[serde(default)]
    pub source: PackageSource,
    /// Number of cards recorded for the package; `0` when the key is missing.
    #[serde(default)]
    pub card_count: usize,
    /// Summary of the package's best card; `None` when the key is missing.
    #[serde(default)]
    pub best_card: Option<BestCard>,
}
134
/// Best card summary within a package.
///
/// Only `card_id` is required on input; the other fields fall back to
/// their type defaults (empty string / `0.0`) when missing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct BestCard {
    /// Identifier of the card.
    pub card_id: String,
    /// Model name the card was evaluated with; empty when missing.
    #[serde(default)]
    pub model: String,
    /// Pass rate of the card; `0.0` when missing.
    /// NOTE(review): presumably a fraction in `0.0..=1.0` (the schema
    /// example uses `0.82`) — confirm against the index builder.
    #[serde(default)]
    pub pass_rate: f64,
    /// Scenario name the card was evaluated on; empty when missing.
    #[serde(default)]
    pub scenario: String,
}
146
/// Search result — index entry enriched with local install state.
///
/// `entity` is serialized through the custom
/// `serialize_entity_without_docstring` function, which writes only
/// `name`, `version`, `description` and `category`. The default serde
/// output therefore never exposes the docstring field — docstrings can be
/// large and dominate payload size. The `hub_search` projection path
/// re-attaches the docstring to the output object when the resolved field
/// set contains `"docstring"`, via
/// [`SearchResult::to_value_with_optional_docstring`].
///
/// `docstring_matched` is a query-time signal: it is `Some(true)` only
/// when the query hit docstring and none of {name, description, category}.
/// Otherwise (no query, or query hit any of the other fields) it is
/// `None` and omitted from the output.
///
/// Because `#[serde(flatten)]` composes poorly with field-level
/// `skip_serializing`, we carry the non-docstring part of `PkgEntity`
/// via a custom `serialize_entity_without_docstring` path rather than a
/// bare `#[serde(flatten)]`. The struct still holds a full `PkgEntity`
/// internally for consistency with `IndexEntry`.
#[derive(Debug, Clone, Serialize)]
struct SearchResult {
    /// Package metadata, flattened into the output object without `docstring`.
    #[serde(flatten, serialize_with = "serialize_entity_without_docstring")]
    entity: PkgEntity,
    /// Typed source (mirrors `IndexEntry.source`).
    source: PackageSource,
    /// Whether the package is installed locally.
    installed: bool,
    /// Number of cards for the package.
    card_count: usize,
    /// Best card summary, if any; serialized as `null` when `None`.
    best_card: Option<BestCard>,
    /// Docstring-only match marker; the key is omitted entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    docstring_matched: Option<bool>,
}
179
180/// Serialize a `PkgEntity` as a flat JSON object, intentionally dropping
181/// the `docstring` field so large docstrings do not dominate `hub_search`
182/// payloads. The projection path re-attaches docstring via
183/// [`SearchResult::to_value_with_optional_docstring`].
184fn serialize_entity_without_docstring<S>(entity: &PkgEntity, ser: S) -> Result<S::Ok, S::Error>
185where
186    S: serde::Serializer,
187{
188    use serde::ser::SerializeMap;
189    let mut map = ser.serialize_map(Some(4))?;
190    map.serialize_entry("name", &entity.name)?;
191    map.serialize_entry("version", &entity.version)?;
192    map.serialize_entry("description", &entity.description)?;
193    map.serialize_entry("category", &entity.category)?;
194    map.end()
195}
196
197impl SearchResult {
198    /// Serialize `self` to a JSON `Value`, optionally re-attaching
199    /// `docstring` to the resulting object.
200    ///
201    /// `skip_serializing` removes `docstring` from every serde output
202    /// path. When projection selects `docstring` as an output field, we
203    /// need to put it back — this helper bridges that gap by inserting
204    /// the field manually into the resulting `Value::Object`.
205    ///
206    /// Returns the original `Value` unchanged if serialization produced
207    /// a non-object (should not happen for `SearchResult`, but we stay
208    /// defensive because the downstream `project_fields` contract
209    /// tolerates non-objects).
210    fn to_value_with_optional_docstring(&self, include_docstring: bool) -> serde_json::Value {
211        let mut v = serde_json::to_value(self).unwrap_or(serde_json::Value::Null);
212        if include_docstring {
213            if let serde_json::Value::Object(ref mut map) = v {
214                let doc = self.entity.docstring.clone().unwrap_or_default();
215                map.insert("docstring".to_string(), serde_json::Value::String(doc));
216            }
217        }
218        v
219    }
220}
221
222// ─── Hub registries ───────────────────────────────────────────
223//
224// Persistent file (`~/.algocline/hub_registries.json`) that records
225// source URLs from `pkg_install` and `card_install`.  This is the
226// primary source for Hub index URL discovery — the manifest and the
227// bundled-packages seed serve as fallback sources.
228
/// One entry in `hub_registries.json`.
///
/// All three fields are required when deserializing (none has a serde
/// default).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct RegistryEntry {
    /// Original source URL (Git repo or local path).
    /// `register_source` stores it with trailing `/` removed.
    pub source: String,
    /// How it was registered: "pkg_install" or "card_install".
    pub origin: String,
    /// ISO 8601 timestamp of when the entry was added.
    pub added_at: String,
}
239
/// Top-level registries file.
///
/// `Default` yields an empty list, which is what `load_registries`
/// returns when the file does not exist yet.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub(crate) struct HubRegistries {
    /// Registered sources. The key is required on input (no serde default).
    pub registries: Vec<RegistryEntry>,
}
245
/// Path of the hub registries file, as reported by `AppDir::hub_registries_json`.
fn registries_path(app_dir: &AppDir) -> PathBuf {
    app_dir.hub_registries_json()
}
249
250/// Load registries from disk.
251///
252/// Returns `Ok(HubRegistries::default())` when the file does not yet exist —
253/// the file is created lazily on first `register_source` call. Returns `Err`
254/// when the file exists but cannot be read (I/O error) or parsed (corrupt
255/// JSON), so callers can surface the failure instead of silently degrading hub
256/// discovery.
257fn load_registries(app_dir: &AppDir) -> Result<HubRegistries, HubRegistriesError> {
258    let path = registries_path(app_dir);
259    if !path.exists() {
260        return Ok(HubRegistries::default());
261    }
262    let content = std::fs::read_to_string(&path).map_err(|e| {
263        HubRegistriesError::Parse(format!(
264            "failed to read hub_registries.json at {}: {e}",
265            path.display()
266        ))
267    })?;
268    serde_json::from_str::<HubRegistries>(&content).map_err(|e| {
269        HubRegistriesError::Parse(format!(
270            "failed to parse hub_registries.json at {}: {e}",
271            path.display()
272        ))
273    })
274}
275
276/// Register a source URL.  Deduplicates by normalized URL.
277///
278/// Returns `Ok(())` on success or when the input is skipped (empty /
279/// local path / already registered). Filesystem failures are returned
280/// as `Err(String)` so callers can surface them on the MCP wire
281/// response — the registry is best-effort relative to the `pkg_install`
282/// itself, but the caller still needs to know when it silently failed
283/// (otherwise hub discovery degrades without any signal).
284///
285/// Uses atomic write (tempfile + rename) to avoid partial writes if
286/// the process is interrupted. Read-modify-write is not locked across
287/// processes, but MCP servers are single-process so this is safe in
288/// practice.
289pub(crate) fn register_source(app_dir: &AppDir, source: &str, origin: &str) -> Result<(), String> {
290    let normalized = source.trim_end_matches('/').to_string();
291    if normalized.is_empty() {
292        return Ok(());
293    }
294    // Skip local paths — they can't host a remote index
295    if normalized.starts_with('/') || normalized.starts_with('.') {
296        return Ok(());
297    }
298
299    let path = registries_path(app_dir);
300    if let Some(parent) = path.parent() {
301        std::fs::create_dir_all(parent).map_err(|e| {
302            format!(
303                "failed to create hub registries dir {}: {e}",
304                parent.display()
305            )
306        })?;
307    }
308
309    // Re-read from disk right before write to minimize TOCTOU window.
310    // Parse failure is propagated — a corrupt registries file means we
311    // cannot safely read-modify-write without risking data loss.
312    let mut reg = load_registries(app_dir).map_err(|e| format!("cannot register source: {e}"))?;
313
314    // Already registered?
315    if reg
316        .registries
317        .iter()
318        .any(|e| e.source.trim_end_matches('/') == normalized)
319    {
320        return Ok(());
321    }
322
323    reg.registries.push(RegistryEntry {
324        source: normalized,
325        origin: origin.to_string(),
326        added_at: manifest::now_iso8601(),
327    });
328
329    // Atomic write: write to temp file, then rename
330    let json = serde_json::to_string_pretty(&reg)
331        .map_err(|e| format!("failed to serialize hub registries: {e}"))?;
332    let tmp_path = path.with_extension("json.tmp");
333    std::fs::write(&tmp_path, &json).map_err(|e| {
334        format!(
335            "failed to write hub registries tmp {}: {e}",
336            tmp_path.display()
337        )
338    })?;
339    std::fs::rename(&tmp_path, &path).map_err(|e| {
340        // Best-effort cleanup of the stale tmp file on rename failure.
341        let _ = std::fs::remove_file(&tmp_path);
342        format!(
343            "failed to atomically rename hub registries onto {}: {e}",
344            path.display()
345        )
346    })
347}
348
349// ─── Hub config ──────────────────────────────────────────────
350//
351// Optional `[hub]` section in `~/.algocline/config.toml`:
352//
353//   [hub]
354//   collection_url = "https://raw.githubusercontent.com/.../hub_index.json"
355//
356// When set, this is fetched as Tier 0 (the aggregated collection
357// index containing all known packages, including uninstalled ones).
358
359/// Read the `[hub].collection_url` from `~/.algocline/config.toml`.
360///
361/// Returns:
362/// - `Ok(Some(url))` — file exists, parses cleanly, `[hub].collection_url` present and non-empty.
363/// - `Ok(None)` — file absent (normal: config is optional) or `[hub].collection_url` not set.
364/// - `Err(msg)` — file exists but TOML parse fails (corruption); caller should surface as warning.
365fn collection_url_from_config(app_dir: &AppDir) -> Result<Option<String>, String> {
366    let path = app_dir.config_toml();
367    let content = match std::fs::read_to_string(&path) {
368        Ok(c) => c,
369        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
370        Err(_) => return Ok(None), // permission errors etc. treated as absent
371    };
372    let doc: toml_edit::DocumentMut = content
373        .parse()
374        .map_err(|e| format!("config.toml parse: {e}"))?;
375    let url = match doc
376        .get("hub")
377        .and_then(|h| h.get("collection_url"))
378        .and_then(|v| v.as_str())
379    {
380        Some(s) => s.trim().to_string(),
381        None => return Ok(None),
382    };
383    if url.is_empty() {
384        Ok(None)
385    } else {
386        Ok(Some(url))
387    }
388}
389
390// ─── Index URL discovery ──────────────────────────────────────
391//
392// Derives remote index URLs from:
393//   0. Hub Collection URL (from config.toml) — aggregated index
394//   1. Hub registries (`hub_registries.json`) — primary source
395//   2. Unique `source` fields in the installed-packages manifest
396//   3. Bundled-packages seed (for first-run bootstrap)
397//
398// GitHub repos are transformed:
399//   https://github.com/{owner}/{repo}  →
400//   https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
401
/// Convert a repository URL into the URL of its `hub_index.json`.
///
/// Trailing `/` and a trailing `.git` are stripped first. Then:
///
/// - `https://github.com/{owner}/{repo}[/...]` becomes
///   `https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json`.
///   Both `owner` and `repo` must be non-empty; malformed inputs such as
///   `https://github.com//repo` or `https://github.com/owner/.git` are not
///   turned into a raw URL with an empty path segment.
/// - Any other URL ending in `.json` is assumed to already be a direct
///   index URL and is returned as-is (after trimming).
///
/// Returns `None` for everything else (future: support other hosts).
fn repo_to_index_url(repo_url: &str) -> Option<String> {
    let trimmed = repo_url.trim_end_matches('/').trim_end_matches(".git");
    if let Some(path) = trimmed.strip_prefix("https://github.com/") {
        // path = "owner/repo[/anything-else]"; only the first two segments matter.
        let mut segments = path.split('/');
        if let (Some(owner), Some(repo)) = (segments.next(), segments.next()) {
            if !owner.is_empty() && !repo.is_empty() {
                return Some(format!(
                    "https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json"
                ));
            }
        }
    }
    // Not a usable GitHub repo URL: accept it only if it already points at a JSON file.
    if trimmed.ends_with(".json") {
        Some(trimmed.to_string())
    } else {
        None
    }
}
423
424/// Collect unique index URLs from config + registries + manifest + bundled seeds.
425///
426/// Returns `Err` if the installed manifest cannot be read (corrupt JSON /
427/// permission denied). The function intentionally surfaces manifest-read
428/// failures rather than silently skipping — callers feed these URLs into
429/// hub resolution, and a partial URL set is indistinguishable from a
430/// corrupt manifest without the signal.
431///
432/// `warnings` collects non-fatal issues (e.g. config.toml TOML parse failure)
433/// that the caller should surface on the MCP wire response.
434fn discover_index_urls(
435    app_dir: &AppDir,
436    warnings: &mut Vec<String>,
437) -> Result<Vec<String>, String> {
438    let mut index_urls: Vec<String> = Vec::new();
439
440    // 0. From config.toml [hub].collection_url (Tier 0 — aggregated collection).
441    // Parse failures (corrupted config) are collected as warnings so the
442    // rest of discovery proceeds — the file is optional, but corruption
443    // is distinguishable from absence and must be surfaced to the caller.
444    match collection_url_from_config(app_dir) {
445        Ok(Some(url)) => index_urls.push(url),
446        Ok(None) => {}
447        Err(e) => warnings.push(format!("config.toml hub.collection_url: {e}")),
448    }
449
450    let mut repo_urls: HashSet<String> = HashSet::new();
451
452    // 1. From hub registries (primary). Parse failure is propagated so
453    // callers know the registry is degraded — a partial URL set from a
454    // corrupt file is indistinguishable from intentionally empty.
455    // `HubRegistriesError` is converted to `String` at the wire boundary
456    // (`discover_index_urls` still returns `Result<_, String>`).
457    let reg = load_registries(app_dir).map_err(|e| e.to_string())?;
458    for entry in &reg.registries {
459        let normalized = entry.source.trim_end_matches('/').to_string();
460        if !normalized.is_empty() {
461            repo_urls.insert(normalized);
462        }
463    }
464
465    // 2. From manifest (catch sources registered before hub_registries existed).
466    // Only Git-variant sources can host a remote hub_index.json; other variants
467    // (Path / Installed / Bundled / Unknown) are skipped by `git_url()` returning None.
468    let m = manifest::load_manifest(app_dir)?;
469    for entry in m.packages.values() {
470        if let Some(url) = entry.source.git_url() {
471            let normalized = url.trim_end_matches('/').to_string();
472            if !normalized.is_empty() {
473                repo_urls.insert(normalized);
474            }
475        }
476    }
477
478    // 3. Fallback: bundled sources (ensures at least these are checked)
479    for url in AUTO_INSTALL_SOURCES {
480        repo_urls.insert(url.to_string());
481    }
482
483    // 4. Transform repo URLs → index URLs, dedup against Tier 0
484    let existing: HashSet<String> = index_urls.iter().cloned().collect();
485    let mut derived: Vec<String> = repo_urls
486        .iter()
487        .filter_map(|url| repo_to_index_url(url))
488        .filter(|url| !existing.contains(url))
489        .collect();
490    derived.sort();
491    derived.dedup();
492    index_urls.extend(derived);
493
494    Ok(index_urls)
495}
496
497// ─── Per-source cache ─────────────────────────────────────────
498//
499// Each remote index is cached separately at
500// `~/.algocline/hub_cache/{hash}.json` where hash is derived from
501// the index URL. This avoids mixing data from different registries
502// and allows per-source TTL validation.
503
/// Directory holding the per-source cache files, as reported by `AppDir::hub_cache_dir`.
fn cache_dir(app_dir: &AppDir) -> PathBuf {
    app_dir.hub_cache_dir()
}
507
/// Derive a stable cache file stem from an index URL.
///
/// Computes the 64-bit FNV-1a hash of the URL's bytes and renders it as
/// 16 lowercase, zero-padded hex digits. Hand-rolled to avoid pulling in a
/// hash crate — good enough for cache file naming.
fn cache_key(url: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    let hash = url.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{hash:016x}")
}
518
/// Result of a cache lookup distinguishing absent, stale, fresh, and corrupt.
///
/// Produced by `load_cached_full` (called from `aggregate_index`) to allow
/// stale data to be merged into the aggregate while a warning is emitted.
/// `load_cached` (used by `fetch_one`) maps both `NotPresent` and `Stale`
/// to `Ok(None)` for backward compat, and `Corrupt` to `Err`.
enum CacheLookup {
    /// File absent, or its metadata / modification age could not be determined.
    NotPresent,
    /// File parsed cleanly but is older than `CACHE_TTL_SECS`; contains the stale data.
    Stale(HubIndex),
    /// File parsed cleanly and is within `CACHE_TTL_SECS`.
    Fresh(HubIndex),
    /// File present but could not be read or parsed, regardless of its age;
    /// contains a description of the failure.
    Corrupt(String),
}
535
536/// Full cache lookup that distinguishes stale from absent.
537///
538/// Used by `aggregate_index` so stale data can still be merged with a
539/// warning, rather than being silently discarded.
540fn load_cached_full(app_dir: &AppDir, url: &str) -> CacheLookup {
541    let dir = cache_dir(app_dir);
542    let path = dir.join(format!("{}.json", cache_key(url)));
543    if !path.exists() {
544        return CacheLookup::NotPresent;
545    }
546    let metadata = match std::fs::metadata(&path) {
547        Ok(m) => m,
548        Err(_) => return CacheLookup::NotPresent,
549    };
550    let age = match metadata.modified().ok().and_then(|t| t.elapsed().ok()) {
551        Some(a) => a,
552        None => return CacheLookup::NotPresent,
553    };
554    let content = match std::fs::read_to_string(&path) {
555        Ok(c) => c,
556        Err(e) => return CacheLookup::Corrupt(format!("hub cache read {}: {e}", path.display())),
557    };
558    match serde_json::from_str::<HubIndex>(&content) {
559        Ok(index) => {
560            if age.as_secs() > CACHE_TTL_SECS {
561                CacheLookup::Stale(index)
562            } else {
563                CacheLookup::Fresh(index)
564            }
565        }
566        Err(e) => CacheLookup::Corrupt(format!("hub cache parse {}: {e}", path.display())),
567    }
568}
569
570/// Load cached remote index for a specific URL if fresh (within TTL).
571///
572/// Returns:
573/// - `Ok(Some(index))` — cache hit: file exists, within TTL, parses cleanly.
574/// - `Ok(None)` — cache miss: file absent, expired, or metadata unreadable (treat as miss).
575/// - `Err(msg)` — file exists and is within TTL but JSON parse fails (corruption);
576///   caller should surface as warning and fall back to a network fetch.
577fn load_cached(app_dir: &AppDir, url: &str) -> Result<Option<HubIndex>, String> {
578    match load_cached_full(app_dir, url) {
579        CacheLookup::Fresh(index) => Ok(Some(index)),
580        CacheLookup::NotPresent | CacheLookup::Stale(_) => Ok(None),
581        CacheLookup::Corrupt(msg) => Err(msg),
582    }
583}
584
585/// Save remote index to per-source cache file.
586///
587/// Returns `Ok(())` on success. Cache write failures are returned as
588/// `Err(String)`; the caller (`fetch_one`) carries them out of band so
589/// hub fetch still completes (the index is in memory) but the warning
590/// surfaces to the MCP wire response via the existing `warnings` channel.
591fn save_cached(app_dir: &AppDir, url: &str, index: &HubIndex) -> Result<(), String> {
592    let dir = cache_dir(app_dir);
593    std::fs::create_dir_all(&dir)
594        .map_err(|e| format!("failed to create hub cache dir {}: {e}", dir.display()))?;
595    let path = dir.join(format!("{}.json", cache_key(url)));
596    let json = serde_json::to_string_pretty(index)
597        .map_err(|e| format!("failed to serialize hub cache: {e}"))?;
598    std::fs::write(&path, json)
599        .map_err(|e| format!("failed to write hub cache {}: {e}", path.display()))
600}
601
602// ─── Remote fetch ──────────────────────────────────────────────
603
604/// Fetch a single remote index by URL, using per-source cache.
605///
606/// Returns the index plus an optional cache-related warning. The warning
607/// is non-None when either:
608/// - The network fetch succeeded but persisting the cache to disk failed.
609/// - The cache file was present and within TTL but failed to parse
610///   (corruption); in that case the function falls back to a network
611///   fetch and includes the parse-failure in the warning so the operator
612///   can investigate the on-disk state.
613fn fetch_one(app_dir: &AppDir, url: &str) -> Result<(HubIndex, Option<String>), String> {
614    // Distinguish cache corruption (Err) from cache miss (Ok(None)).
615    match load_cached(app_dir, url) {
616        Ok(Some(cached)) => return Ok((cached, None)),
617        Ok(None) => {} // cache miss — proceed to network fetch
618        Err(e) => {
619            // Cache file is corrupt. Fall through to network fetch and
620            // carry the corruption warning so the caller can surface it.
621            // We don't return Err here because the network path may still succeed.
622            let warn = format!("hub cache corrupted for {url}: {e}; falling back to network");
623            // Attempt network fetch; on success, attach the cache-corruption warning.
624            return fetch_one_from_network(app_dir, url)
625                .map(|(idx, save_warn)| {
626                    // Prefer the corruption warning; save_warn is secondary.
627                    let combined = Some(match save_warn {
628                        Some(sw) => format!("{warn}; {sw}"),
629                        None => warn.clone(),
630                    });
631                    (idx, combined)
632                })
633                .map_err(|fetch_err| format!("{warn}; network fetch also failed: {fetch_err}"));
634        }
635    }
636
637    fetch_one_from_network(app_dir, url)
638}
639
640/// Network-only path for fetching a remote index (no cache read).
641///
642/// On success returns `(index, Option<cache_write_warning>)`.
643fn fetch_one_from_network(
644    app_dir: &AppDir,
645    url: &str,
646) -> Result<(HubIndex, Option<String>), String> {
647    let agent = ureq::Agent::new_with_config(
648        ureq::config::Config::builder()
649            .timeout_global(Some(HTTP_TIMEOUT))
650            .build(),
651    );
652    let body: String = agent
653        .get(url)
654        .call()
655        .map_err(|e| format!("Failed to fetch {url}: {e}"))?
656        .body_mut()
657        .read_to_string()
658        .map_err(|e| format!("Failed to read response from {url}: {e}"))?;
659
660    let index: HubIndex = serde_json::from_str(&body)
661        .map_err(|e| format!("Failed to parse index from {url}: {e}"))?;
662
663    let cache_warning = save_cached(app_dir, url, &index)
664        .err()
665        .map(|e| format!("hub cache write for {url}: {e}"));
666    Ok((index, cache_warning))
667}
668
669/// Fetch all discovered remote indices and merge into one.
670/// Falls back gracefully: failed sources are skipped with warnings.
671fn fetch_remote_indices(app_dir: &AppDir) -> Result<(HubIndex, Vec<String>), String> {
672    let mut warnings: Vec<String> = Vec::new();
673    let urls = discover_index_urls(app_dir, &mut warnings)?;
674    let mut all_packages: Vec<IndexEntry> = Vec::new();
675    let mut seen_names: HashSet<String> = HashSet::new();
676
677    for url in &urls {
678        match fetch_one(app_dir, url) {
679            Ok((index, cache_warning)) => {
680                for entry in index.packages {
681                    if seen_names.insert(entry.entity.name.clone()) {
682                        all_packages.push(entry);
683                    }
684                    // If duplicate name across sources, first wins
685                }
686                if let Some(w) = cache_warning {
687                    warnings.push(w);
688                }
689            }
690            Err(e) => {
691                warnings.push(e);
692            }
693        }
694    }
695
696    if all_packages.is_empty() && !warnings.is_empty() {
697        warnings.insert(
698            0,
699            "all remote indices unavailable, showing local packages only".to_string(),
700        );
701    }
702
703    let merged = HubIndex {
704        schema_version: "hub_index/v0".into(),
705        updated_at: String::new(),
706        packages: all_packages,
707    };
708    Ok((merged, warnings))
709}
710
711// ─── Local state ───────────────────────────────────────────────
712
713/// Build a set of locally installed package names from `installed.json`
714/// and the `~/.algocline/packages/` directory.
715fn installed_packages(app_dir: &AppDir) -> Result<HashMap<String, Option<String>>, String> {
716    let mut map = HashMap::new();
717
718    // From manifest (has version info)
719    let m = manifest::load_manifest(app_dir)?;
720    for (name, entry) in &m.packages {
721        map.insert(name.clone(), entry.version.clone());
722    }
723
724    // Also scan packages/ dir in case manifest is stale
725    let pkg_dir = app_dir.packages_dir();
726    if let Ok(entries) = std::fs::read_dir(&pkg_dir) {
727        for entry in entries.flatten() {
728            if entry.path().is_dir() {
729                if let Some(name) = entry.file_name().to_str() {
730                    map.entry(name.to_string()).or_insert(None);
731                }
732            }
733        }
734    }
735
736    Ok(map)
737}
738
739/// Count local cards per package from `{app_dir}/cards/{pkg}/`.
740fn local_card_counts(app_dir: &AppDir) -> HashMap<String, usize> {
741    let mut map = HashMap::new();
742    let cards_dir = app_dir.cards_dir();
743    let entries = match std::fs::read_dir(&cards_dir) {
744        Ok(e) => e,
745        Err(_) => return map,
746    };
747    for entry in entries.flatten() {
748        if !entry.path().is_dir() {
749            continue;
750        }
751        let pkg = match entry.file_name().to_str() {
752            Some(n) => n.to_string(),
753            None => continue,
754        };
755        let count = std::fs::read_dir(entry.path())
756            .map(|es| {
757                es.flatten()
758                    .filter(|e| e.path().extension().is_some_and(|ext| ext == "toml"))
759                    .count()
760            })
761            .unwrap_or(0);
762        if count > 0 {
763            map.insert(pkg, count);
764        }
765    }
766    map
767}
768
769/// Count eval results for a specific package by scanning `{app_dir}/evals/`.
770///
771/// Reads only `.meta.json` files (lightweight) to check the strategy field.
772/// Falls back to reading full eval JSON if meta is missing.
773///
774/// `warnings` receives per-file corruption messages (read or parse failures).
775/// I/O errors on the directory itself return 0 silently (evals dir absent is
776/// a legitimate "no evals yet" state). Per-file errors that indicate corruption
777/// (file exists but is unreadable or unparseable) are pushed to `warnings` so
778/// the caller can surface them on the MCP wire response.
779fn count_evals_for_pkg(app_dir: &AppDir, pkg: &str, warnings: &mut Vec<String>) -> usize {
780    let evals_dir = app_dir.evals_dir();
781    let entries = match std::fs::read_dir(&evals_dir) {
782        Ok(e) => e,
783        Err(_) => return 0,
784    };
785
786    // Collect all filenames first so ordering doesn't matter.
787    // We track stems that have a .meta.json to avoid reading the full eval JSON.
788    let mut meta_stems: HashSet<String> = HashSet::new();
789    let mut meta_matches: usize = 0;
790    let mut non_meta_paths: Vec<(PathBuf, String)> = Vec::new(); // (path, stem)
791
792    for entry in entries.flatten() {
793        let path = entry.path();
794        let name = match path.file_name().and_then(|n| n.to_str()) {
795            Some(n) => n.to_string(),
796            None => continue,
797        };
798
799        if name.ends_with(".meta.json") {
800            let stem = name.trim_end_matches(".meta.json").to_string();
801            meta_stems.insert(stem.clone());
802            // Distinguish I/O failure from parse failure so corruption is visible.
803            match std::fs::read_to_string(&path) {
804                Ok(content) => match serde_json::from_str::<serde_json::Value>(&content) {
805                    Ok(val) => {
806                        if val.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
807                            meta_matches += 1;
808                        }
809                    }
810                    Err(e) => warnings.push(format!("eval meta parse {}: {e}", path.display())),
811                },
812                Err(e) => warnings.push(format!("eval meta read {}: {e}", path.display())),
813            }
814            continue;
815        }
816
817        // Skip non-json or comparison files
818        if !name.ends_with(".json") || name.starts_with("compare_") {
819            continue;
820        }
821
822        let stem = path
823            .file_stem()
824            .and_then(|s| s.to_str())
825            .unwrap_or("")
826            .to_string();
827        non_meta_paths.push((path, stem));
828    }
829
830    // Only read full eval JSON for entries without a .meta.json.
831    // Distinguish I/O and parse failures; both are surfaced as warnings.
832    let mut fallback_matches: usize = 0;
833    for (path, stem) in &non_meta_paths {
834        if meta_stems.contains(stem) {
835            continue;
836        }
837        match std::fs::read_to_string(path) {
838            Ok(c) => match serde_json::from_str::<serde_json::Value>(&c) {
839                Ok(v) => {
840                    if v.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
841                        fallback_matches += 1;
842                    }
843                }
844                Err(e) => warnings.push(format!("eval result parse {}: {e}", path.display())),
845            },
846            Err(e) => warnings.push(format!("eval result read {}: {e}", path.display())),
847        }
848    }
849
850    meta_matches + fallback_matches
851}
852
853// ─── Merge ─────────────────────────────────────────────────────
854
855/// Merge remote index with local install state.
856///
857/// When a package is installed locally and the remote index lacks a
858/// docstring (pre-v0.21 indices), the docstring is extracted from the
859/// local `init.lua` so that full-text search works immediately.
860fn merge(app_dir: &AppDir, remote: &HubIndex) -> Result<Vec<SearchResult>, String> {
861    let installed = installed_packages(app_dir)?;
862    let card_counts = local_card_counts(app_dir);
863    let pkg_dir: Option<PathBuf> = Some(app_dir.packages_dir());
864
865    let mut seen: HashSet<String> = HashSet::new();
866    let mut results: Vec<SearchResult> = Vec::new();
867
868    for entry in &remote.packages {
869        let pkg_name = &entry.entity.name;
870        let is_installed = installed.contains_key(pkg_name);
871        let local_cards = card_counts.get(pkg_name).copied().unwrap_or(0);
872
873        // Supplement empty docstring from local init.lua when installed.
874        // Re-parse via `PkgEntity` so the supplementation path stays
875        // consistent with `build_index`.
876        let docstring = if entry.entity.docstring.as_deref().unwrap_or("").is_empty()
877            && is_installed
878        {
879            pkg_dir
880                .as_ref()
881                .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(pkg_name).join("init.lua")))
882                .and_then(|e| e.docstring)
883        } else {
884            entry.entity.docstring.clone()
885        };
886
887        seen.insert(pkg_name.clone());
888        let mut merged_entity = entry.entity.clone();
889        merged_entity.docstring = docstring;
890        results.push(SearchResult {
891            entity: merged_entity,
892            source: entry.source.clone(),
893            installed: is_installed,
894            card_count: if is_installed && local_cards > entry.card_count {
895                local_cards
896            } else {
897                entry.card_count
898            },
899            best_card: entry.best_card.clone(),
900            docstring_matched: None,
901        });
902    }
903
904    // Add local-only packages (not in remote index).
905    for (name, version) in &installed {
906        if seen.contains(name) {
907            continue;
908        }
909        // Pull full `PkgEntity` from local init.lua when available (keeps the
910        // wire shape consistent with remote entries). When the package does
911        // not parse as a `PkgEntity` (missing `M.meta.name`), fall back to
912        // a minimal entity with just the directory name and the manifest
913        // version — the entry still appears in local-only listings, but the
914        // richer projection fields are simply absent.
915        let parsed_entity = pkg_dir
916            .as_ref()
917            .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(name).join("init.lua")));
918        let entity = parsed_entity.unwrap_or(PkgEntity {
919            name: name.clone(),
920            version: version.clone(),
921            description: None,
922            category: None,
923            docstring: None,
924        });
925        results.push(SearchResult {
926            entity,
927            source: PackageSource::Unknown,
928            installed: true,
929            card_count: card_counts.get(name).copied().unwrap_or(0),
930            best_card: None,
931            docstring_matched: None,
932        });
933    }
934
935    Ok(results)
936}
937
938// ─── Search (filtering) ───────────────────────────────────────
939
940fn matches_query(result: &SearchResult, query: &str) -> bool {
941    let q = query.to_lowercase();
942    let pkg = &result.entity;
943    let empty = String::new();
944    pkg.name.to_lowercase().contains(&q)
945        || pkg
946            .description
947            .as_ref()
948            .unwrap_or(&empty)
949            .to_lowercase()
950            .contains(&q)
951        || pkg
952            .category
953            .as_ref()
954            .unwrap_or(&empty)
955            .to_lowercase()
956            .contains(&q)
957        || pkg
958            .docstring
959            .as_ref()
960            .unwrap_or(&empty)
961            .to_lowercase()
962            .contains(&q)
963}
964
965// ─── Index generation (reindex) ───────────────────────────────
966//
967// The non-Lua-VM parser that used to live here
968// (`parse_meta_from_init_lua` / `extract_docstring`) has moved into
969// `algocline_core::PkgEntity::parse_from_init_lua`, where it is shared
970// with the manifest / lockfile wire format. The parsing tests migrated
971// with it; `hub.rs` now just consumes the typed `PkgEntity` projection.
972
973/// Build a hub index by scanning a packages directory.
974///
975/// When `source_dir` is provided, scans that directory directly
976/// (for generating an index from a repo checkout).  Metadata comes
977/// only from `init.lua` — no manifest lookup, no card counts.
978///
979/// When `source_dir` is `None`, scans `~/.algocline/packages/` and
980/// enriches entries with manifest source and local card counts.
981fn build_index(app_dir: &AppDir, source_dir: Option<&std::path::Path>) -> Result<HubIndex, String> {
982    let empty = || HubIndex {
983        schema_version: "hub_index/v0".into(),
984        updated_at: super::manifest::now_iso8601(),
985        packages: Vec::new(),
986    };
987
988    let pkg_dir = match source_dir {
989        Some(d) => d.to_path_buf(),
990        None => app_dir.packages_dir(),
991    };
992
993    let use_local_state = source_dir.is_none();
994    let card_counts = if use_local_state {
995        local_card_counts(app_dir)
996    } else {
997        HashMap::new()
998    };
999    // Manifest read errors surface as `Err` rather than degrading to an
1000    // empty manifest — when building the local hub index, a corrupt
1001    // `installed.json` silently turning all package sources into
1002    // `PackageSource::Unknown` would be indistinguishable from the
1003    // legitimate "no source recorded" state, and would ship into
1004    // generated `hub_index.json` files verbatim.
1005    let manifest = if use_local_state {
1006        manifest::load_manifest(app_dir)?
1007    } else {
1008        manifest::Manifest::default()
1009    };
1010
1011    let mut entries = Vec::new();
1012
1013    // Missing / unreadable `pkg_dir` is a legitimate "no packages yet"
1014    // state on a fresh install — distinct from manifest corruption
1015    // above, and safe to surface as an empty index.
1016    let dir_entries = match std::fs::read_dir(&pkg_dir) {
1017        Ok(e) => e,
1018        Err(_) => return Ok(empty()),
1019    };
1020
1021    for entry in dir_entries.flatten() {
1022        if !entry.path().is_dir() {
1023            continue;
1024        }
1025        let dir_name = match entry.file_name().to_str() {
1026            Some(n) if !n.starts_with('.') && !n.starts_with('_') => n.to_string(),
1027            _ => continue,
1028        };
1029
1030        let init_lua = entry.path().join("init.lua");
1031        if !init_lua.exists() {
1032            continue;
1033        }
1034
1035        // Silent-exclude gate: `PkgEntity::parse_from_init_lua` returns `None`
1036        // when `M.meta` is absent or `M.meta.name` is empty. Directories that
1037        // happen to contain an `init.lua` but aren't algocline packages
1038        // (e.g. `alc_shapes/`, a type DSL library) are dropped from the index
1039        // rather than falling through with a placeholder name — that would
1040        // pollute hub_search.
1041        let Some(entity) = PkgEntity::parse_from_init_lua(&init_lua) else {
1042            continue;
1043        };
1044
1045        // Use manifest source only for local-state mode. When the manifest
1046        // has no record for this directory, default to `PackageSource::Unknown`
1047        // (via `Default`) — hub consumers see it as "source not recorded".
1048        let source = manifest
1049            .packages
1050            .get(&dir_name)
1051            .map(|e| e.source.clone())
1052            .unwrap_or_default();
1053
1054        entries.push(IndexEntry {
1055            entity,
1056            source,
1057            card_count: card_counts.get(&dir_name).copied().unwrap_or(0),
1058            best_card: None,
1059        });
1060    }
1061
1062    entries.sort_by(|a, b| a.entity.name.cmp(&b.entity.name));
1063
1064    Ok(HubIndex {
1065        schema_version: "hub_index/v0".into(),
1066        updated_at: super::manifest::now_iso8601(),
1067        packages: entries,
1068    })
1069}
1070
1071// ─── Public API ────────────────────────────────────────────────
1072
1073impl AppService {
1074    /// Generate a hub index from a packages directory.
1075    ///
1076    /// When `source_dir` is provided, scans that directory (e.g. a
1077    /// repo checkout) — pure metadata extraction, no manifest or card
1078    /// data mixed in.  When omitted, scans `~/.algocline/packages/`.
1079    ///
1080    /// Writes the index to `output_path` (for CI / publishing).
1081    /// Does NOT touch the remote search cache.
1082    pub fn hub_reindex(
1083        &self,
1084        output_path: Option<&str>,
1085        source_dir: Option<&str>,
1086    ) -> Result<String, String> {
1087        let src = source_dir.map(std::path::Path::new);
1088        if let Some(d) = src {
1089            if !d.is_dir() {
1090                return Err(format!("source_dir '{}' is not a directory", d.display()));
1091            }
1092        }
1093        let app_dir = self.log_config.app_dir();
1094        let index = build_index(&app_dir, src)?;
1095
1096        let written_path = if let Some(path) = output_path {
1097            let json = serde_json::to_string_pretty(&index)
1098                .map_err(|e| format!("Failed to serialize index: {e}"))?;
1099            std::fs::write(path, &json)
1100                .map_err(|e| format!("Failed to write index to {path}: {e}"))?;
1101            Some(path.to_string())
1102        } else {
1103            None
1104        };
1105
1106        let response = serde_json::json!({
1107            "package_count": index.packages.len(),
1108            "updated_at": index.updated_at,
1109            "output_path": written_path,
1110            "source_dir": source_dir,
1111        });
1112        Ok(response.to_string())
1113    }
1114
1115    /// Show detailed information for a single package.
1116    ///
1117    /// Aggregates package metadata (from index or local `init.lua`),
1118    /// all Cards, aliases, and eval stats into one response.
1119    pub fn hub_info(&self, pkg: &str) -> Result<String, String> {
1120        use algocline_engine::card;
1121
1122        // Guard against path traversal
1123        if pkg.contains("..") || pkg.contains('/') || pkg.contains('\\') {
1124            return Err(format!("Invalid package name: '{pkg}'"));
1125        }
1126
1127        // Package metadata: try remote index first, fall back to local
1128        let app_dir = self.log_config.app_dir();
1129        let installed = installed_packages(&app_dir)?;
1130        let is_installed = installed.contains_key(pkg);
1131
1132        // Resolve package metadata: try remote index first, fall back to
1133        // local init.lua. `version` / `description` / `category` are modelled
1134        // as `Option<String>` at the `PkgEntity` layer; at this API surface
1135        // we flatten `None` to empty string so the wire shape (non-null
1136        // JSON string fields) stays unchanged for existing consumers.
1137        let (version, description, category, source) = {
1138            let (remote, _) = fetch_remote_indices(&app_dir)?;
1139            if let Some(entry) = remote.packages.iter().find(|e| e.entity.name == pkg) {
1140                (
1141                    entry.entity.version.clone().unwrap_or_default(),
1142                    entry.entity.description.clone().unwrap_or_default(),
1143                    entry.entity.category.clone().unwrap_or_default(),
1144                    entry.source.clone(),
1145                )
1146            } else if is_installed {
1147                // Fall back to local init.lua parse via `PkgEntity`. When
1148                // the file is not a valid package (no `M.meta.name`), we
1149                // degrade gracefully by returning the manifest-recorded
1150                // version and empty string fields — mirroring the pre-typed
1151                // behaviour.
1152                let init_lua = app_dir.packages_dir().join(pkg).join("init.lua");
1153                let entity = PkgEntity::parse_from_init_lua(&init_lua);
1154                let manifest_source = manifest::load_manifest(&app_dir)?
1155                    .packages
1156                    .get(pkg)
1157                    .map(|e| e.source.clone())
1158                    .unwrap_or_default();
1159                match entity {
1160                    Some(e) => (
1161                        e.version.unwrap_or_default(),
1162                        e.description.unwrap_or_default(),
1163                        e.category.unwrap_or_default(),
1164                        manifest_source,
1165                    ),
1166                    None => (
1167                        installed.get(pkg).cloned().flatten().unwrap_or_default(),
1168                        String::new(),
1169                        String::new(),
1170                        manifest_source,
1171                    ),
1172                }
1173            } else {
1174                return Err(format!(
1175                    "Package '{pkg}' not found in remote indices or locally installed packages"
1176                ));
1177            }
1178        };
1179
1180        // Collect warnings additively; surfaced in response JSON so MCP callers
1181        // (Claude Code UI) observe degraded data instead of silent loss.
1182        // See CLAUDE.md §Service 層の Error 伝播規律 — tracing alone is not enough.
1183        let mut warnings: Vec<String> = Vec::new();
1184
1185        // Cards for this package (single call, reused for stats)
1186        let card_rows = match self.card_store.list(Some(pkg)) {
1187            Ok(rows) => rows,
1188            Err(e) => {
1189                let msg = format!("card store list for '{pkg}': {e}");
1190                tracing::warn!("{}", msg);
1191                warnings.push(msg);
1192                vec![]
1193            }
1194        };
1195        let cards_json = card::summaries_to_json(&card_rows);
1196
1197        // Aliases for this package
1198        let aliases_json = match self.card_store.alias_list(Some(pkg)) {
1199            Ok(rows) => card::aliases_to_json(&rows),
1200            Err(e) => {
1201                let msg = format!("card store alias_list for '{pkg}': {e}");
1202                tracing::warn!("{}", msg);
1203                warnings.push(msg);
1204                serde_json::json!([])
1205            }
1206        };
1207
1208        // Stats: card count, best pass_rate, eval count
1209        let card_count = card_rows.len();
1210        let best_pass_rate = card_rows
1211            .iter()
1212            .filter_map(|c| c.pass_rate)
1213            .fold(f64::NEG_INFINITY, f64::max);
1214        let best_pass_rate = if best_pass_rate.is_finite() {
1215            Some(best_pass_rate)
1216        } else {
1217            None
1218        };
1219
1220        // Eval count from evals directory; corruption warnings surfaced additively.
1221        let eval_count = count_evals_for_pkg(&app_dir, pkg, &mut warnings);
1222
1223        let mut response = serde_json::json!({
1224            "pkg": {
1225                "name": pkg,
1226                "version": version,
1227                "description": description,
1228                "category": category,
1229                "source": source,
1230                "installed": is_installed,
1231            },
1232            "cards": cards_json,
1233            "aliases": aliases_json,
1234            "stats": {
1235                "card_count": card_count,
1236                "eval_count": eval_count,
1237                "best_pass_rate": best_pass_rate,
1238            },
1239        });
1240        if !warnings.is_empty() {
1241            response["warnings"] = serde_json::json!(warnings);
1242        }
1243        Ok(response.to_string())
1244    }
1245
1246    /// Search packages across remote indices + local state.
1247    ///
1248    /// Index URLs are discovered from hub registries, manifest sources,
1249    /// and `AUTO_INSTALL_SOURCES`. Each source is cached independently.
1250    ///
1251    /// ## List-tool options (`opts`)
1252    ///
1253    /// The `opts` parameter carries the list-tool primitives
1254    /// (`limit / sort / filter / fields / verbose`) shared with other
1255    /// list-style MCP tools. Defaults:
1256    ///
1257    /// - `limit` — 50 when `None`. `Some(0)` means **no limit** (return
1258    ///   all matching entries — empty-means-all idiom).
1259    /// - `sort` — `"-installed,name"` when `None` (installed first, then
1260    ///   ascending by name).
1261    /// - `filter` — no additional filter. Legacy `category` /
1262    ///   `installed_only` parameters are merged into the filter map when
1263    ///   `filter` does not already contain those keys (explicit
1264    ///   `filter` wins on conflict).
1265    /// - `fields` / `verbose` — projection is applied to every entry in
1266    ///   the `results` array (see
1267    ///   [`super::list_opts::resolve_fields`]). Top-level keys
1268    ///   (`total`, `sources`, `warnings`) are never projected away.
1269    ///
1270    /// ## docstring handling
1271    ///
1272    /// [`SearchResult::docstring`] is `skip_serializing`, so it is
1273    /// absent from the default serialized view. When the resolved
1274    /// projection contains `"docstring"`, it is re-injected into the
1275    /// per-entry JSON via
1276    /// [`SearchResult::to_value_with_optional_docstring`].
1277    pub(crate) fn hub_search(
1278        &self,
1279        query: Option<&str>,
1280        category: Option<&str>,
1281        installed_only: Option<bool>,
1282        opts: ListOpts,
1283        local_indices: Option<Vec<String>>,
1284    ) -> Result<String, String> {
1285        let app_dir = self.log_config.app_dir();
1286        let (mut remote, mut warnings) = fetch_remote_indices(&app_dir)?;
1287
1288        // Merge local index files (pre-push verification / air-gapped use)
1289        // BEFORE the main `merge` step so that installed packages whose
1290        // metadata appears in a local index are surfaced with their full
1291        // entry (version / source / category) instead of the `Unknown`
1292        // stub produced by `merge`'s local-only fallback path. Each path
1293        // is read and deserialized as a HubIndex; failures go to warnings
1294        // and do not abort the search (partial results > hard failure for
1295        // local verification workflows). Collection results from
1296        // `fetch_remote_indices` take priority on name collisions.
1297        let local_index_paths: Vec<String> = local_indices.clone().unwrap_or_default();
1298        if let Some(paths) = local_indices {
1299            let mut existing: HashSet<String> = remote
1300                .packages
1301                .iter()
1302                .map(|p| p.entity.name.clone())
1303                .collect();
1304            for path in &paths {
1305                match std::fs::read_to_string(path) {
1306                    Err(e) => {
1307                        warnings.push(format!("Failed to read local index {path}: {e}"));
1308                    }
1309                    Ok(raw) => match serde_json::from_str::<HubIndex>(&raw) {
1310                        Err(e) => {
1311                            warnings.push(format!("Failed to parse local index {path}: {e}"));
1312                        }
1313                        Ok(idx) => {
1314                            for entry in idx.packages {
1315                                if existing.insert(entry.entity.name.clone()) {
1316                                    remote.packages.push(entry);
1317                                }
1318                            }
1319                        }
1320                    },
1321                }
1322            }
1323        }
1324
1325        let mut results = merge(&app_dir, &remote)?;
1326
1327        // Filter by query (internal signal covers name/description/
1328        // category/docstring — `matches_query` unchanged).
1329        let query_lower = query.filter(|q| !q.is_empty()).map(|q| q.to_lowercase());
1330        if let Some(ref ql) = query_lower {
1331            results.retain(|r| matches_query(r, ql));
1332        }
1333
1334        // Compute docstring_matched per remaining hit: Some(true) only
1335        // when the query matched docstring and none of {name,
1336        // description, category}; otherwise None.
1337        if let Some(ref ql) = query_lower {
1338            for r in &mut results {
1339                let empty = String::new();
1340                let pkg = &r.entity;
1341                let other_hit = pkg.name.to_lowercase().contains(ql)
1342                    || pkg
1343                        .description
1344                        .as_ref()
1345                        .unwrap_or(&empty)
1346                        .to_lowercase()
1347                        .contains(ql)
1348                    || pkg
1349                        .category
1350                        .as_ref()
1351                        .unwrap_or(&empty)
1352                        .to_lowercase()
1353                        .contains(ql);
1354                let doc_hit = pkg
1355                    .docstring
1356                    .as_ref()
1357                    .unwrap_or(&empty)
1358                    .to_lowercase()
1359                    .contains(ql);
1360                r.docstring_matched = if !other_hit && doc_hit {
1361                    Some(true)
1362                } else {
1363                    None
1364                };
1365            }
1366        }
1367
1368        // Build the effective filter map: start from explicit `opts.filter`,
1369        // then fold legacy `category` / `installed_only` in only if the
1370        // corresponding key is not already set (explicit filter wins).
1371        let mut filter_map: std::collections::HashMap<String, serde_json::Value> =
1372            opts.filter.unwrap_or_default();
1373        if let Some(cat) = category {
1374            filter_map
1375                .entry("category".to_string())
1376                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1377        }
1378        if let Some(only) = installed_only {
1379            // Preserve prior semantic: `installed_only=Some(false)` was a
1380            // no-op (it did not force `installed=false`). Only fold when
1381            // explicitly true.
1382            if only {
1383                filter_map
1384                    .entry("installed".to_string())
1385                    .or_insert(serde_json::Value::Bool(true));
1386            }
1387        }
1388
1389        // Resolve sort keys up-front so an invalid sort string errors out
1390        // before we touch results.
1391        let sort_str = opts.sort.as_deref().unwrap_or("-installed,name");
1392        let sort_keys = parse_sort(sort_str)?;
1393
1394        // Resolve projection fields; this also rejects unknown `verbose`
1395        // values before any heavy work.
1396        let fields = resolve_fields(
1397            opts.verbose.as_deref(),
1398            opts.fields.as_deref(),
1399            HUB_SEARCH_SUMMARY,
1400            HUB_SEARCH_FULL,
1401        )?;
1402        let include_docstring = fields.iter().any(|f| f == "docstring");
1403
1404        // Serialize each result to a Value (docstring optionally attached)
1405        // so filter/sort/projection work uniformly on JSON values.
1406        let mut items: Vec<serde_json::Value> = results
1407            .iter()
1408            .map(|r| r.to_value_with_optional_docstring(include_docstring))
1409            .collect();
1410
1411        // Filter AFTER serialization so filter keys can reference
1412        // projection-level shape (e.g. `category`, `installed`).
1413        if !filter_map.is_empty() {
1414            items.retain(|v| matches_filter(v, &filter_map));
1415        }
1416
1417        // Sort.
1418        apply_sort_by_value(&mut items, &sort_keys);
1419
1420        // Limit. `limit = Some(0)` means "no limit" (return all results)
1421        // — mirrors the `empty=all & some=filter` idiom used across the
1422        // list-tool contract. `None` falls back to the default cap (50).
1423        let total = items.len();
1424        let limit = opts.limit.unwrap_or(50);
1425        if limit > 0 {
1426            items.truncate(limit);
1427        }
1428
1429        // Projection (after truncation — unselected fields are stripped
1430        // from the kept entries only).
1431        let projected: Vec<serde_json::Value> = items
1432            .into_iter()
1433            .map(|v| project_fields(v, &fields))
1434            .collect();
1435
1436        // Collect discovered sources for transparency.
1437        // Warnings from this call (e.g. config.toml parse failure) are
1438        // already present in `warnings` from `fetch_remote_indices` above;
1439        // use a throwaway buffer here to avoid duplicating them.
1440        let mut _src_warnings: Vec<String> = Vec::new();
1441        let mut sources = discover_index_urls(&app_dir, &mut _src_warnings)?;
1442        // Surface local_indices paths in `sources` so callers can see
1443        // what was actually consulted (transparency / debug aid).
1444        sources.extend(local_index_paths);
1445
1446        let mut json = serde_json::json!({
1447            "results": projected,
1448            "total": total,
1449            "sources": sources,
1450        });
1451        if !warnings.is_empty() {
1452            json["warnings"] = serde_json::json!(warnings);
1453        }
1454        Ok(json.to_string())
1455    }
1456
1457    /// Aggregate hub index across all discovered cache sources.
1458    ///
1459    /// Reads the cached `hub_index.json` for each registered source URL
1460    /// (cache-only, no network fetch). Sources that are missing from cache
1461    /// or whose cache file is corrupt are skipped and a warning is collected;
1462    /// the aggregate still succeeds with the remaining sources.
1463    ///
1464    /// Registry-load failures (corrupt `hub_registries.json`) are also
1465    /// demoted to warnings rather than hard errors. Any warnings accumulated
1466    /// before the failure are preserved in the returned `warnings` vec so
1467    /// they reach the MCP wire response.
1468    ///
1469    /// # Returns
1470    /// `Ok((merged_index, warnings))` — always Ok; `warnings` contains any
1471    /// per-source failure messages including registry-load failures.
1472    pub(crate) fn aggregate_index(
1473        &self,
1474    ) -> Result<(HubIndex, Vec<String>), super::error::ServiceError> {
1475        let app_dir = self.log_config.app_dir();
1476        let mut warnings: Vec<String> = Vec::new();
1477
1478        // Discover source URLs (registries + manifest + seeds).
1479        // On failure, demote the error to a warning and return a degraded
1480        // (empty) response. Preserves any warnings already collected
1481        // (e.g. config.toml parse warning) before the failure.
1482        let urls = match discover_index_urls(&app_dir, &mut warnings) {
1483            Ok(u) => u,
1484            Err(e) => {
1485                warnings.push(format!("hub registry discovery failed: {e}"));
1486                return Ok((
1487                    HubIndex {
1488                        schema_version: "hub_index/v0".into(),
1489                        updated_at: String::new(),
1490                        packages: Vec::new(),
1491                    },
1492                    warnings,
1493                ));
1494            }
1495        };
1496
1497        // Empty URL list: return empty index (not an error — fresh install).
1498        if urls.is_empty() {
1499            return Ok((
1500                HubIndex {
1501                    schema_version: "hub_index/v0".into(),
1502                    updated_at: String::new(),
1503                    packages: Vec::new(),
1504                },
1505                warnings,
1506            ));
1507        }
1508
1509        // Load each source from cache. Network fetches are intentionally
1510        // avoided here: resource reads happen synchronously in the MCP
1511        // request path and should not block on network I/O. The cache
1512        // is populated by hub_reindex / hub_search (which do fetch).
1513        // Per-source load failures are best-effort: collect as warnings
1514        // and continue with remaining sources.
1515        let mut all_packages: Vec<IndexEntry> = Vec::new();
1516        let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
1517
1518        for url in &urls {
1519            let merge_packages =
1520                |packages: Vec<IndexEntry>,
1521                 all: &mut Vec<IndexEntry>,
1522                 seen: &mut std::collections::HashSet<String>| {
1523                    for entry in packages {
1524                        if seen.insert(entry.entity.name.clone()) {
1525                            all.push(entry);
1526                        }
1527                    }
1528                };
1529            match load_cached_full(&app_dir, url) {
1530                CacheLookup::Fresh(index) => {
1531                    merge_packages(index.packages, &mut all_packages, &mut seen_names);
1532                }
1533                CacheLookup::Stale(index) => {
1534                    // Stale but not absent: merge the data and emit a warning so
1535                    // the caller knows the catalog may be outdated.
1536                    warnings.push(format!(
1537                        "hub cache stale (>{CACHE_TTL_SECS}s) for {url}; run alc_hub_search to refresh"
1538                    ));
1539                    merge_packages(index.packages, &mut all_packages, &mut seen_names);
1540                }
1541                CacheLookup::NotPresent => {
1542                    // Cache file absent — not an error, just skip.
1543                }
1544                CacheLookup::Corrupt(e) => {
1545                    // Cache corruption: surface as warning, continue aggregate.
1546                    warnings.push(format!("hub cache read failed for {url}: {e}"));
1547                }
1548            }
1549        }
1550
1551        Ok((
1552            HubIndex {
1553                schema_version: "hub_index/v0".into(),
1554                updated_at: String::new(),
1555                packages: all_packages,
1556            },
1557            warnings,
1558        ))
1559    }
1560}
1561
1562#[cfg(test)]
1563mod tests {
1564    use super::*;
1565
#[test]
fn repo_to_index_url_github() {
    // A plain GitHub repo URL resolves to the raw `hub_index.json` on `main`.
    let expected =
        "https://raw.githubusercontent.com/ynishi/algocline-bundled-packages/main/hub_index.json";
    let resolved = repo_to_index_url("https://github.com/ynishi/algocline-bundled-packages");
    assert_eq!(resolved.as_deref(), Some(expected));
}
1576
#[test]
fn repo_to_index_url_github_trailing_slash() {
    // A trailing slash on the repo URL must not leak into the raw URL.
    let resolved = repo_to_index_url("https://github.com/user/repo/");
    assert_eq!(
        resolved.as_deref(),
        Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json")
    );
}
1584
#[test]
fn repo_to_index_url_github_dot_git() {
    // A `.git` suffix is expected to be dropped from the repo segment.
    let resolved = repo_to_index_url("https://github.com/user/repo.git");
    assert_eq!(
        resolved.as_deref(),
        Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json")
    );
}
1592
#[test]
fn repo_to_index_url_direct_json() {
    // A URL that already points at a `.json` file is returned unchanged.
    let direct = "https://example.com/my_index.json";
    assert_eq!(repo_to_index_url(direct), Some(direct.to_string()));
}
1600
#[test]
fn repo_to_index_url_unknown_host_no_json() {
    // Non-GitHub host without a `.json` path: no index URL can be derived.
    let resolved = repo_to_index_url("https://example.com/some-repo");
    assert!(resolved.is_none());
}
1605
#[test]
fn repo_to_index_url_local_path() {
    // A filesystem path is not a remote source and yields no index URL.
    let resolved = repo_to_index_url("/home/user/my-pkg");
    assert!(resolved.is_none());
}
1610
#[test]
fn cache_key_stable() {
    // Hashing the same URL twice must give the same key.
    let url = "https://example.com/index.json";
    let first = cache_key(url);
    let second = cache_key(url);
    assert_eq!(first, second);
    // Keys have a fixed width of 16 characters (16 hex chars).
    assert_eq!(first.len(), 16);
}
1618
#[test]
fn cache_key_different_urls() {
    // Distinct URLs must not collide on the same cache key.
    let key_a = cache_key("https://a.com/index.json");
    let key_b = cache_key("https://b.com/index.json");
    assert!(key_a != key_b);
}
1625
1626    // NOTE: The init.lua meta / docstring parsing tests have moved to
1627    // `algocline_core::pkg::tests` along with the parser itself. The
1628    // `hub.rs` call-path tests now exercise the typed `PkgEntity` via
1629    // `build_index` / `merge` only.
1630
#[test]
fn merge_dedup_uses_hashset() {
    // Structural check: merging a remote index over an app dir with no
    // installed packages must still surface the remote-only entry.
    let remote_entry = IndexEntry {
        entity: PkgEntity {
            name: "remote_only".into(),
            version: Some("1.0".into()),
            description: Some("from remote".into()),
            category: Some("test".into()),
            docstring: None,
        },
        source: PackageSource::Unknown,
        card_count: 0,
        best_card: None,
    };
    let remote = HubIndex {
        schema_version: "hub_index/v0".into(),
        updated_at: String::new(),
        packages: vec![remote_entry],
    };

    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    let merged = merge(&app_dir, &remote).expect("merge over empty app_dir should succeed");
    // The result holds `remote_only` plus whatever is installed locally.
    let found_remote = merged.iter().any(|hit| hit.entity.name == "remote_only");
    assert!(found_remote);
}
1658
#[test]
fn matches_query_searches_docstring() {
    let entity = PkgEntity {
        name: "cascade".into(),
        version: Some("0.1.0".into()),
        description: Some("Multi-level routing".into()),
        category: Some("meta".into()),
        docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
    };
    let result = SearchResult {
        entity,
        source: PackageSource::Unknown,
        installed: true,
        card_count: 0,
        best_card: None,
        docstring_matched: None,
    };

    // (query, expected outcome, label reported on failure)
    let cases = [
        ("thompson", true, "docstring match"),
        ("FrugalGPT", true, "docstring match case"),
        ("routing", true, "description match"),
        ("bayesian", false, "no match"),
    ];
    for (query, expected, label) in cases {
        assert!(matches_query(&result, query) == expected, "{label}");
    }
}
1681
1682    // ─── SearchResult::to_value_with_optional_docstring ────────────
1683    //
1684    // `docstring` is not emitted by the default serde path (via the
1685    // `serialize_entity_without_docstring` custom serializer) and is
1686    // re-attached only when the projection path says so. These tests
1687    // pin the two branches of that helper — they are the hinge that
1688    // `verbose="full"` / `fields=["docstring"]` rely on.
1689
1690    fn sample_search_result() -> SearchResult {
1691        SearchResult {
1692            entity: PkgEntity {
1693                name: "cascade".into(),
1694                version: Some("0.1.0".into()),
1695                description: Some("Multi-level routing".into()),
1696                category: Some("reasoning".into()),
1697                docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
1698            },
1699            source: PackageSource::Git {
1700                url: "https://example.com/cascade".into(),
1701                rev: None,
1702            },
1703            installed: true,
1704            card_count: 3,
1705            best_card: None,
1706            docstring_matched: None,
1707        }
1708    }
1709
#[test]
fn to_value_default_omits_docstring() {
    let sample = sample_search_result();
    let value = sample.to_value_with_optional_docstring(false);
    let obj = value.as_object().expect("object");

    assert!(
        obj.get("docstring").is_none(),
        "default summary must not leak docstring"
    );
    assert_eq!(obj.get("name").and_then(|x| x.as_str()), Some("cascade"));
    // The fixture leaves `docstring_matched` as `None`, so the key must be
    // absent from the serialized object.
    assert!(
        obj.get("docstring_matched").is_none(),
        "docstring_matched=None must be omitted"
    );
}
1727
#[test]
fn to_value_include_reattaches_docstring() {
    // With the include flag set, the docstring must reappear verbatim.
    let sample = sample_search_result();
    let value = sample.to_value_with_optional_docstring(true);
    let obj = value.as_object().expect("object");
    let docstring = obj.get("docstring").and_then(|x| x.as_str());
    assert_eq!(
        docstring,
        Some("Based on FrugalGPT. Uses Thompson Sampling.")
    );
}
1738
#[test]
fn to_value_serializes_docstring_matched_when_set() {
    // Same fixture, but flagged as a docstring-only hit.
    let flagged = SearchResult {
        docstring_matched: Some(true),
        ..sample_search_result()
    };
    let value = flagged.to_value_with_optional_docstring(false);
    let obj = value.as_object().expect("object");
    let marker = obj.get("docstring_matched").and_then(|x| x.as_bool());
    assert_eq!(marker, Some(true));
}
1750
1751    // ─── projection glue ──────────────────────────────────────────
1752    //
1753    // These tests exercise the projection path that `hub_search` uses to
1754    // shape output: `resolve_fields` + `project_fields` applied to a
1755    // `to_value_with_optional_docstring`-serialized entry. They pin the
1756    // wf-sim-verbose contract: `fields` wins over `verbose`, default
1757    // summary preset excludes docstring, `full` preset includes
1758    // docstring, unknown keys silently skipped.
1759
#[test]
fn hub_search_default_summary_excludes_docstring() {
    // No `verbose`, no explicit `fields`: the summary preset applies.
    let fields = resolve_fields(None, None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
    let wants_docstring = fields.iter().any(|f| f == "docstring");

    let sample = sample_search_result();
    let raw = sample.to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(raw, &fields);
    let obj = projected.as_object().expect("object");

    assert!(
        !obj.contains_key("docstring"),
        "summary preset must omit docstring"
    );
    // Summary-preset keys that the fixture populates.
    ["name", "version", "description", "category", "installed"]
        .iter()
        .for_each(|key| assert!(obj.contains_key(*key), "summary preset key {key} missing"));
}
1779
#[test]
fn hub_search_verbose_full_includes_docstring() {
    // `verbose = "full"` selects the full preset, which carries docstring.
    let fields =
        resolve_fields(Some("full"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
    let wants_docstring = fields.iter().any(|f| f == "docstring");

    let sample = sample_search_result();
    let raw = sample.to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(raw, &fields);
    let obj = projected.as_object().expect("object");

    let docstring = obj.get("docstring").and_then(|x| x.as_str());
    assert_eq!(
        docstring,
        Some("Based on FrugalGPT. Uses Thompson Sampling.")
    );
    // Keys the full preset adds on top of the summary.
    ["source", "card_count"]
        .iter()
        .for_each(|key| assert!(obj.contains_key(*key), "full preset key {key} missing"));
}
1800
#[test]
fn hub_search_fields_beats_verbose() {
    // `verbose = "summary"` alone would drop docstring; an explicit
    // `fields` list has to override it.
    let explicit: Vec<String> = ["name", "docstring"]
        .iter()
        .map(|s| s.to_string())
        .collect();
    let fields = resolve_fields(
        Some("summary"),
        Some(&explicit),
        HUB_SEARCH_SUMMARY,
        HUB_SEARCH_FULL,
    )
    .unwrap();
    let wants_docstring = fields.iter().any(|f| f == "docstring");

    let sample = sample_search_result();
    let raw = sample.to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(raw, &fields);
    let obj = projected.as_object().expect("object");

    assert_eq!(obj.len(), 2, "only the two requested fields");
    assert!(obj.get("name").is_some());
    assert!(obj.get("docstring").is_some());
}
1824
#[test]
fn hub_search_fields_unknown_key_silently_skipped() {
    // `bogus` is not a key of the serialized entry; projection must drop it
    // without failing.
    let explicit: Vec<String> = ["name", "bogus"].iter().map(|s| s.to_string()).collect();
    let fields =
        resolve_fields(None, Some(&explicit), HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();

    let sample = sample_search_result();
    let projected = project_fields(sample.to_value_with_optional_docstring(false), &fields);
    let obj = projected.as_object().expect("object");

    assert_eq!(obj.len(), 1, "bogus must not appear");
    assert!(obj.get("name").is_some());
}
1836
#[test]
fn hub_search_invalid_verbose_errors() {
    // An unrecognized `verbose` value must be rejected, and the error text
    // has to echo the bad value back.
    let rejection =
        resolve_fields(Some("fat"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap_err();
    let echoes_value = rejection.contains("fat");
    assert!(echoes_value, "error must mention the offending value");
}
1846
1847    // ─── docstring_matched classification ─────────────────────────
1848    //
1849    // The query-time classification rule: `docstring_matched = Some(true)`
1850    // only when the query hit docstring AND missed name/description/
1851    // category; otherwise `None` (and therefore omitted from output).
1852    // The logic lives inline in `hub_search`; we re-create it here over a
1853    // tiny local helper so the three cases stay pinned as a contract.
1854
1855    fn classify(r: &SearchResult, query: &str) -> Option<bool> {
1856        let ql = query.to_lowercase();
1857        if query.is_empty() {
1858            return None;
1859        }
1860        let empty = String::new();
1861        let pkg = &r.entity;
1862        let other_hit = pkg.name.to_lowercase().contains(&ql)
1863            || pkg
1864                .description
1865                .as_ref()
1866                .unwrap_or(&empty)
1867                .to_lowercase()
1868                .contains(&ql)
1869            || pkg
1870                .category
1871                .as_ref()
1872                .unwrap_or(&empty)
1873                .to_lowercase()
1874                .contains(&ql);
1875        let doc_hit = pkg
1876            .docstring
1877            .as_ref()
1878            .unwrap_or(&empty)
1879            .to_lowercase()
1880            .contains(&ql);
1881        if !other_hit && doc_hit {
1882            Some(true)
1883        } else {
1884            None
1885        }
1886    }
1887
#[test]
fn docstring_matched_true_when_only_docstring_hits() {
    // In the fixture, "Thompson" occurs in the docstring and nowhere else.
    let verdict = classify(&sample_search_result(), "thompson");
    assert_eq!(verdict, Some(true));
}
1894
#[test]
fn docstring_matched_none_when_name_also_hits() {
    // "cascade" is the package name, so the hit is not docstring-only.
    let verdict = classify(&sample_search_result(), "cascade");
    assert!(verdict.is_none());
}
1901
#[test]
fn docstring_matched_none_when_description_hits() {
    // "routing" is found in the description, which rules out `Some(true)`.
    let verdict = classify(&sample_search_result(), "routing");
    assert!(verdict.is_none());
}
1908
#[test]
fn docstring_matched_none_when_query_empty() {
    // An empty query never classifies as a docstring-only hit.
    let verdict = classify(&sample_search_result(), "");
    assert!(verdict.is_none());
}
1914
1915    // ─── filter fold (legacy params → filter map) ─────────────────
1916    //
1917    // Behavioural rule: legacy `category` / `installed_only=true` fold
1918    // into the filter map only when the corresponding key is not
1919    // already set (explicit `filter` wins). `installed_only=false` is a
1920    // no-op (preserves prior semantics).
1921
1922    fn build_filter_map(
1923        category: Option<&str>,
1924        installed_only: Option<bool>,
1925        explicit: Option<HashMap<String, serde_json::Value>>,
1926    ) -> HashMap<String, serde_json::Value> {
1927        let mut filter_map = explicit.unwrap_or_default();
1928        if let Some(cat) = category {
1929            filter_map
1930                .entry("category".to_string())
1931                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1932        }
1933        if let Some(only) = installed_only {
1934            if only {
1935                filter_map
1936                    .entry("installed".to_string())
1937                    .or_insert(serde_json::Value::Bool(true));
1938            }
1939        }
1940        filter_map
1941    }
1942
#[test]
fn filter_by_category_via_legacy_param() {
    // With no explicit filter, the legacy `category` lands in the map.
    let folded = build_filter_map(Some("reasoning"), None, None);
    let category = folded.get("category").and_then(|v| v.as_str());
    assert_eq!(category, Some("reasoning"));
}
1951
#[test]
fn filter_by_installed_only_via_legacy_param() {
    // `installed_only = true` becomes `installed: true` in the map.
    let folded = build_filter_map(None, Some(true), None);
    let installed = folded.get("installed").and_then(|v| v.as_bool());
    assert_eq!(installed, Some(true));
}
1957
#[test]
fn filter_installed_only_false_is_noop() {
    // `installed_only = false` must not introduce an `installed` key.
    let folded = build_filter_map(None, Some(false), None);
    assert!(
        folded.get("installed").is_none(),
        "installed_only=false should not fold in"
    );
}
1966
#[test]
fn filter_beats_legacy_param_on_conflict() {
    // The explicit filter carries category=meta while the legacy param
    // says reasoning; the explicit value has to survive.
    let explicit = HashMap::from([(
        "category".to_string(),
        serde_json::Value::String("meta".to_string()),
    )]);
    let folded = build_filter_map(Some("reasoning"), None, Some(explicit));
    let category = folded.get("category").and_then(|v| v.as_str());
    assert_eq!(category, Some("meta"));
}
1982
#[test]
fn filter_merges_legacy_when_no_conflict() {
    // The explicit filter only sets `installed`, so the legacy category
    // is free to be folded in next to it.
    let explicit = HashMap::from([("installed".to_string(), serde_json::Value::Bool(true))]);
    let folded = build_filter_map(Some("reasoning"), None, Some(explicit));

    let category = folded.get("category").and_then(|v| v.as_str());
    let installed = folded.get("installed").and_then(|v| v.as_bool());
    assert_eq!(category, Some("reasoning"));
    assert_eq!(installed, Some(true));
}
1996
1997    // ─── load_registries: file-absent vs. corrupt JSON ────────────
1998
#[test]
fn load_registries_missing_file_returns_default() {
    // Fresh temp dir: hub_registries.json is never written, and the loader
    // must answer with an empty registry list rather than an error.
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    let outcome = load_registries(&app_dir);
    assert!(outcome.is_ok(), "missing file should be Ok: {outcome:?}");
    let loaded = outcome.unwrap();
    assert!(loaded.registries.is_empty());
}
2008
#[test]
fn load_registries_corrupt_json_returns_err() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    // Plant unparseable bytes where the registry file is expected.
    let registry_file = app_dir.hub_registries_json();
    std::fs::create_dir_all(registry_file.parent().unwrap()).unwrap();
    std::fs::write(&registry_file, b"not valid json {{{").unwrap();

    let outcome = load_registries(&app_dir);
    assert!(outcome.is_err(), "corrupt JSON must propagate Err");
    let msg = outcome.unwrap_err().to_string();
    let mentions_parse = msg.contains("parse");
    assert!(mentions_parse, "error message should mention parse: {msg}");
}
2025
#[test]
fn load_registries_valid_file_deserializes() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    // One well-formed registry record on disk.
    let registry_file = app_dir.hub_registries_json();
    std::fs::create_dir_all(registry_file.parent().unwrap()).unwrap();
    let content = r#"{"registries":[{"source":"https://github.com/user/repo","origin":"pkg_install","added_at":"2026-01-01T00:00:00Z"}]}"#;
    std::fs::write(&registry_file, content).unwrap();

    let outcome = load_registries(&app_dir);
    assert!(outcome.is_ok(), "valid JSON must parse Ok: {outcome:?}");
    let loaded = outcome.unwrap();
    assert_eq!(loaded.registries.len(), 1);
    assert_eq!(loaded.registries[0].source, "https://github.com/user/repo");
}
2040
2041    // ─── default sort verification ────────────────────────────────
2042
#[test]
fn default_sort_is_minus_installed_name() {
    // The spec must parse into two keys: `installed` descending, then
    // `name` ascending.
    let keys = parse_sort("-installed,name").unwrap();
    assert_eq!(keys.len(), 2);
    assert_eq!(keys[0].key, "installed");
    assert!(keys[0].desc, "installed must sort desc (true first)");
    assert_eq!(keys[1].key, "name");
    assert!(!keys[1].desc);

    // Sorting a shuffled sample puts installed entries first, each group
    // ordered by name.
    let mut items = vec![
        serde_json::json!({"installed": false, "name": "zeta"}),
        serde_json::json!({"installed": true, "name": "mu"}),
        serde_json::json!({"installed": false, "name": "alpha"}),
        serde_json::json!({"installed": true, "name": "beta"}),
    ];
    apply_sort_by_value(&mut items, &keys);

    let mut names: Vec<&str> = Vec::with_capacity(items.len());
    for item in &items {
        names.push(item.get("name").and_then(|x| x.as_str()).unwrap_or(""));
    }
    assert_eq!(names, vec!["beta", "mu", "alpha", "zeta"]);
}
2066
2067    // ─── Phase 3 MED batch: error-propagation tests ───────────────
2068
2069    // Site 1: collection_url_from_config
2070
#[test]
fn collection_url_from_config_absent_returns_ok_none() {
    // The temp dir contains no config.toml; that must read as "no URL
    // configured", not as a failure.
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    let result = collection_url_from_config(&app_dir);
    let is_ok_none = matches!(&result, Ok(None));
    assert!(
        is_ok_none,
        "absent config.toml must return Ok(None), got {result:?}"
    );
}
2082
#[test]
fn collection_url_from_config_corrupt_toml_returns_err() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    // Write a config file with malformed TOML.
    let config_file = app_dir.config_toml();
    std::fs::create_dir_all(config_file.parent().unwrap()).unwrap();
    std::fs::write(&config_file, b"[hub\ncollection_url = broken{{{{").unwrap();

    let result = collection_url_from_config(&app_dir);
    let failed = result.is_err();
    assert!(failed, "corrupt TOML must return Err, got {result:?}");
}
2096
#[test]
fn collection_url_from_config_valid_returns_url() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    // A minimal config with `[hub].collection_url` set.
    let config_file = app_dir.config_toml();
    std::fs::create_dir_all(config_file.parent().unwrap()).unwrap();
    std::fs::write(
        &config_file,
        b"[hub]\ncollection_url = \"https://example.com/hub_index.json\"\n",
    )
    .unwrap();

    let configured = collection_url_from_config(&app_dir).unwrap();
    assert_eq!(
        configured.as_deref(),
        Some("https://example.com/hub_index.json")
    );
}
2114
#[test]
fn collection_url_from_config_no_hub_section_returns_none() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    // Valid TOML, but without a `[hub]` table.
    let config_file = app_dir.config_toml();
    std::fs::create_dir_all(config_file.parent().unwrap()).unwrap();
    std::fs::write(&config_file, b"[some_other_section]\nfoo = \"bar\"\n").unwrap();

    let result = collection_url_from_config(&app_dir);
    let is_ok_none = matches!(&result, Ok(None));
    assert!(
        is_ok_none,
        "config without [hub] must return Ok(None), got {result:?}"
    );
}
2128
2129    // Site 2: load_cached
2130
#[test]
fn load_cached_absent_returns_ok_none() {
    // Nothing was ever cached for this URL in the fresh temp dir.
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    let result = load_cached(&app_dir, "https://example.com/index.json");
    let is_ok_none = matches!(&result, Ok(None));
    assert!(
        is_ok_none,
        "absent cache file must return Ok(None), got {result:?}"
    );
}
2141
#[test]
fn load_cached_corrupt_json_within_ttl_returns_err() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());
    let url = "https://example.com/index.json";

    // Drop garbage into the cache slot for `url`. The file is written just
    // now, so its mtime is current.
    let cache_root = cache_dir(&app_dir);
    std::fs::create_dir_all(&cache_root).unwrap();
    let cache_file = cache_root.join(format!("{}.json", cache_key(url)));
    std::fs::write(&cache_file, b"not valid json {{{{").unwrap();

    let result = load_cached(&app_dir, url);
    let failed = result.is_err();
    assert!(
        failed,
        "corrupt JSON within TTL must return Err, got {result:?}"
    );
}
2158
#[test]
fn load_cached_valid_json_within_ttl_returns_index() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());
    let url = "https://example.com/index.json";

    // Store a well-formed, empty index in the cache slot for `url`.
    let cache_root = cache_dir(&app_dir);
    std::fs::create_dir_all(&cache_root).unwrap();
    let cache_file = cache_root.join(format!("{}.json", cache_key(url)));
    let index_json = r#"{"schema_version":"hub_index/v0","updated_at":"2026-01-01T00:00:00Z","packages":[]}"#;
    std::fs::write(&cache_file, index_json).unwrap();

    let result = load_cached(&app_dir, url);
    let is_hit = matches!(&result, Ok(Some(_)));
    assert!(
        is_hit,
        "valid JSON within TTL must return Ok(Some(_)), got {result:?}"
    );
}
2175
/// Test helper: move both the access and modification time of `path` to
/// `secs` seconds before now, so the file looks older than it is.
///
/// Panics if the file cannot be opened for writing or the timestamps
/// cannot be applied.
fn backdate_file(path: &std::path::Path, secs: u64) {
    use std::fs::{FileTimes, OpenOptions};
    use std::time::{Duration, SystemTime};

    let target = SystemTime::now() - Duration::from_secs(secs);
    let handle = OpenOptions::new()
        .write(true)
        .open(path)
        .expect("open for backdate");
    handle
        .set_times(FileTimes::new().set_accessed(target).set_modified(target))
        .expect("set_times");
}
2188
2189    // L-1: load_cached_full returns Stale (not NotPresent) for outdated cache.
#[test]
fn load_cached_full_stale_file_returns_stale_variant() {
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());
    let url = "https://stale.example.com/index.json";

    // Go through the write helper so the entry is serialized the same way
    // as real cache entries.
    let cached_index = make_index(vec![("stale_pkg", "0.1.0")]);
    write_cache_for_url(&app_dir, url, &cached_index);

    // Age the file to twice the TTL so it is unambiguously stale.
    let cache_file = cache_dir(&app_dir).join(format!("{}.json", cache_key(url)));
    backdate_file(&cache_file, CACHE_TTL_SECS * 2);

    let lookup = load_cached_full(&app_dir, url);
    let is_stale = matches!(lookup, CacheLookup::Stale(_));
    assert!(is_stale, "backdated cache must return Stale variant");
}
2206
2207    // L-1: aggregate_index with stale cache returns data AND emits warning.
2208    #[tokio::test]
2209    async fn aggregate_index_stale_cache_returns_data_and_warning() {
2210        let tmp = tempfile::tempdir().unwrap();
2211        let app_dir_root = tmp.path().to_path_buf();
2212        let app_dir = AppDir::new(app_dir_root.clone());
2213        let url = "https://stale-agg.example.com/index.json";
2214
2215        // Write a valid cache file with one package.
2216        write_cache_for_url(&app_dir, url, &make_index(vec![("stale_pkg", "0.1.0")]));
2217        // Backdate the cache file so it's stale.
2218        let cache_path = cache_dir(&app_dir).join(format!("{}.json", cache_key(url)));
2219        backdate_file(&cache_path, CACHE_TTL_SECS * 2);
2220
2221        // Register the URL in hub_registries.
2222        let reg_path = app_dir.hub_registries_json();
2223        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2224        let reg_json = serde_json::json!({
2225            "registries": [{"source": url, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2226        });
2227        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2228
2229        let svc = super::super::test_support::make_app_service_at(app_dir_root).await;
2230        let (index, warnings) = AppService::aggregate_index(&svc).unwrap();
2231
2232        // Data from stale cache must still be present.
2233        assert!(
2234            index.packages.iter().any(|p| p.entity.name == "stale_pkg"),
2235            "stale package must be included in aggregate, got: {:?}",
2236            index
2237                .packages
2238                .iter()
2239                .map(|p| &p.entity.name)
2240                .collect::<Vec<_>>()
2241        );
2242        // A stale warning must be emitted.
2243        assert!(
2244            warnings
2245                .iter()
2246                .any(|w| w.contains("stale") && w.contains(url)),
2247            "stale cache must emit a warning mentioning the URL, got: {warnings:?}"
2248        );
2249    }
2250
2251    // Site 3: count_evals_for_pkg
2252
#[test]
fn count_evals_for_pkg_absent_dir_returns_zero_no_warnings() {
    // The evals directory is never created in this temp dir.
    let tmp = tempfile::tempdir().unwrap();
    let app_dir = AppDir::new(tmp.path().to_path_buf());

    let mut warnings: Vec<String> = Vec::new();
    let counted = count_evals_for_pkg(&app_dir, "cot", &mut warnings);

    assert_eq!(counted, 0, "absent evals dir must return 0");
    let quiet = warnings.is_empty();
    assert!(
        quiet,
        "absent evals dir must produce no warnings, got {warnings:?}"
    );
}
2265
2266    #[test]
2267    fn count_evals_for_pkg_corrupt_meta_surfaces_warning() {
2268        let tmp = tempfile::tempdir().unwrap();
2269        let app_dir = AppDir::new(tmp.path().to_path_buf());
2270        let evals_dir = app_dir.evals_dir();
2271        std::fs::create_dir_all(&evals_dir).unwrap();
2272
2273        // Write a result JSON stub so the file is scanned.
2274        std::fs::write(evals_dir.join("cot_9999.json"), b"{}").unwrap();
2275        // Write a corrupt meta.json for the same stem.
2276        std::fs::write(evals_dir.join("cot_9999.meta.json"), b"not json {{{{").unwrap();
2277
2278        let mut warnings: Vec<String> = Vec::new();
2279        let _count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2280        assert!(
2281            !warnings.is_empty(),
2282            "corrupt meta.json must produce at least one warning, got {warnings:?}"
2283        );
2284        assert!(
2285            warnings[0].contains("parse"),
2286            "warning must mention parse: {}",
2287            warnings[0]
2288        );
2289    }
2290
2291    #[test]
2292    fn count_evals_for_pkg_valid_meta_counts_correctly() {
2293        let tmp = tempfile::tempdir().unwrap();
2294        let app_dir = AppDir::new(tmp.path().to_path_buf());
2295        let evals_dir = app_dir.evals_dir();
2296        std::fs::create_dir_all(&evals_dir).unwrap();
2297
2298        // Write a result JSON + valid meta for strategy "cot".
2299        let meta = r#"{"eval_id":"cot_1","strategy":"cot","timestamp":1}"#;
2300        std::fs::write(evals_dir.join("cot_1.json"), b"{}").unwrap();
2301        std::fs::write(evals_dir.join("cot_1.meta.json"), meta).unwrap();
2302
2303        let mut warnings: Vec<String> = Vec::new();
2304        let count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2305        assert_eq!(count, 1, "should count 1 valid eval");
2306        assert!(warnings.is_empty(), "no warnings expected: {warnings:?}");
2307    }
2308
2309    // ─── aggregate_index unit tests ───────────────────────────────
2310
2311    /// Write a minimal HubIndex JSON to the per-source cache for a URL.
2312    fn write_cache_for_url(app_dir: &AppDir, url: &str, index: &HubIndex) {
2313        let dir = cache_dir(app_dir);
2314        std::fs::create_dir_all(&dir).unwrap();
2315        let path = dir.join(format!("{}.json", cache_key(url)));
2316        // justification: test helper, panicking on failure is acceptable in tests
2317        std::fs::write(&path, serde_json::to_string_pretty(index).unwrap()).unwrap();
2318    }
2319
2320    fn make_index(packages: Vec<(&str, &str)>) -> HubIndex {
2321        HubIndex {
2322            schema_version: "hub_index/v0".into(),
2323            updated_at: String::new(),
2324            packages: packages
2325                .into_iter()
2326                .map(|(name, version)| IndexEntry {
2327                    entity: PkgEntity {
2328                        name: name.to_string(),
2329                        version: Some(version.to_string()),
2330                        description: None,
2331                        category: None,
2332                        docstring: None,
2333                    },
2334                    source: PackageSource::Unknown,
2335                    card_count: 0,
2336                    best_card: None,
2337                })
2338                .collect(),
2339        }
2340    }
2341
2342    // T1: empty sources → empty index, no warnings
2343    #[test]
2344    fn aggregate_index_empty_sources_returns_empty() {
2345        let tmp = tempfile::tempdir().unwrap();
2346        let app_dir = AppDir::new(tmp.path().to_path_buf());
2347        // No registries, no manifest, no seeds in cache → no URLs → empty index.
2348        // discover_index_urls will still produce AUTO_INSTALL_SOURCES seeds,
2349        // but their cache files don't exist → Ok(None) for each → empty result.
2350        let (index, warnings) = {
2351            // Build a minimal AppService-like test by calling the free functions
2352            // and replicating the aggregate_index logic directly.
2353            let mut w: Vec<String> = Vec::new();
2354            let urls = discover_index_urls(&app_dir, &mut w).unwrap();
2355            let mut packages: Vec<IndexEntry> = Vec::new();
2356            let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2357            for url in &urls {
2358                if let Ok(Some(idx)) = load_cached(&app_dir, url) {
2359                    for e in idx.packages {
2360                        if seen.insert(e.entity.name.clone()) {
2361                            packages.push(e);
2362                        }
2363                    }
2364                }
2365            }
2366            (
2367                HubIndex {
2368                    schema_version: "hub_index/v0".into(),
2369                    updated_at: String::new(),
2370                    packages,
2371                },
2372                w,
2373            )
2374        };
2375        assert!(
2376            index.packages.is_empty(),
2377            "no cached sources should produce empty packages"
2378        );
2379        assert!(warnings.is_empty(), "no warnings expected for cache misses");
2380    }
2381
2382    // T1: one source in cache → packages returned
2383    #[test]
2384    fn aggregate_index_one_source_returns_packages() {
2385        let tmp = tempfile::tempdir().unwrap();
2386        let app_dir = AppDir::new(tmp.path().to_path_buf());
2387        let url = "https://example.com/test_index.json";
2388        let source_index = make_index(vec![("cot", "0.1.0"), ("ucb", "0.2.0")]);
2389        write_cache_for_url(&app_dir, url, &source_index);
2390
2391        // Register the URL in hub_registries so discover_index_urls finds it.
2392        let reg_path = app_dir.hub_registries_json();
2393        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2394        let reg_json = serde_json::json!({
2395            "registries": [{"source": url, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2396        });
2397        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2398
2399        let mut warnings: Vec<String> = Vec::new();
2400        let urls = discover_index_urls(&app_dir, &mut warnings).unwrap();
2401        let mut packages: Vec<IndexEntry> = Vec::new();
2402        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2403        for u in &urls {
2404            if let Ok(Some(idx)) = load_cached(&app_dir, u) {
2405                for e in idx.packages {
2406                    if seen.insert(e.entity.name.clone()) {
2407                        packages.push(e);
2408                    }
2409                }
2410            }
2411        }
2412
2413        assert!(
2414            packages.iter().any(|p| p.entity.name == "cot"),
2415            "cot expected"
2416        );
2417        assert!(
2418            packages.iter().any(|p| p.entity.name == "ucb"),
2419            "ucb expected"
2420        );
2421    }
2422
2423    // T2: duplicate package across two sources → first source wins
2424    #[test]
2425    fn aggregate_index_deduplicate_by_name_first_wins() {
2426        let tmp = tempfile::tempdir().unwrap();
2427        let app_dir = AppDir::new(tmp.path().to_path_buf());
2428        let url_a = "https://a.example.com/index.json";
2429        let url_b = "https://b.example.com/index.json";
2430
2431        // Both sources have "cot" but different versions.
2432        let idx_a = make_index(vec![("cot", "1.0.0")]);
2433        let idx_b = make_index(vec![("cot", "2.0.0"), ("ucb", "0.1.0")]);
2434        write_cache_for_url(&app_dir, url_a, &idx_a);
2435        write_cache_for_url(&app_dir, url_b, &idx_b);
2436
2437        let reg_path = app_dir.hub_registries_json();
2438        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2439        let reg_json = serde_json::json!({
2440            "registries": [
2441                {"source": url_a, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"},
2442                {"source": url_b, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}
2443            ]
2444        });
2445        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2446
2447        let mut warnings: Vec<String> = Vec::new();
2448        let urls = {
2449            let mut raw = discover_index_urls(&app_dir, &mut warnings).unwrap();
2450            // Restrict to only our two test URLs so seed URLs don't interfere.
2451            raw.retain(|u| u == url_a || u == url_b);
2452            raw
2453        };
2454
2455        let mut packages: Vec<IndexEntry> = Vec::new();
2456        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2457        for u in &urls {
2458            if let Ok(Some(idx)) = load_cached(&app_dir, u) {
2459                for e in idx.packages {
2460                    if seen.insert(e.entity.name.clone()) {
2461                        packages.push(e);
2462                    }
2463                }
2464            }
2465        }
2466
2467        let cot_count = packages.iter().filter(|p| p.entity.name == "cot").count();
2468        assert_eq!(cot_count, 1, "dedup: cot must appear exactly once");
2469        let ucb_count = packages.iter().filter(|p| p.entity.name == "ucb").count();
2470        assert_eq!(ucb_count, 1, "ucb from second source must appear");
2471    }
2472
2473    // T3: corrupt cache file → warning collected, other sources unaffected
2474    #[test]
2475    fn aggregate_index_corrupt_cache_collects_warning() {
2476        let tmp = tempfile::tempdir().unwrap();
2477        let app_dir = AppDir::new(tmp.path().to_path_buf());
2478        let url_corrupt = "https://corrupt.example.com/index.json";
2479
2480        // Write corrupt JSON to the cache slot.
2481        let dir = cache_dir(&app_dir);
2482        std::fs::create_dir_all(&dir).unwrap();
2483        let path = dir.join(format!("{}.json", cache_key(url_corrupt)));
2484        std::fs::write(&path, b"{{{{ not valid json").unwrap();
2485
2486        let reg_path = app_dir.hub_registries_json();
2487        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2488        let reg_json = serde_json::json!({
2489            "registries": [{"source": url_corrupt, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2490        });
2491        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2492
2493        let mut warnings: Vec<String> = Vec::new();
2494        let urls = discover_index_urls(&app_dir, &mut warnings).unwrap();
2495        let mut packages: Vec<IndexEntry> = Vec::new();
2496        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2497        let mut extra_warnings: Vec<String> = Vec::new();
2498        for u in &urls {
2499            match load_cached(&app_dir, u) {
2500                Ok(Some(idx)) => {
2501                    for e in idx.packages {
2502                        if seen.insert(e.entity.name.clone()) {
2503                            packages.push(e);
2504                        }
2505                    }
2506                }
2507                Ok(None) => {}
2508                Err(e) => extra_warnings.push(format!("hub cache read failed for {u}: {e}")),
2509            }
2510        }
2511
2512        assert!(
2513            !extra_warnings.is_empty(),
2514            "corrupt cache must produce a warning"
2515        );
2516        assert!(
2517            extra_warnings[0].contains("hub cache read failed"),
2518            "warning text mismatch: {}",
2519            extra_warnings[0]
2520        );
2521        assert!(packages.is_empty(), "no packages from corrupt source");
2522    }
2523
2524    // M-2: registry-load failure is demoted to a warning; accumulated
2525    // warnings before the failure are preserved in the returned vec.
2526    #[tokio::test]
2527    async fn aggregate_index_registry_failure_returns_ok_with_warning() {
2528        let tmp = tempfile::tempdir().unwrap();
2529        let app_dir_root = tmp.path().to_path_buf();
2530
2531        // Write corrupt hub_registries.json so load_registries fails.
2532        let reg_path = AppDir::new(app_dir_root.clone()).hub_registries_json();
2533        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2534        std::fs::write(&reg_path, b"{{{{ not valid json").unwrap();
2535
2536        // Also write a corrupt config.toml to generate a pre-registry warning.
2537        // (config.toml hub.collection_url parse warns before the registry step.)
2538        // We skip this to keep the test minimal — just verify registry failure
2539        // demotes to warning and result is Ok.
2540
2541        let svc = super::super::test_support::make_app_service_at(app_dir_root).await;
2542        let result = AppService::aggregate_index(&svc);
2543        assert!(
2544            result.is_ok(),
2545            "aggregate_index must return Ok even on registry-load failure, got: {result:?}"
2546        );
2547        let (index, warnings) = result.unwrap();
2548        assert!(
2549            index.packages.is_empty(),
2550            "degraded response must have empty packages"
2551        );
2552        assert!(
2553            !warnings.is_empty(),
2554            "registry-load failure must produce a warning"
2555        );
2556        assert!(
2557            warnings
2558                .iter()
2559                .any(|w| w.contains("hub registry discovery failed")),
2560            "warning must mention registry discovery failure, got: {warnings:?}"
2561        );
2562    }
2563}