Skip to main content

algocline_app/service/
hub.rs

1//! Hub — package discovery, search, and index management.
2//!
3//! The Hub is algocline's package registry layer.  It aggregates remote
4//! index data with local install state so that users (via AI) can
5//! **discover** packages they haven't installed yet, and **inspect**
6//! installed packages with full Card and eval statistics.
7//!
8//! ## Staged design
9//!
10//! | Stage | Scope | Status |
11//! |-------|-------|--------|
12//! | **1** | Card Collection install, Pkg-bundled cards | Done |
13//! | **2** | Hub MCP tools (`hub_search`, `hub_info`, `hub_reindex`), local index | Done |
14//! | **3** | Aggregated remote collection index, `hub_publish`, LP | Planned |
15//!
16//! ## MCP tools
17//!
18//! | Tool | Description |
19//! |------|-------------|
20//! | `alc_hub_search` | Discover packages across remote + local indices |
21//! | `alc_hub_info` | Detailed single-package view (meta + cards + aliases + stats) |
22//! | `alc_hub_reindex` | Rebuild index from local packages or a repo checkout |
23//!
24//! ## Index schema (`hub_index/v0`)
25//!
26//! ```json
27//! {
28//!   "schema_version": "hub_index/v0",
29//!   "updated_at": "2026-04-12T10:00:00Z",
30//!   "packages": [{
31//!     "name": "cot",
32//!     "version": "0.1.0",
33//!     "description": "Chain-of-Thought prompting",
34//!     "category": "reasoning",
35//!     "source": "https://github.com/...",
36//!     "card_count": 3,
37//!     "best_card": { "card_id": "...", "model": "...", "pass_rate": 0.82, "scenario": "..." }
38//!   }]
39//! }
40//! ```
41//!
42//! Index generation uses `init.lua` M.meta parsing only — no Lua VM
43//! required.  This keeps the index buildable in CI environments.
44//!
45//! ## Index URL discovery (4-tier)
46//!
47//! Sources are checked in priority order; URLs are deduplicated:
48//!
49//!   0. **Collection URL** — `[hub].collection_url` in `~/.algocline/config.toml`.
50//!      Aggregated index containing all known packages (Stage 3).
51//!   1. **Hub registries** — `~/.algocline/hub_registries.json`, auto-populated
52//!      by `pkg_install` and `card_install`.
53//!   2. **Installed manifest** — `~/.algocline/installed.json`, fallback for
54//!      sources registered before registries existed.
55//!   3. **Compiled-in seeds** — bundled-packages source for first-run bootstrap.
56//!
57//! GitHub repo URLs are transformed to raw index URLs:
58//!
59//! ```text
60//! https://github.com/{owner}/{repo}
61//!   → https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
62//! ```
63//!
64//! ## Caching
65//!
66//! Remote indices are cached per-source at
67//! `~/.algocline/hub_cache/{hash}.json` where hash is FNV-1a of the
68//! URL.  TTL is 1 hour.
69//!
70//! ## Registry persistence
71//!
72//! `~/.algocline/hub_registries.json` records source URLs from
73//! `pkg_install` and `card_install`.  Written atomically (tempfile +
74//! rename) to avoid corruption on interruption.
75
76use std::collections::{HashMap, HashSet};
77use std::path::PathBuf;
78
79use serde::{Deserialize, Serialize};
80
81use algocline_core::{AppDir, PkgEntity, PkgType};
82
83use super::list_opts::{
84    apply_sort_by_value, matches_filter, parse_sort, project_fields, resolve_fields, ListOpts,
85    HUB_SEARCH_FULL, HUB_SEARCH_SUMMARY,
86};
87use super::manifest;
88use super::resolve::AUTO_INSTALL_SOURCES;
89use super::source::PackageSource;
90use super::AppService;
91use super::HubRegistriesError;
92
93// ─── Constants ─────────────────────────────────────────────────
94
/// How long a cached remote index is considered fresh, in seconds (one hour).
const CACHE_TTL_SECS: u64 = 60 * 60;

/// Global timeout applied to each remote index HTTP request (30 seconds).
const HTTP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
100
101// ─── Index schema ──────────────────────────────────────────────
102
103/// Remote index — same shape as the local index so merge is trivial.
104#[derive(Debug, Clone, Serialize, Deserialize)]
105pub(crate) struct HubIndex {
106    pub schema_version: String,
107    #[serde(default)]
108    pub updated_at: String,
109    #[serde(default)]
110    pub packages: Vec<IndexEntry>,
111}
112
113/// One package in the index.
114///
115/// `entity` carries the canonical Lua `M.meta` projection (name, version,
116/// description, category, docstring) via `#[serde(flatten)]` so the wire
117/// shape is identical to the pre-refactor flat-object layout. `source`
118/// is the typed package source; `card_count` / `best_card` are hub-side
119/// enrichments computed at index-build time.
120#[derive(Debug, Clone, Serialize, Deserialize)]
121pub(crate) struct IndexEntry {
122    #[serde(flatten)]
123    pub entity: PkgEntity,
124    /// How this package was obtained. Typed on write; legacy bare strings
125    /// in pre-migration `hub_index.json` deserialize via the serde shim
126    /// on `PackageSource` (see `service::source`).
127    #[serde(default)]
128    pub source: PackageSource,
129    #[serde(default)]
130    pub card_count: usize,
131    #[serde(default)]
132    pub best_card: Option<BestCard>,
133}
134
135/// Best card summary within a package.
136#[derive(Debug, Clone, Serialize, Deserialize)]
137pub(crate) struct BestCard {
138    pub card_id: String,
139    #[serde(default)]
140    pub model: String,
141    #[serde(default)]
142    pub pass_rate: f64,
143    #[serde(default)]
144    pub scenario: String,
145}
146
147/// Search result — index entry enriched with local install state.
148///
149/// `entity.docstring` is `skip_serializing` (via the `skip_docstring`
150/// custom serializer on the flattened struct) so the default serde output
151/// never exposes the docstring field — docstrings can be large and
152/// dominate payload size. The `hub_search` projection path re-attaches
153/// the docstring to the output object when the resolved field set
154/// contains `"docstring"`, via
155/// [`SearchResult::to_value_with_optional_docstring`].
156///
157/// `docstring_matched` is a query-time signal: it is `Some(true)` only
158/// when the query hit docstring and none of {name, description, category}.
159/// Otherwise (no query, or query hit any of the other fields) it is
160/// `None` and omitted from the output.
161///
162/// Because `#[serde(flatten)]` composes poorly with field-level
163/// `skip_serializing`, we carry the non-docstring part of `PkgEntity`
164/// via a custom `serialize_entity_without_docstring` path rather than a
165/// bare `#[serde(flatten)]`. The struct still holds a full `PkgEntity`
166/// internally for consistency with `IndexEntry`.
167#[derive(Debug, Clone, Serialize)]
168struct SearchResult {
169    #[serde(flatten, serialize_with = "serialize_entity_without_docstring")]
170    entity: PkgEntity,
171    /// Typed source (mirrors `IndexEntry.source`).
172    source: PackageSource,
173    installed: bool,
174    card_count: usize,
175    best_card: Option<BestCard>,
176    #[serde(skip_serializing_if = "Option::is_none")]
177    docstring_matched: Option<bool>,
178}
179
180/// Serialize a `PkgEntity` as a flat JSON object, intentionally dropping
181/// the `docstring` field so large docstrings do not dominate `hub_search`
182/// payloads. The projection path re-attaches docstring via
183/// [`SearchResult::to_value_with_optional_docstring`].
184fn serialize_entity_without_docstring<S>(entity: &PkgEntity, ser: S) -> Result<S::Ok, S::Error>
185where
186    S: serde::Serializer,
187{
188    use serde::ser::SerializeMap;
189    let mut map = ser.serialize_map(Some(6))?;
190    map.serialize_entry("name", &entity.name)?;
191    map.serialize_entry("version", &entity.version)?;
192    map.serialize_entry("description", &entity.description)?;
193    map.serialize_entry("category", &entity.category)?;
194    map.serialize_entry("tags", &entity.tags)?;
195    map.serialize_entry("type", &entity.pkg_type)?;
196    map.end()
197}
198
199impl SearchResult {
200    /// Serialize `self` to a JSON `Value`, optionally re-attaching
201    /// `docstring` to the resulting object.
202    ///
203    /// `skip_serializing` removes `docstring` from every serde output
204    /// path. When projection selects `docstring` as an output field, we
205    /// need to put it back — this helper bridges that gap by inserting
206    /// the field manually into the resulting `Value::Object`.
207    ///
208    /// Returns the original `Value` unchanged if serialization produced
209    /// a non-object (should not happen for `SearchResult`, but we stay
210    /// defensive because the downstream `project_fields` contract
211    /// tolerates non-objects).
212    fn to_value_with_optional_docstring(&self, include_docstring: bool) -> serde_json::Value {
213        let mut v = serde_json::to_value(self).unwrap_or(serde_json::Value::Null);
214        if include_docstring {
215            if let serde_json::Value::Object(ref mut map) = v {
216                let doc = self.entity.docstring.clone().unwrap_or_default();
217                map.insert("docstring".to_string(), serde_json::Value::String(doc));
218            }
219        }
220        v
221    }
222}
223
224// ─── Hub registries ───────────────────────────────────────────
225//
226// Persistent file (`~/.algocline/hub_registries.json`) that records
227// source URLs from `pkg_install` and `card_install`.  This is the
228// primary source for Hub index URL discovery — the manifest and the
229// bundled-packages seed serve as fallback sources.
230
231/// One entry in `hub_registries.json`.
232#[derive(Debug, Clone, Serialize, Deserialize)]
233pub(crate) struct RegistryEntry {
234    /// Original source URL (Git repo or local path).
235    pub source: String,
236    /// How it was registered: "pkg_install" or "card_install".
237    pub origin: String,
238    /// ISO 8601 timestamp of when the entry was added.
239    pub added_at: String,
240}
241
242/// Top-level registries file.
243#[derive(Debug, Clone, Serialize, Deserialize, Default)]
244pub(crate) struct HubRegistries {
245    pub registries: Vec<RegistryEntry>,
246}
247
248fn registries_path(app_dir: &AppDir) -> PathBuf {
249    app_dir.hub_registries_json()
250}
251
252/// Load registries from disk.
253///
254/// Returns `Ok(HubRegistries::default())` when the file does not yet exist —
255/// the file is created lazily on first `register_source` call. Returns `Err`
256/// when the file exists but cannot be read (I/O error) or parsed (corrupt
257/// JSON), so callers can surface the failure instead of silently degrading hub
258/// discovery.
259fn load_registries(app_dir: &AppDir) -> Result<HubRegistries, HubRegistriesError> {
260    let path = registries_path(app_dir);
261    if !path.exists() {
262        return Ok(HubRegistries::default());
263    }
264    let content = std::fs::read_to_string(&path).map_err(|e| {
265        HubRegistriesError::Parse(format!(
266            "failed to read hub_registries.json at {}: {e}",
267            path.display()
268        ))
269    })?;
270    serde_json::from_str::<HubRegistries>(&content).map_err(|e| {
271        HubRegistriesError::Parse(format!(
272            "failed to parse hub_registries.json at {}: {e}",
273            path.display()
274        ))
275    })
276}
277
278/// Register a source URL.  Deduplicates by normalized URL.
279///
280/// Returns `Ok(())` on success or when the input is skipped (empty /
281/// local path / already registered). Filesystem failures are returned
282/// as `Err(String)` so callers can surface them on the MCP wire
283/// response — the registry is best-effort relative to the `pkg_install`
284/// itself, but the caller still needs to know when it silently failed
285/// (otherwise hub discovery degrades without any signal).
286///
287/// Uses atomic write (tempfile + rename) to avoid partial writes if
288/// the process is interrupted. Read-modify-write is not locked across
289/// processes, but MCP servers are single-process so this is safe in
290/// practice.
291pub(crate) fn register_source(app_dir: &AppDir, source: &str, origin: &str) -> Result<(), String> {
292    let normalized = source.trim_end_matches('/').to_string();
293    if normalized.is_empty() {
294        return Ok(());
295    }
296    // Skip local paths — they can't host a remote index
297    if normalized.starts_with('/') || normalized.starts_with('.') {
298        return Ok(());
299    }
300
301    let path = registries_path(app_dir);
302    if let Some(parent) = path.parent() {
303        std::fs::create_dir_all(parent).map_err(|e| {
304            format!(
305                "failed to create hub registries dir {}: {e}",
306                parent.display()
307            )
308        })?;
309    }
310
311    // Re-read from disk right before write to minimize TOCTOU window.
312    // Parse failure is propagated — a corrupt registries file means we
313    // cannot safely read-modify-write without risking data loss.
314    let mut reg = load_registries(app_dir).map_err(|e| format!("cannot register source: {e}"))?;
315
316    // Already registered?
317    if reg
318        .registries
319        .iter()
320        .any(|e| e.source.trim_end_matches('/') == normalized)
321    {
322        return Ok(());
323    }
324
325    reg.registries.push(RegistryEntry {
326        source: normalized,
327        origin: origin.to_string(),
328        added_at: manifest::now_iso8601(),
329    });
330
331    // Atomic write: write to temp file, then rename
332    let json = serde_json::to_string_pretty(&reg)
333        .map_err(|e| format!("failed to serialize hub registries: {e}"))?;
334    let tmp_path = path.with_extension("json.tmp");
335    std::fs::write(&tmp_path, &json).map_err(|e| {
336        format!(
337            "failed to write hub registries tmp {}: {e}",
338            tmp_path.display()
339        )
340    })?;
341    std::fs::rename(&tmp_path, &path).map_err(|e| {
342        // Best-effort cleanup of the stale tmp file on rename failure.
343        let _ = std::fs::remove_file(&tmp_path);
344        format!(
345            "failed to atomically rename hub registries onto {}: {e}",
346            path.display()
347        )
348    })
349}
350
351// ─── Hub config ──────────────────────────────────────────────
352//
353// Optional `[hub]` section in `~/.algocline/config.toml`:
354//
355//   [hub]
356//   collection_url = "https://raw.githubusercontent.com/.../hub_index.json"
357//
358// When set, this is fetched as Tier 0 (the aggregated collection
359// index containing all known packages, including uninstalled ones).
360
361/// Read the `[hub].collection_url` from `~/.algocline/config.toml`.
362///
363/// Returns:
364/// - `Ok(Some(url))` — file exists, parses cleanly, `[hub].collection_url` present and non-empty.
365/// - `Ok(None)` — file absent (normal: config is optional) or `[hub].collection_url` not set.
366/// - `Err(msg)` — file exists but TOML parse fails (corruption); caller should surface as warning.
367fn collection_url_from_config(app_dir: &AppDir) -> Result<Option<String>, String> {
368    let path = app_dir.config_toml();
369    let content = match std::fs::read_to_string(&path) {
370        Ok(c) => c,
371        Err(e) if e.kind() == std::io::ErrorKind::NotFound => return Ok(None),
372        Err(_) => return Ok(None), // permission errors etc. treated as absent
373    };
374    let doc: toml_edit::DocumentMut = content
375        .parse()
376        .map_err(|e| format!("config.toml parse: {e}"))?;
377    let url = match doc
378        .get("hub")
379        .and_then(|h| h.get("collection_url"))
380        .and_then(|v| v.as_str())
381    {
382        Some(s) => s.trim().to_string(),
383        None => return Ok(None),
384    };
385    if url.is_empty() {
386        Ok(None)
387    } else {
388        Ok(Some(url))
389    }
390}
391
392// ─── Index URL discovery ──────────────────────────────────────
393//
394// Derives remote index URLs from:
395//   0. Hub Collection URL (from config.toml) — aggregated index
396//   1. Hub registries (`hub_registries.json`) — primary source
397//   2. Unique `source` fields in the installed-packages manifest
398//   3. Bundled-packages seed (for first-run bootstrap)
399//
400// GitHub repos are transformed:
401//   https://github.com/{owner}/{repo}  →
402//   https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
403
/// Convert a repository URL into the URL of its `hub_index.json`.
///
/// Trailing `/` characters and a trailing `.git` are ignored.
///
/// - `https://github.com/{owner}/{repo}[/...]` becomes
///   `https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json`
///   (any path beyond `{owner}/{repo}` is discarded).
/// - A GitHub URL whose owner or repo segment is empty (for example
///   `https://github.com//repo`) yields `None` instead of a malformed raw
///   URL.
/// - Any other URL ending in `.json` is assumed to already be a direct
///   index URL and is returned (trimmed) as-is.
/// - Everything else yields `None` (future: support other hosts).
fn repo_to_index_url(repo_url: &str) -> Option<String> {
    let trimmed = repo_url.trim_end_matches('/').trim_end_matches(".git");

    if let Some(path) = trimmed.strip_prefix("https://github.com/") {
        // path = "owner/repo[/anything/else]"
        let mut segments = path.split('/');
        match (segments.next(), segments.next()) {
            (Some(owner), Some(repo)) if !owner.is_empty() && !repo.is_empty() => {
                return Some(format!(
                    "https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json"
                ));
            }
            // An empty owner or repo can never name a repository.
            (Some(_), Some(_)) => return None,
            // Only one segment: not a repo URL, fall through to the
            // direct-index check below.
            _ => {}
        }
    }

    // Non-repo URL: accept it only if it already points at a JSON file.
    trimmed.ends_with(".json").then(|| trimmed.to_string())
}
425
426/// Collect unique index URLs from config + registries + manifest + bundled seeds.
427///
428/// Returns `Err` if the installed manifest cannot be read (corrupt JSON /
429/// permission denied). The function intentionally surfaces manifest-read
430/// failures rather than silently skipping — callers feed these URLs into
431/// hub resolution, and a partial URL set is indistinguishable from a
432/// corrupt manifest without the signal.
433///
434/// `warnings` collects non-fatal issues (e.g. config.toml TOML parse failure)
435/// that the caller should surface on the MCP wire response.
436fn discover_index_urls(
437    app_dir: &AppDir,
438    warnings: &mut Vec<String>,
439) -> Result<Vec<String>, String> {
440    let mut index_urls: Vec<String> = Vec::new();
441
442    // 0. From config.toml [hub].collection_url (Tier 0 — aggregated collection).
443    // Parse failures (corrupted config) are collected as warnings so the
444    // rest of discovery proceeds — the file is optional, but corruption
445    // is distinguishable from absence and must be surfaced to the caller.
446    match collection_url_from_config(app_dir) {
447        Ok(Some(url)) => index_urls.push(url),
448        Ok(None) => {}
449        Err(e) => warnings.push(format!("config.toml hub.collection_url: {e}")),
450    }
451
452    let mut repo_urls: HashSet<String> = HashSet::new();
453
454    // 1. From hub registries (primary). Parse failure is propagated so
455    // callers know the registry is degraded — a partial URL set from a
456    // corrupt file is indistinguishable from intentionally empty.
457    // `HubRegistriesError` is converted to `String` at the wire boundary
458    // (`discover_index_urls` still returns `Result<_, String>`).
459    let reg = load_registries(app_dir).map_err(|e| e.to_string())?;
460    for entry in &reg.registries {
461        let normalized = entry.source.trim_end_matches('/').to_string();
462        if !normalized.is_empty() {
463            repo_urls.insert(normalized);
464        }
465    }
466
467    // 2. From manifest (catch sources registered before hub_registries existed).
468    // Only Git-variant sources can host a remote hub_index.json; other variants
469    // (Path / Installed / Bundled / Unknown) are skipped by `git_url()` returning None.
470    let m = manifest::load_manifest(app_dir)?;
471    for entry in m.packages.values() {
472        if let Some(url) = entry.source.git_url() {
473            let normalized = url.trim_end_matches('/').to_string();
474            if !normalized.is_empty() {
475                repo_urls.insert(normalized);
476            }
477        }
478    }
479
480    // 3. Fallback: bundled sources (ensures at least these are checked)
481    for url in AUTO_INSTALL_SOURCES {
482        repo_urls.insert(url.to_string());
483    }
484
485    // 4. Transform repo URLs → index URLs, dedup against Tier 0
486    let existing: HashSet<String> = index_urls.iter().cloned().collect();
487    let mut derived: Vec<String> = repo_urls
488        .iter()
489        .filter_map(|url| repo_to_index_url(url))
490        .filter(|url| !existing.contains(url))
491        .collect();
492    derived.sort();
493    derived.dedup();
494    index_urls.extend(derived);
495
496    Ok(index_urls)
497}
498
499// ─── Per-source cache ─────────────────────────────────────────
500//
501// Each remote index is cached separately at
502// `~/.algocline/hub_cache/{hash}.json` where hash is derived from
503// the index URL. This avoids mixing data from different registries
504// and allows per-source TTL validation.
505
506fn cache_dir(app_dir: &AppDir) -> PathBuf {
507    app_dir.hub_cache_dir()
508}
509
/// Derive a stable cache file stem from an index URL.
///
/// Computes the 64-bit FNV-1a hash of the URL's bytes and renders it as 16
/// lowercase hex digits. Hand-rolled to avoid pulling in a hash crate —
/// good enough for naming cache files.
fn cache_key(url: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let digest = url.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{digest:016x}")
}
520
521/// Result of a cache lookup distinguishing absent, stale, fresh, and corrupt.
522///
523/// Used by `load_cached_full` (called from `aggregate_index`) to allow
524/// stale data to be merged into the aggregate while a warning is emitted.
525/// `load_cached` (used by `fetch_one`) maps both `NotPresent` and `Stale`
526/// to `Ok(None)` for backward compat.
527enum CacheLookup {
528    /// File absent.
529    NotPresent,
530    /// File present but older than `CACHE_TTL_SECS`; contains the stale data.
531    Stale(HubIndex),
532    /// File present, within TTL, parsed cleanly.
533    Fresh(HubIndex),
534    /// File present (within TTL) but JSON parse failed.
535    Corrupt(String),
536}
537
538/// Full cache lookup that distinguishes stale from absent.
539///
540/// Used by `aggregate_index` so stale data can still be merged with a
541/// warning, rather than being silently discarded.
542fn load_cached_full(app_dir: &AppDir, url: &str) -> CacheLookup {
543    let dir = cache_dir(app_dir);
544    let path = dir.join(format!("{}.json", cache_key(url)));
545    if !path.exists() {
546        return CacheLookup::NotPresent;
547    }
548    let metadata = match std::fs::metadata(&path) {
549        Ok(m) => m,
550        Err(_) => return CacheLookup::NotPresent,
551    };
552    let age = match metadata.modified().ok().and_then(|t| t.elapsed().ok()) {
553        Some(a) => a,
554        None => return CacheLookup::NotPresent,
555    };
556    let content = match std::fs::read_to_string(&path) {
557        Ok(c) => c,
558        Err(e) => return CacheLookup::Corrupt(format!("hub cache read {}: {e}", path.display())),
559    };
560    match serde_json::from_str::<HubIndex>(&content) {
561        Ok(index) => {
562            if age.as_secs() > CACHE_TTL_SECS {
563                CacheLookup::Stale(index)
564            } else {
565                CacheLookup::Fresh(index)
566            }
567        }
568        Err(e) => CacheLookup::Corrupt(format!("hub cache parse {}: {e}", path.display())),
569    }
570}
571
572/// Load cached remote index for a specific URL if fresh (within TTL).
573///
574/// Returns:
575/// - `Ok(Some(index))` — cache hit: file exists, within TTL, parses cleanly.
576/// - `Ok(None)` — cache miss: file absent, expired, or metadata unreadable (treat as miss).
577/// - `Err(msg)` — file exists and is within TTL but JSON parse fails (corruption);
578///   caller should surface as warning and fall back to a network fetch.
579fn load_cached(app_dir: &AppDir, url: &str) -> Result<Option<HubIndex>, String> {
580    match load_cached_full(app_dir, url) {
581        CacheLookup::Fresh(index) => Ok(Some(index)),
582        CacheLookup::NotPresent | CacheLookup::Stale(_) => Ok(None),
583        CacheLookup::Corrupt(msg) => Err(msg),
584    }
585}
586
587/// Save remote index to per-source cache file.
588///
589/// Returns `Ok(())` on success. Cache write failures are returned as
590/// `Err(String)`; the caller (`fetch_one`) carries them out of band so
591/// hub fetch still completes (the index is in memory) but the warning
592/// surfaces to the MCP wire response via the existing `warnings` channel.
593fn save_cached(app_dir: &AppDir, url: &str, index: &HubIndex) -> Result<(), String> {
594    let dir = cache_dir(app_dir);
595    std::fs::create_dir_all(&dir)
596        .map_err(|e| format!("failed to create hub cache dir {}: {e}", dir.display()))?;
597    let path = dir.join(format!("{}.json", cache_key(url)));
598    let json = serde_json::to_string_pretty(index)
599        .map_err(|e| format!("failed to serialize hub cache: {e}"))?;
600    std::fs::write(&path, json)
601        .map_err(|e| format!("failed to write hub cache {}: {e}", path.display()))
602}
603
604// ─── Remote fetch ──────────────────────────────────────────────
605
606/// Fetch a single remote index by URL, using per-source cache.
607///
608/// Returns the index plus an optional cache-related warning. The warning
609/// is non-None when either:
610/// - The network fetch succeeded but persisting the cache to disk failed.
611/// - The cache file was present and within TTL but failed to parse
612///   (corruption); in that case the function falls back to a network
613///   fetch and includes the parse-failure in the warning so the operator
614///   can investigate the on-disk state.
615fn fetch_one(app_dir: &AppDir, url: &str) -> Result<(HubIndex, Option<String>), String> {
616    // Distinguish cache corruption (Err) from cache miss (Ok(None)).
617    match load_cached(app_dir, url) {
618        Ok(Some(cached)) => return Ok((cached, None)),
619        Ok(None) => {} // cache miss — proceed to network fetch
620        Err(e) => {
621            // Cache file is corrupt. Fall through to network fetch and
622            // carry the corruption warning so the caller can surface it.
623            // We don't return Err here because the network path may still succeed.
624            let warn = format!("hub cache corrupted for {url}: {e}; falling back to network");
625            // Attempt network fetch; on success, attach the cache-corruption warning.
626            return fetch_one_from_network(app_dir, url)
627                .map(|(idx, save_warn)| {
628                    // Prefer the corruption warning; save_warn is secondary.
629                    let combined = Some(match save_warn {
630                        Some(sw) => format!("{warn}; {sw}"),
631                        None => warn.clone(),
632                    });
633                    (idx, combined)
634                })
635                .map_err(|fetch_err| format!("{warn}; network fetch also failed: {fetch_err}"));
636        }
637    }
638
639    fetch_one_from_network(app_dir, url)
640}
641
642/// Network-only path for fetching a remote index (no cache read).
643///
644/// On success returns `(index, Option<cache_write_warning>)`.
645fn fetch_one_from_network(
646    app_dir: &AppDir,
647    url: &str,
648) -> Result<(HubIndex, Option<String>), String> {
649    let agent = ureq::Agent::new_with_config(
650        ureq::config::Config::builder()
651            .timeout_global(Some(HTTP_TIMEOUT))
652            .build(),
653    );
654    let body: String = agent
655        .get(url)
656        .call()
657        .map_err(|e| format!("Failed to fetch {url}: {e}"))?
658        .body_mut()
659        .read_to_string()
660        .map_err(|e| format!("Failed to read response from {url}: {e}"))?;
661
662    let index: HubIndex = serde_json::from_str(&body)
663        .map_err(|e| format!("Failed to parse index from {url}: {e}"))?;
664
665    let cache_warning = save_cached(app_dir, url, &index)
666        .err()
667        .map(|e| format!("hub cache write for {url}: {e}"));
668    Ok((index, cache_warning))
669}
670
671/// Fetch all discovered remote indices and merge into one.
672/// Falls back gracefully: failed sources are skipped with warnings.
673fn fetch_remote_indices(app_dir: &AppDir) -> Result<(HubIndex, Vec<String>), String> {
674    let mut warnings: Vec<String> = Vec::new();
675    let urls = discover_index_urls(app_dir, &mut warnings)?;
676    let mut all_packages: Vec<IndexEntry> = Vec::new();
677    let mut seen_names: HashSet<String> = HashSet::new();
678
679    for url in &urls {
680        match fetch_one(app_dir, url) {
681            Ok((index, cache_warning)) => {
682                for entry in index.packages {
683                    if seen_names.insert(entry.entity.name.clone()) {
684                        all_packages.push(entry);
685                    }
686                    // If duplicate name across sources, first wins
687                }
688                if let Some(w) = cache_warning {
689                    warnings.push(w);
690                }
691            }
692            Err(e) => {
693                warnings.push(e);
694            }
695        }
696    }
697
698    if all_packages.is_empty() && !warnings.is_empty() {
699        warnings.insert(
700            0,
701            "all remote indices unavailable, showing local packages only".to_string(),
702        );
703    }
704
705    let merged = HubIndex {
706        schema_version: "hub_index/v0".into(),
707        updated_at: String::new(),
708        packages: all_packages,
709    };
710    Ok((merged, warnings))
711}
712
713// ─── Local state ───────────────────────────────────────────────
714
715/// Build a set of locally installed package names from `installed.json`
716/// and the `~/.algocline/packages/` directory.
717fn installed_packages(app_dir: &AppDir) -> Result<HashMap<String, Option<String>>, String> {
718    let mut map = HashMap::new();
719
720    // From manifest (has version info)
721    let m = manifest::load_manifest(app_dir)?;
722    for (name, entry) in &m.packages {
723        map.insert(name.clone(), entry.version.clone());
724    }
725
726    // Also scan packages/ dir in case manifest is stale
727    let pkg_dir = app_dir.packages_dir();
728    if let Ok(entries) = std::fs::read_dir(&pkg_dir) {
729        for entry in entries.flatten() {
730            if entry.path().is_dir() {
731                if let Some(name) = entry.file_name().to_str() {
732                    map.entry(name.to_string()).or_insert(None);
733                }
734            }
735        }
736    }
737
738    Ok(map)
739}
740
741/// Count local cards per package from `{app_dir}/cards/{pkg}/`.
742fn local_card_counts(app_dir: &AppDir) -> HashMap<String, usize> {
743    let mut map = HashMap::new();
744    let cards_dir = app_dir.cards_dir();
745    let entries = match std::fs::read_dir(&cards_dir) {
746        Ok(e) => e,
747        Err(_) => return map,
748    };
749    for entry in entries.flatten() {
750        if !entry.path().is_dir() {
751            continue;
752        }
753        let pkg = match entry.file_name().to_str() {
754            Some(n) => n.to_string(),
755            None => continue,
756        };
757        let count = std::fs::read_dir(entry.path())
758            .map(|es| {
759                es.flatten()
760                    .filter(|e| e.path().extension().is_some_and(|ext| ext == "toml"))
761                    .count()
762            })
763            .unwrap_or(0);
764        if count > 0 {
765            map.insert(pkg, count);
766        }
767    }
768    map
769}
770
771/// Count eval results for a specific package by scanning `{app_dir}/evals/`.
772///
773/// Reads only `.meta.json` files (lightweight) to check the strategy field.
774/// Falls back to reading full eval JSON if meta is missing.
775///
776/// `warnings` receives per-file corruption messages (read or parse failures).
777/// I/O errors on the directory itself return 0 silently (evals dir absent is
778/// a legitimate "no evals yet" state). Per-file errors that indicate corruption
779/// (file exists but is unreadable or unparseable) are pushed to `warnings` so
780/// the caller can surface them on the MCP wire response.
781fn count_evals_for_pkg(app_dir: &AppDir, pkg: &str, warnings: &mut Vec<String>) -> usize {
782    let evals_dir = app_dir.evals_dir();
783    let entries = match std::fs::read_dir(&evals_dir) {
784        Ok(e) => e,
785        Err(_) => return 0,
786    };
787
788    // Collect all filenames first so ordering doesn't matter.
789    // We track stems that have a .meta.json to avoid reading the full eval JSON.
790    let mut meta_stems: HashSet<String> = HashSet::new();
791    let mut meta_matches: usize = 0;
792    let mut non_meta_paths: Vec<(PathBuf, String)> = Vec::new(); // (path, stem)
793
794    for entry in entries.flatten() {
795        let path = entry.path();
796        let name = match path.file_name().and_then(|n| n.to_str()) {
797            Some(n) => n.to_string(),
798            None => continue,
799        };
800
801        if name.ends_with(".meta.json") {
802            let stem = name.trim_end_matches(".meta.json").to_string();
803            meta_stems.insert(stem.clone());
804            // Distinguish I/O failure from parse failure so corruption is visible.
805            match std::fs::read_to_string(&path) {
806                Ok(content) => match serde_json::from_str::<serde_json::Value>(&content) {
807                    Ok(val) => {
808                        if val.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
809                            meta_matches += 1;
810                        }
811                    }
812                    Err(e) => warnings.push(format!("eval meta parse {}: {e}", path.display())),
813                },
814                Err(e) => warnings.push(format!("eval meta read {}: {e}", path.display())),
815            }
816            continue;
817        }
818
819        // Skip non-json or comparison files
820        if !name.ends_with(".json") || name.starts_with("compare_") {
821            continue;
822        }
823
824        let stem = path
825            .file_stem()
826            .and_then(|s| s.to_str())
827            .unwrap_or("")
828            .to_string();
829        non_meta_paths.push((path, stem));
830    }
831
832    // Only read full eval JSON for entries without a .meta.json.
833    // Distinguish I/O and parse failures; both are surfaced as warnings.
834    let mut fallback_matches: usize = 0;
835    for (path, stem) in &non_meta_paths {
836        if meta_stems.contains(stem) {
837            continue;
838        }
839        match std::fs::read_to_string(path) {
840            Ok(c) => match serde_json::from_str::<serde_json::Value>(&c) {
841                Ok(v) => {
842                    if v.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
843                        fallback_matches += 1;
844                    }
845                }
846                Err(e) => warnings.push(format!("eval result parse {}: {e}", path.display())),
847            },
848            Err(e) => warnings.push(format!("eval result read {}: {e}", path.display())),
849        }
850    }
851
852    meta_matches + fallback_matches
853}
854
855// ─── Merge ─────────────────────────────────────────────────────
856
857/// Merge remote index with local install state.
858///
859/// When a package is installed locally and the remote index lacks a
860/// docstring (pre-v0.21 indices), the docstring is extracted from the
861/// local `init.lua` so that full-text search works immediately.
862fn merge(app_dir: &AppDir, remote: &HubIndex) -> Result<Vec<SearchResult>, String> {
863    let installed = installed_packages(app_dir)?;
864    let card_counts = local_card_counts(app_dir);
865    let pkg_dir: Option<PathBuf> = Some(app_dir.packages_dir());
866
867    let mut seen: HashSet<String> = HashSet::new();
868    let mut results: Vec<SearchResult> = Vec::new();
869
870    for entry in &remote.packages {
871        let pkg_name = &entry.entity.name;
872        let is_installed = installed.contains_key(pkg_name);
873        let local_cards = card_counts.get(pkg_name).copied().unwrap_or(0);
874
875        // Supplement empty docstring from local init.lua when installed.
876        // Re-parse via `PkgEntity` so the supplementation path stays
877        // consistent with `build_index`.
878        let docstring = if entry.entity.docstring.as_deref().unwrap_or("").is_empty()
879            && is_installed
880        {
881            pkg_dir
882                .as_ref()
883                .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(pkg_name).join("init.lua")))
884                .and_then(|e| e.docstring)
885        } else {
886            entry.entity.docstring.clone()
887        };
888
889        seen.insert(pkg_name.clone());
890        let mut merged_entity = entry.entity.clone();
891        merged_entity.docstring = docstring;
892        merged_entity.pkg_type = merged_entity.pkg_type.or(Some(PkgType::Runnable));
893        results.push(SearchResult {
894            entity: merged_entity,
895            source: entry.source.clone(),
896            installed: is_installed,
897            card_count: if is_installed && local_cards > entry.card_count {
898                local_cards
899            } else {
900                entry.card_count
901            },
902            best_card: entry.best_card.clone(),
903            docstring_matched: None,
904        });
905    }
906
907    // Add local-only packages (not in remote index).
908    for (name, version) in &installed {
909        if seen.contains(name) {
910            continue;
911        }
912        // Pull full `PkgEntity` from local init.lua when available (keeps the
913        // wire shape consistent with remote entries). When the package does
914        // not parse as a `PkgEntity` (missing `M.meta.name`), fall back to
915        // a minimal entity with just the directory name and the manifest
916        // version — the entry still appears in local-only listings, but the
917        // richer projection fields are simply absent.
918        let parsed_entity = pkg_dir
919            .as_ref()
920            .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(name).join("init.lua")));
921        let entity = parsed_entity.unwrap_or(PkgEntity {
922            name: name.clone(),
923            version: version.clone(),
924            description: None,
925            category: None,
926            docstring: None,
927            tags: None,
928            pkg_type: Some(PkgType::Runnable),
929            type_source: None,
930        });
931        results.push(SearchResult {
932            entity,
933            source: PackageSource::Unknown,
934            installed: true,
935            card_count: card_counts.get(name).copied().unwrap_or(0),
936            best_card: None,
937            docstring_matched: None,
938        });
939    }
940
941    Ok(results)
942}
943
944// ─── Search (filtering) ───────────────────────────────────────
945
946fn matches_query(result: &SearchResult, query: &str) -> bool {
947    let q = query.to_lowercase();
948    let pkg = &result.entity;
949    let empty = String::new();
950    pkg.name.to_lowercase().contains(&q)
951        || pkg
952            .description
953            .as_ref()
954            .unwrap_or(&empty)
955            .to_lowercase()
956            .contains(&q)
957        || pkg
958            .category
959            .as_ref()
960            .unwrap_or(&empty)
961            .to_lowercase()
962            .contains(&q)
963        || pkg
964            .docstring
965            .as_ref()
966            .unwrap_or(&empty)
967            .to_lowercase()
968            .contains(&q)
969        || pkg
970            .tags
971            .as_ref()
972            .is_some_and(|tags| tags.iter().any(|tag| tag.to_lowercase().contains(&q)))
973}
974
975// ─── Index generation (reindex) ───────────────────────────────
976//
977// The non-Lua-VM parser that used to live here
978// (`parse_meta_from_init_lua` / `extract_docstring`) has moved into
979// `algocline_core::PkgEntity::parse_from_init_lua`, where it is shared
980// with the manifest / lockfile wire format. The parsing tests migrated
981// with it; `hub.rs` now just consumes the typed `PkgEntity` projection.
982
983/// Build a hub index by scanning a packages directory.
984///
985/// When `source_dir` is provided, scans that directory directly
986/// (for generating an index from a repo checkout).  Metadata comes
987/// only from `init.lua` — no manifest lookup, no card counts.
988///
989/// When `source_dir` is `None`, scans `~/.algocline/packages/` and
990/// enriches entries with manifest source and local card counts.
991fn build_index(app_dir: &AppDir, source_dir: Option<&std::path::Path>) -> Result<HubIndex, String> {
992    let empty = || HubIndex {
993        schema_version: "hub_index/v0".into(),
994        updated_at: super::manifest::now_iso8601(),
995        packages: Vec::new(),
996    };
997
998    let pkg_dir = match source_dir {
999        Some(d) => d.to_path_buf(),
1000        None => app_dir.packages_dir(),
1001    };
1002
1003    let use_local_state = source_dir.is_none();
1004    let card_counts = if use_local_state {
1005        local_card_counts(app_dir)
1006    } else {
1007        HashMap::new()
1008    };
1009    // Manifest read errors surface as `Err` rather than degrading to an
1010    // empty manifest — when building the local hub index, a corrupt
1011    // `installed.json` silently turning all package sources into
1012    // `PackageSource::Unknown` would be indistinguishable from the
1013    // legitimate "no source recorded" state, and would ship into
1014    // generated `hub_index.json` files verbatim.
1015    let manifest = if use_local_state {
1016        manifest::load_manifest(app_dir)?
1017    } else {
1018        manifest::Manifest::default()
1019    };
1020
1021    let mut entries = Vec::new();
1022
1023    // Missing / unreadable `pkg_dir` is a legitimate "no packages yet"
1024    // state on a fresh install — distinct from manifest corruption
1025    // above, and safe to surface as an empty index.
1026    let dir_entries = match std::fs::read_dir(&pkg_dir) {
1027        Ok(e) => e,
1028        Err(_) => return Ok(empty()),
1029    };
1030
1031    for entry in dir_entries.flatten() {
1032        if !entry.path().is_dir() {
1033            continue;
1034        }
1035        let dir_name = match entry.file_name().to_str() {
1036            Some(n) if !n.starts_with('.') && !n.starts_with('_') => n.to_string(),
1037            _ => continue,
1038        };
1039
1040        let init_lua = entry.path().join("init.lua");
1041        if !init_lua.exists() {
1042            continue;
1043        }
1044
1045        // Silent-exclude gate: `PkgEntity::parse_from_init_lua` returns `None`
1046        // when `M.meta` is absent or `M.meta.name` is empty. Directories that
1047        // happen to contain an `init.lua` but aren't algocline packages
1048        // (e.g. `alc_shapes/`, a type DSL library) are dropped from the index
1049        // rather than falling through with a placeholder name — that would
1050        // pollute hub_search.
1051        let Some(entity) = PkgEntity::parse_from_init_lua(&init_lua) else {
1052            continue;
1053        };
1054
1055        // Use manifest source only for local-state mode. When the manifest
1056        // has no record for this directory, default to `PackageSource::Unknown`
1057        // (via `Default`) — hub consumers see it as "source not recorded".
1058        let source = manifest
1059            .packages
1060            .get(&dir_name)
1061            .map(|e| e.source.clone())
1062            .unwrap_or_default();
1063
1064        entries.push(IndexEntry {
1065            entity,
1066            source,
1067            card_count: card_counts.get(&dir_name).copied().unwrap_or(0),
1068            best_card: None,
1069        });
1070    }
1071
1072    entries.sort_by(|a, b| a.entity.name.cmp(&b.entity.name));
1073
1074    Ok(HubIndex {
1075        schema_version: "hub_index/v0".into(),
1076        updated_at: super::manifest::now_iso8601(),
1077        packages: entries,
1078    })
1079}
1080
1081// ─── Public API ────────────────────────────────────────────────
1082
1083impl AppService {
1084    /// Generate a hub index from a packages directory.
1085    ///
1086    /// When `source_dir` is provided, scans that directory (e.g. a
1087    /// repo checkout) — pure metadata extraction, no manifest or card
1088    /// data mixed in.  When omitted, scans `~/.algocline/packages/`.
1089    ///
1090    /// Writes the index to `output_path` (for CI / publishing).
1091    /// Does NOT touch the remote search cache.
1092    pub fn hub_reindex(
1093        &self,
1094        output_path: Option<&str>,
1095        source_dir: Option<&str>,
1096    ) -> Result<String, String> {
1097        let src = source_dir.map(std::path::Path::new);
1098        if let Some(d) = src {
1099            if !d.is_dir() {
1100                return Err(format!("source_dir '{}' is not a directory", d.display()));
1101            }
1102        }
1103        let app_dir = self.log_config.app_dir();
1104        let index = build_index(&app_dir, src)?;
1105
1106        let written_path = if let Some(path) = output_path {
1107            let json = serde_json::to_string_pretty(&index)
1108                .map_err(|e| format!("Failed to serialize index: {e}"))?;
1109            std::fs::write(path, &json)
1110                .map_err(|e| format!("Failed to write index to {path}: {e}"))?;
1111            Some(path.to_string())
1112        } else {
1113            None
1114        };
1115
1116        let response = serde_json::json!({
1117            "package_count": index.packages.len(),
1118            "updated_at": index.updated_at,
1119            "output_path": written_path,
1120            "source_dir": source_dir,
1121        });
1122        Ok(response.to_string())
1123    }
1124
1125    /// Show detailed information for a single package.
1126    ///
1127    /// Aggregates package metadata (from index or local `init.lua`),
1128    /// all Cards, aliases, and eval stats into one response.
1129    pub fn hub_info(&self, pkg: &str) -> Result<String, String> {
1130        use algocline_engine::card;
1131
1132        // Guard against path traversal
1133        if pkg.contains("..") || pkg.contains('/') || pkg.contains('\\') {
1134            return Err(format!("Invalid package name: '{pkg}'"));
1135        }
1136
1137        // Package metadata: try remote index first, fall back to local
1138        let app_dir = self.log_config.app_dir();
1139        let installed = installed_packages(&app_dir)?;
1140        let is_installed = installed.contains_key(pkg);
1141
1142        // Resolve package metadata: try remote index first, fall back to
1143        // local init.lua. `version` / `description` / `category` are modelled
1144        // as `Option<String>` at the `PkgEntity` layer; at this API surface
1145        // we flatten `None` to empty string so the wire shape (non-null
1146        // JSON string fields) stays unchanged for existing consumers.
1147        let (version, description, category, source) = {
1148            let (remote, _) = fetch_remote_indices(&app_dir)?;
1149            if let Some(entry) = remote.packages.iter().find(|e| e.entity.name == pkg) {
1150                (
1151                    entry.entity.version.clone().unwrap_or_default(),
1152                    entry.entity.description.clone().unwrap_or_default(),
1153                    entry.entity.category.clone().unwrap_or_default(),
1154                    entry.source.clone(),
1155                )
1156            } else if is_installed {
1157                // Fall back to local init.lua parse via `PkgEntity`. When
1158                // the file is not a valid package (no `M.meta.name`), we
1159                // degrade gracefully by returning the manifest-recorded
1160                // version and empty string fields — mirroring the pre-typed
1161                // behaviour.
1162                let init_lua = app_dir.packages_dir().join(pkg).join("init.lua");
1163                let entity = PkgEntity::parse_from_init_lua(&init_lua);
1164                let manifest_source = manifest::load_manifest(&app_dir)?
1165                    .packages
1166                    .get(pkg)
1167                    .map(|e| e.source.clone())
1168                    .unwrap_or_default();
1169                match entity {
1170                    Some(e) => (
1171                        e.version.unwrap_or_default(),
1172                        e.description.unwrap_or_default(),
1173                        e.category.unwrap_or_default(),
1174                        manifest_source,
1175                    ),
1176                    None => (
1177                        installed.get(pkg).cloned().flatten().unwrap_or_default(),
1178                        String::new(),
1179                        String::new(),
1180                        manifest_source,
1181                    ),
1182                }
1183            } else {
1184                return Err(format!(
1185                    "Package '{pkg}' not found in remote indices or locally installed packages"
1186                ));
1187            }
1188        };
1189
1190        // Collect warnings additively; surfaced in response JSON so MCP callers
1191        // (Claude Code UI) observe degraded data instead of silent loss.
1192        // See CLAUDE.md §Service 層の Error 伝播規律 — tracing alone is not enough.
1193        let mut warnings: Vec<String> = Vec::new();
1194
1195        // Cards for this package (single call, reused for stats)
1196        let card_rows = match self.card_store.list(Some(pkg)) {
1197            Ok(rows) => rows,
1198            Err(e) => {
1199                let msg = format!("card store list for '{pkg}': {e}");
1200                tracing::warn!("{}", msg);
1201                warnings.push(msg);
1202                vec![]
1203            }
1204        };
1205        let cards_json = card::summaries_to_json(&card_rows);
1206
1207        // Aliases for this package
1208        let aliases_json = match self.card_store.alias_list(Some(pkg)) {
1209            Ok(rows) => card::aliases_to_json(&rows),
1210            Err(e) => {
1211                let msg = format!("card store alias_list for '{pkg}': {e}");
1212                tracing::warn!("{}", msg);
1213                warnings.push(msg);
1214                serde_json::json!([])
1215            }
1216        };
1217
1218        // Stats: card count, best pass_rate, eval count
1219        let card_count = card_rows.len();
1220        let best_pass_rate = card_rows
1221            .iter()
1222            .filter_map(|c| c.pass_rate)
1223            .fold(f64::NEG_INFINITY, f64::max);
1224        let best_pass_rate = if best_pass_rate.is_finite() {
1225            Some(best_pass_rate)
1226        } else {
1227            None
1228        };
1229
1230        // Eval count from evals directory; corruption warnings surfaced additively.
1231        let eval_count = count_evals_for_pkg(&app_dir, pkg, &mut warnings);
1232
1233        let mut response = serde_json::json!({
1234            "pkg": {
1235                "name": pkg,
1236                "version": version,
1237                "description": description,
1238                "category": category,
1239                "source": source,
1240                "installed": is_installed,
1241            },
1242            "cards": cards_json,
1243            "aliases": aliases_json,
1244            "stats": {
1245                "card_count": card_count,
1246                "eval_count": eval_count,
1247                "best_pass_rate": best_pass_rate,
1248            },
1249        });
1250        if !warnings.is_empty() {
1251            response["warnings"] = serde_json::json!(warnings);
1252        }
1253        Ok(response.to_string())
1254    }
1255
1256    /// Search packages across remote indices + local state.
1257    ///
1258    /// Index URLs are discovered from hub registries, manifest sources,
1259    /// and `AUTO_INSTALL_SOURCES`. Each source is cached independently.
1260    ///
1261    /// ## List-tool options (`opts`)
1262    ///
1263    /// The `opts` parameter carries the list-tool primitives
1264    /// (`limit / sort / filter / fields / verbose`) shared with other
1265    /// list-style MCP tools. Defaults:
1266    ///
1267    /// - `limit` — 50 when `None`. `Some(0)` means **no limit** (return
1268    ///   all matching entries — empty-means-all idiom).
1269    /// - `sort` — `"-installed,name"` when `None` (installed first, then
1270    ///   ascending by name).
1271    /// - `filter` — no additional filter. Legacy `category` /
1272    ///   `installed_only` parameters are merged into the filter map when
1273    ///   `filter` does not already contain those keys (explicit
1274    ///   `filter` wins on conflict).
1275    /// - `fields` / `verbose` — projection is applied to every entry in
1276    ///   the `results` array (see
1277    ///   [`super::list_opts::resolve_fields`]). Top-level keys
1278    ///   (`total`, `sources`, `warnings`) are never projected away.
1279    ///
1280    /// ## docstring handling
1281    ///
1282    /// [`SearchResult::docstring`] is `skip_serializing`, so it is
1283    /// absent from the default serialized view. When the resolved
1284    /// projection contains `"docstring"`, it is re-injected into the
1285    /// per-entry JSON via
1286    /// [`SearchResult::to_value_with_optional_docstring`].
1287    pub(crate) fn hub_search(
1288        &self,
1289        query: Option<&str>,
1290        category: Option<&str>,
1291        installed_only: Option<bool>,
1292        opts: ListOpts,
1293        local_indices: Option<Vec<String>>,
1294    ) -> Result<String, String> {
1295        let app_dir = self.log_config.app_dir();
1296        let (mut remote, mut warnings) = fetch_remote_indices(&app_dir)?;
1297
1298        // Merge local index files (pre-push verification / air-gapped use)
1299        // BEFORE the main `merge` step so that installed packages whose
1300        // metadata appears in a local index are surfaced with their full
1301        // entry (version / source / category) instead of the `Unknown`
1302        // stub produced by `merge`'s local-only fallback path. Each path
1303        // is read and deserialized as a HubIndex; failures go to warnings
1304        // and do not abort the search (partial results > hard failure for
1305        // local verification workflows). Collection results from
1306        // `fetch_remote_indices` take priority on name collisions.
1307        let local_index_paths: Vec<String> = local_indices.clone().unwrap_or_default();
1308        if let Some(paths) = local_indices {
1309            let mut existing: HashSet<String> = remote
1310                .packages
1311                .iter()
1312                .map(|p| p.entity.name.clone())
1313                .collect();
1314            for path in &paths {
1315                match std::fs::read_to_string(path) {
1316                    Err(e) => {
1317                        warnings.push(format!("Failed to read local index {path}: {e}"));
1318                    }
1319                    Ok(raw) => match serde_json::from_str::<HubIndex>(&raw) {
1320                        Err(e) => {
1321                            warnings.push(format!("Failed to parse local index {path}: {e}"));
1322                        }
1323                        Ok(idx) => {
1324                            for entry in idx.packages {
1325                                if existing.insert(entry.entity.name.clone()) {
1326                                    remote.packages.push(entry);
1327                                }
1328                            }
1329                        }
1330                    },
1331                }
1332            }
1333        }
1334
1335        let mut results = merge(&app_dir, &remote)?;
1336
1337        // Filter by query (internal signal covers name/description/
1338        // category/docstring — `matches_query` unchanged).
1339        let query_lower = query.filter(|q| !q.is_empty()).map(|q| q.to_lowercase());
1340        if let Some(ref ql) = query_lower {
1341            results.retain(|r| matches_query(r, ql));
1342        }
1343
1344        // Compute docstring_matched per remaining hit: Some(true) only
1345        // when the query matched docstring and none of {name,
1346        // description, category}; otherwise None.
1347        if let Some(ref ql) = query_lower {
1348            for r in &mut results {
1349                let empty = String::new();
1350                let pkg = &r.entity;
1351                let other_hit = pkg.name.to_lowercase().contains(ql)
1352                    || pkg
1353                        .description
1354                        .as_ref()
1355                        .unwrap_or(&empty)
1356                        .to_lowercase()
1357                        .contains(ql)
1358                    || pkg
1359                        .category
1360                        .as_ref()
1361                        .unwrap_or(&empty)
1362                        .to_lowercase()
1363                        .contains(ql);
1364                let doc_hit = pkg
1365                    .docstring
1366                    .as_ref()
1367                    .unwrap_or(&empty)
1368                    .to_lowercase()
1369                    .contains(ql);
1370                r.docstring_matched = if !other_hit && doc_hit {
1371                    Some(true)
1372                } else {
1373                    None
1374                };
1375            }
1376        }
1377
1378        // Build the effective filter map: start from explicit `opts.filter`,
1379        // then fold legacy `category` / `installed_only` in only if the
1380        // corresponding key is not already set (explicit filter wins).
1381        let mut filter_map: std::collections::HashMap<String, serde_json::Value> =
1382            opts.filter.unwrap_or_default();
1383        if let Some(cat) = category {
1384            filter_map
1385                .entry("category".to_string())
1386                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1387        }
1388        if let Some(only) = installed_only {
1389            // Preserve prior semantic: `installed_only=Some(false)` was a
1390            // no-op (it did not force `installed=false`). Only fold when
1391            // explicitly true.
1392            if only {
1393                filter_map
1394                    .entry("installed".to_string())
1395                    .or_insert(serde_json::Value::Bool(true));
1396            }
1397        }
1398
1399        // Resolve sort keys up-front so an invalid sort string errors out
1400        // before we touch results.
1401        let sort_str = opts.sort.as_deref().unwrap_or("-installed,name");
1402        let sort_keys = parse_sort(sort_str)?;
1403
1404        // Resolve projection fields; this also rejects unknown `verbose`
1405        // values before any heavy work.
1406        let fields = resolve_fields(
1407            opts.verbose.as_deref(),
1408            opts.fields.as_deref(),
1409            HUB_SEARCH_SUMMARY,
1410            HUB_SEARCH_FULL,
1411        )?;
1412        let include_docstring = fields.iter().any(|f| f == "docstring");
1413
1414        // Serialize each result to a Value (docstring optionally attached)
1415        // so filter/sort/projection work uniformly on JSON values.
1416        let mut items: Vec<serde_json::Value> = results
1417            .iter()
1418            .map(|r| r.to_value_with_optional_docstring(include_docstring))
1419            .collect();
1420
1421        // Filter AFTER serialization so filter keys can reference
1422        // projection-level shape (e.g. `category`, `installed`).
1423        if !filter_map.is_empty() {
1424            items.retain(|v| matches_filter(v, &filter_map));
1425        }
1426
1427        // Sort.
1428        apply_sort_by_value(&mut items, &sort_keys);
1429
1430        // Limit. `limit = Some(0)` means "no limit" (return all results)
1431        // — mirrors the `empty=all & some=filter` idiom used across the
1432        // list-tool contract. `None` falls back to the default cap (50).
1433        let total = items.len();
1434        let limit = opts.limit.unwrap_or(50);
1435        if limit > 0 {
1436            items.truncate(limit);
1437        }
1438
1439        // Projection (after truncation — unselected fields are stripped
1440        // from the kept entries only).
1441        let projected: Vec<serde_json::Value> = items
1442            .into_iter()
1443            .map(|v| project_fields(v, &fields))
1444            .collect();
1445
1446        // Collect discovered sources for transparency.
1447        // Warnings from this call (e.g. config.toml parse failure) are
1448        // already present in `warnings` from `fetch_remote_indices` above;
1449        // use a throwaway buffer here to avoid duplicating them.
1450        let mut _src_warnings: Vec<String> = Vec::new();
1451        let mut sources = discover_index_urls(&app_dir, &mut _src_warnings)?;
1452        // Surface local_indices paths in `sources` so callers can see
1453        // what was actually consulted (transparency / debug aid).
1454        sources.extend(local_index_paths);
1455
1456        let mut json = serde_json::json!({
1457            "results": projected,
1458            "total": total,
1459            "sources": sources,
1460        });
1461        if !warnings.is_empty() {
1462            json["warnings"] = serde_json::json!(warnings);
1463        }
1464        Ok(json.to_string())
1465    }
1466
1467    /// Aggregate hub index across all discovered cache sources.
1468    ///
1469    /// Reads the cached `hub_index.json` for each registered source URL
1470    /// (cache-only, no network fetch). Sources that are missing from cache
1471    /// or whose cache file is corrupt are skipped and a warning is collected;
1472    /// the aggregate still succeeds with the remaining sources.
1473    ///
1474    /// Registry-load failures (corrupt `hub_registries.json`) are also
1475    /// demoted to warnings rather than hard errors. Any warnings accumulated
1476    /// before the failure are preserved in the returned `warnings` vec so
1477    /// they reach the MCP wire response.
1478    ///
1479    /// # Returns
1480    /// `Ok((merged_index, warnings))` — always Ok; `warnings` contains any
1481    /// per-source failure messages including registry-load failures.
1482    pub(crate) fn aggregate_index(
1483        &self,
1484    ) -> Result<(HubIndex, Vec<String>), super::error::ServiceError> {
1485        let app_dir = self.log_config.app_dir();
1486        let mut warnings: Vec<String> = Vec::new();
1487
1488        // Discover source URLs (registries + manifest + seeds).
1489        // On failure, demote the error to a warning and return a degraded
1490        // (empty) response. Preserves any warnings already collected
1491        // (e.g. config.toml parse warning) before the failure.
1492        let urls = match discover_index_urls(&app_dir, &mut warnings) {
1493            Ok(u) => u,
1494            Err(e) => {
1495                warnings.push(format!("hub registry discovery failed: {e}"));
1496                return Ok((
1497                    HubIndex {
1498                        schema_version: "hub_index/v0".into(),
1499                        updated_at: String::new(),
1500                        packages: Vec::new(),
1501                    },
1502                    warnings,
1503                ));
1504            }
1505        };
1506
1507        // Empty URL list: return empty index (not an error — fresh install).
1508        if urls.is_empty() {
1509            return Ok((
1510                HubIndex {
1511                    schema_version: "hub_index/v0".into(),
1512                    updated_at: String::new(),
1513                    packages: Vec::new(),
1514                },
1515                warnings,
1516            ));
1517        }
1518
1519        // Load each source from cache. Network fetches are intentionally
1520        // avoided here: resource reads happen synchronously in the MCP
1521        // request path and should not block on network I/O. The cache
1522        // is populated by hub_reindex / hub_search (which do fetch).
1523        // Per-source load failures are best-effort: collect as warnings
1524        // and continue with remaining sources.
1525        let mut all_packages: Vec<IndexEntry> = Vec::new();
1526        let mut seen_names: std::collections::HashSet<String> = std::collections::HashSet::new();
1527
1528        for url in &urls {
1529            let merge_packages =
1530                |packages: Vec<IndexEntry>,
1531                 all: &mut Vec<IndexEntry>,
1532                 seen: &mut std::collections::HashSet<String>| {
1533                    for entry in packages {
1534                        if seen.insert(entry.entity.name.clone()) {
1535                            all.push(entry);
1536                        }
1537                    }
1538                };
1539            match load_cached_full(&app_dir, url) {
1540                CacheLookup::Fresh(index) => {
1541                    merge_packages(index.packages, &mut all_packages, &mut seen_names);
1542                }
1543                CacheLookup::Stale(index) => {
1544                    // Stale but not absent: merge the data and emit a warning so
1545                    // the caller knows the catalog may be outdated.
1546                    warnings.push(format!(
1547                        "hub cache stale (>{CACHE_TTL_SECS}s) for {url}; run alc_hub_search to refresh"
1548                    ));
1549                    merge_packages(index.packages, &mut all_packages, &mut seen_names);
1550                }
1551                CacheLookup::NotPresent => {
1552                    // Cache file absent — not an error, just skip.
1553                }
1554                CacheLookup::Corrupt(e) => {
1555                    // Cache corruption: surface as warning, continue aggregate.
1556                    warnings.push(format!("hub cache read failed for {url}: {e}"));
1557                }
1558            }
1559        }
1560
1561        Ok((
1562            HubIndex {
1563                schema_version: "hub_index/v0".into(),
1564                updated_at: String::new(),
1565                packages: all_packages,
1566            },
1567            warnings,
1568        ))
1569    }
1570}
1571
1572#[cfg(test)]
1573mod tests {
1574    use super::*;
1575
1576    #[test]
1577    fn repo_to_index_url_github() {
1578        assert_eq!(
1579            repo_to_index_url("https://github.com/ynishi/algocline-bundled-packages"),
1580            Some(
1581                "https://raw.githubusercontent.com/ynishi/algocline-bundled-packages/main/hub_index.json"
1582                    .to_string()
1583            )
1584        );
1585    }
1586
1587    #[test]
1588    fn repo_to_index_url_github_trailing_slash() {
1589        assert_eq!(
1590            repo_to_index_url("https://github.com/user/repo/"),
1591            Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json".to_string())
1592        );
1593    }
1594
1595    #[test]
1596    fn repo_to_index_url_github_dot_git() {
1597        assert_eq!(
1598            repo_to_index_url("https://github.com/user/repo.git"),
1599            Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json".to_string())
1600        );
1601    }
1602
1603    #[test]
1604    fn repo_to_index_url_direct_json() {
1605        assert_eq!(
1606            repo_to_index_url("https://example.com/my_index.json"),
1607            Some("https://example.com/my_index.json".to_string())
1608        );
1609    }
1610
1611    #[test]
1612    fn repo_to_index_url_unknown_host_no_json() {
1613        assert_eq!(repo_to_index_url("https://example.com/some-repo"), None);
1614    }
1615
1616    #[test]
1617    fn repo_to_index_url_local_path() {
1618        assert_eq!(repo_to_index_url("/home/user/my-pkg"), None);
1619    }
1620
1621    #[test]
1622    fn cache_key_stable() {
1623        let k1 = cache_key("https://example.com/index.json");
1624        let k2 = cache_key("https://example.com/index.json");
1625        assert_eq!(k1, k2);
1626        assert_eq!(k1.len(), 16); // 16 hex chars
1627    }
1628
1629    #[test]
1630    fn cache_key_different_urls() {
1631        let k1 = cache_key("https://a.com/index.json");
1632        let k2 = cache_key("https://b.com/index.json");
1633        assert_ne!(k1, k2);
1634    }
1635
1636    // NOTE: The init.lua meta / docstring parsing tests have moved to
1637    // `algocline_core::pkg::tests` along with the parser itself. The
1638    // `hub.rs` call-path tests now exercise the typed `PkgEntity` via
1639    // `build_index` / `merge` only.
1640
1641    #[test]
1642    fn merge_dedup_uses_hashset() {
1643        // Verify that merge correctly handles local-only packages
1644        // without O(n*m) behavior (structural test).
1645        let tmp = tempfile::tempdir().unwrap();
1646        let app_dir = AppDir::new(tmp.path().to_path_buf());
1647        let remote = HubIndex {
1648            schema_version: "hub_index/v0".into(),
1649            updated_at: String::new(),
1650            packages: vec![IndexEntry {
1651                entity: PkgEntity {
1652                    name: "remote_only".into(),
1653                    version: Some("1.0".into()),
1654                    description: Some("from remote".into()),
1655                    category: Some("test".into()),
1656                    docstring: None,
1657                    tags: None,
1658                    pkg_type: None,
1659                    type_source: None,
1660                },
1661                source: PackageSource::Unknown,
1662                card_count: 0,
1663                best_card: None,
1664            }],
1665        };
1666
1667        let results = merge(&app_dir, &remote).expect("merge over empty app_dir should succeed");
1668        // Should include remote_only + any locally installed packages
1669        assert!(results.iter().any(|r| r.entity.name == "remote_only"));
1670        let remote_result = results
1671            .iter()
1672            .find(|r| r.entity.name == "remote_only")
1673            .unwrap();
1674        assert_eq!(
1675            remote_result.entity.pkg_type,
1676            Some(PkgType::Runnable),
1677            "pre-type-system index entry must default to Runnable"
1678        );
1679    }
1680
1681    #[test]
1682    fn matches_query_searches_docstring() {
1683        let result = SearchResult {
1684            entity: PkgEntity {
1685                name: "cascade".into(),
1686                version: Some("0.1.0".into()),
1687                description: Some("Multi-level routing".into()),
1688                category: Some("meta".into()),
1689                docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
1690                tags: None,
1691                pkg_type: None,
1692                type_source: None,
1693            },
1694            source: PackageSource::Unknown,
1695            installed: true,
1696            card_count: 0,
1697            best_card: None,
1698            docstring_matched: None,
1699        };
1700
1701        assert!(matches_query(&result, "thompson"), "docstring match");
1702        assert!(matches_query(&result, "FrugalGPT"), "docstring match case");
1703        assert!(matches_query(&result, "routing"), "description match");
1704        assert!(!matches_query(&result, "bayesian"), "no match");
1705    }
1706
1707    // ─── SearchResult::to_value_with_optional_docstring ────────────
1708    //
1709    // `docstring` is not emitted by the default serde path (via the
1710    // `serialize_entity_without_docstring` custom serializer) and is
1711    // re-attached only when the projection path says so. These tests
1712    // pin the two branches of that helper — they are the hinge that
1713    // `verbose="full"` / `fields=["docstring"]` rely on.
1714
1715    fn sample_search_result() -> SearchResult {
1716        SearchResult {
1717            entity: PkgEntity {
1718                name: "cascade".into(),
1719                version: Some("0.1.0".into()),
1720                description: Some("Multi-level routing".into()),
1721                category: Some("reasoning".into()),
1722                docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
1723                tags: None,
1724                pkg_type: None,
1725                type_source: None,
1726            },
1727            source: PackageSource::Git {
1728                url: "https://example.com/cascade".into(),
1729                rev: None,
1730            },
1731            installed: true,
1732            card_count: 3,
1733            best_card: None,
1734            docstring_matched: None,
1735        }
1736    }
1737
1738    #[test]
1739    fn to_value_default_omits_docstring() {
1740        let r = sample_search_result();
1741        let v = r.to_value_with_optional_docstring(false);
1742        let obj = v.as_object().expect("object");
1743        assert!(
1744            !obj.contains_key("docstring"),
1745            "default summary must not leak docstring"
1746        );
1747        assert_eq!(obj.get("name").and_then(|x| x.as_str()), Some("cascade"));
1748        // `docstring_matched` is Option<None> → `skip_serializing_if`
1749        // must omit it when the query did not mark a docstring-only hit.
1750        assert!(
1751            !obj.contains_key("docstring_matched"),
1752            "docstring_matched=None must be omitted"
1753        );
1754    }
1755
1756    #[test]
1757    fn to_value_include_reattaches_docstring() {
1758        let r = sample_search_result();
1759        let v = r.to_value_with_optional_docstring(true);
1760        let obj = v.as_object().expect("object");
1761        assert_eq!(
1762            obj.get("docstring").and_then(|x| x.as_str()),
1763            Some("Based on FrugalGPT. Uses Thompson Sampling.")
1764        );
1765    }
1766
1767    #[test]
1768    fn to_value_serializes_docstring_matched_when_set() {
1769        let mut r = sample_search_result();
1770        r.docstring_matched = Some(true);
1771        let v = r.to_value_with_optional_docstring(false);
1772        let obj = v.as_object().expect("object");
1773        assert_eq!(
1774            obj.get("docstring_matched").and_then(|x| x.as_bool()),
1775            Some(true)
1776        );
1777    }
1778
1779    // ─── projection glue ──────────────────────────────────────────
1780    //
1781    // These tests exercise the projection path that `hub_search` uses to
1782    // shape output: `resolve_fields` + `project_fields` applied to a
1783    // `to_value_with_optional_docstring`-serialized entry. They pin the
1784    // wf-sim-verbose contract: `fields` wins over `verbose`, default
1785    // summary preset excludes docstring, `full` preset includes
1786    // docstring, unknown keys silently skipped.
1787
1788    #[test]
1789    fn hub_search_default_summary_excludes_docstring() {
1790        let r = sample_search_result();
1791        let fields = resolve_fields(None, None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
1792        let include_docstring = fields.iter().any(|f| f == "docstring");
1793        let v = project_fields(
1794            r.to_value_with_optional_docstring(include_docstring),
1795            &fields,
1796        );
1797        let obj = v.as_object().expect("object");
1798        assert!(
1799            !obj.contains_key("docstring"),
1800            "summary preset must omit docstring"
1801        );
1802        // summary preset fields that are present on the sample entry
1803        for key in ["name", "version", "description", "category", "installed"] {
1804            assert!(obj.contains_key(key), "summary preset key {key} missing");
1805        }
1806    }
1807
1808    #[test]
1809    fn hub_search_verbose_full_includes_docstring() {
1810        let r = sample_search_result();
1811        let fields =
1812            resolve_fields(Some("full"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
1813        let include_docstring = fields.iter().any(|f| f == "docstring");
1814        let v = project_fields(
1815            r.to_value_with_optional_docstring(include_docstring),
1816            &fields,
1817        );
1818        let obj = v.as_object().expect("object");
1819        assert_eq!(
1820            obj.get("docstring").and_then(|x| x.as_str()),
1821            Some("Based on FrugalGPT. Uses Thompson Sampling.")
1822        );
1823        // full preset superset keys
1824        for key in ["source", "card_count"] {
1825            assert!(obj.contains_key(key), "full preset key {key} missing");
1826        }
1827    }
1828
1829    #[test]
1830    fn hub_search_fields_beats_verbose() {
1831        let r = sample_search_result();
1832        let explicit = vec!["name".to_string(), "docstring".to_string()];
1833        // verbose=summary normally excludes docstring, but explicit
1834        // fields must win.
1835        let fields = resolve_fields(
1836            Some("summary"),
1837            Some(&explicit),
1838            HUB_SEARCH_SUMMARY,
1839            HUB_SEARCH_FULL,
1840        )
1841        .unwrap();
1842        let include_docstring = fields.iter().any(|f| f == "docstring");
1843        let v = project_fields(
1844            r.to_value_with_optional_docstring(include_docstring),
1845            &fields,
1846        );
1847        let obj = v.as_object().expect("object");
1848        assert_eq!(obj.len(), 2, "only the two requested fields");
1849        assert!(obj.contains_key("name"));
1850        assert!(obj.contains_key("docstring"));
1851    }
1852
1853    #[test]
1854    fn hub_search_fields_unknown_key_silently_skipped() {
1855        let r = sample_search_result();
1856        let explicit = vec!["name".to_string(), "bogus".to_string()];
1857        let fields =
1858            resolve_fields(None, Some(&explicit), HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
1859        let v = project_fields(r.to_value_with_optional_docstring(false), &fields);
1860        let obj = v.as_object().expect("object");
1861        assert_eq!(obj.len(), 1, "bogus must not appear");
1862        assert!(obj.contains_key("name"));
1863    }
1864
1865    #[test]
1866    fn hub_search_invalid_verbose_errors() {
1867        let err =
1868            resolve_fields(Some("fat"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap_err();
1869        assert!(
1870            err.contains("fat"),
1871            "error must mention the offending value"
1872        );
1873    }
1874
1875    // ─── docstring_matched classification ─────────────────────────
1876    //
1877    // The query-time classification rule: `docstring_matched = Some(true)`
1878    // only when the query hit docstring AND missed name/description/
1879    // category; otherwise `None` (and therefore omitted from output).
1880    // The logic lives inline in `hub_search`; we re-create it here over a
1881    // tiny local helper so the three cases stay pinned as a contract.
1882
1883    fn classify(r: &SearchResult, query: &str) -> Option<bool> {
1884        let ql = query.to_lowercase();
1885        if query.is_empty() {
1886            return None;
1887        }
1888        let empty = String::new();
1889        let pkg = &r.entity;
1890        let other_hit = pkg.name.to_lowercase().contains(&ql)
1891            || pkg
1892                .description
1893                .as_ref()
1894                .unwrap_or(&empty)
1895                .to_lowercase()
1896                .contains(&ql)
1897            || pkg
1898                .category
1899                .as_ref()
1900                .unwrap_or(&empty)
1901                .to_lowercase()
1902                .contains(&ql);
1903        let doc_hit = pkg
1904            .docstring
1905            .as_ref()
1906            .unwrap_or(&empty)
1907            .to_lowercase()
1908            .contains(&ql);
1909        if !other_hit && doc_hit {
1910            Some(true)
1911        } else {
1912            None
1913        }
1914    }
1915
1916    #[test]
1917    fn docstring_matched_true_when_only_docstring_hits() {
1918        let r = sample_search_result();
1919        // "Thompson" appears only in docstring of the sample entry.
1920        assert_eq!(classify(&r, "thompson"), Some(true));
1921    }
1922
1923    #[test]
1924    fn docstring_matched_none_when_name_also_hits() {
1925        let r = sample_search_result();
1926        // "cascade" hits the name; docstring match is irrelevant now.
1927        assert_eq!(classify(&r, "cascade"), None);
1928    }
1929
1930    #[test]
1931    fn docstring_matched_none_when_description_hits() {
1932        let r = sample_search_result();
1933        // "routing" hits description; should be None.
1934        assert_eq!(classify(&r, "routing"), None);
1935    }
1936
1937    #[test]
1938    fn docstring_matched_none_when_query_empty() {
1939        let r = sample_search_result();
1940        assert_eq!(classify(&r, ""), None);
1941    }
1942
1943    // ─── filter fold (legacy params → filter map) ─────────────────
1944    //
1945    // Behavioural rule: legacy `category` / `installed_only=true` fold
1946    // into the filter map only when the corresponding key is not
1947    // already set (explicit `filter` wins). `installed_only=false` is a
1948    // no-op (preserves prior semantics).
1949
1950    fn build_filter_map(
1951        category: Option<&str>,
1952        installed_only: Option<bool>,
1953        explicit: Option<HashMap<String, serde_json::Value>>,
1954    ) -> HashMap<String, serde_json::Value> {
1955        let mut filter_map = explicit.unwrap_or_default();
1956        if let Some(cat) = category {
1957            filter_map
1958                .entry("category".to_string())
1959                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1960        }
1961        if let Some(only) = installed_only {
1962            if only {
1963                filter_map
1964                    .entry("installed".to_string())
1965                    .or_insert(serde_json::Value::Bool(true));
1966            }
1967        }
1968        filter_map
1969    }
1970
1971    #[test]
1972    fn filter_by_category_via_legacy_param() {
1973        let m = build_filter_map(Some("reasoning"), None, None);
1974        assert_eq!(
1975            m.get("category"),
1976            Some(&serde_json::Value::String("reasoning".to_string()))
1977        );
1978    }
1979
1980    #[test]
1981    fn filter_by_installed_only_via_legacy_param() {
1982        let m = build_filter_map(None, Some(true), None);
1983        assert_eq!(m.get("installed"), Some(&serde_json::Value::Bool(true)));
1984    }
1985
1986    #[test]
1987    fn filter_installed_only_false_is_noop() {
1988        let m = build_filter_map(None, Some(false), None);
1989        assert!(
1990            !m.contains_key("installed"),
1991            "installed_only=false should not fold in"
1992        );
1993    }
1994
1995    #[test]
1996    fn filter_beats_legacy_param_on_conflict() {
1997        // Explicit filter says category=meta; legacy says reasoning.
1998        // Explicit must win.
1999        let mut explicit = HashMap::new();
2000        explicit.insert(
2001            "category".to_string(),
2002            serde_json::Value::String("meta".to_string()),
2003        );
2004        let m = build_filter_map(Some("reasoning"), None, Some(explicit));
2005        assert_eq!(
2006            m.get("category"),
2007            Some(&serde_json::Value::String("meta".to_string()))
2008        );
2009    }
2010
2011    #[test]
2012    fn filter_merges_legacy_when_no_conflict() {
2013        // Explicit sets a different key; legacy category should still
2014        // be folded in.
2015        let mut explicit = HashMap::new();
2016        explicit.insert("installed".to_string(), serde_json::Value::Bool(true));
2017        let m = build_filter_map(Some("reasoning"), None, Some(explicit));
2018        assert_eq!(
2019            m.get("category"),
2020            Some(&serde_json::Value::String("reasoning".to_string()))
2021        );
2022        assert_eq!(m.get("installed"), Some(&serde_json::Value::Bool(true)));
2023    }
2024
2025    // ─── load_registries: file-absent vs. corrupt JSON ────────────
2026
2027    #[test]
2028    fn load_registries_missing_file_returns_default() {
2029        let tmp = tempfile::tempdir().unwrap();
2030        let app_dir = AppDir::new(tmp.path().to_path_buf());
2031        // No hub_registries.json created — must return Ok(empty).
2032        let result = load_registries(&app_dir);
2033        assert!(result.is_ok(), "missing file should be Ok: {result:?}");
2034        assert!(result.unwrap().registries.is_empty());
2035    }
2036
2037    #[test]
2038    fn load_registries_corrupt_json_returns_err() {
2039        let tmp = tempfile::tempdir().unwrap();
2040        let app_dir = AppDir::new(tmp.path().to_path_buf());
2041        // Write corrupt JSON to the registries path.
2042        let path = app_dir.hub_registries_json();
2043        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2044        std::fs::write(&path, b"not valid json {{{").unwrap();
2045        let result = load_registries(&app_dir);
2046        assert!(result.is_err(), "corrupt JSON must propagate Err");
2047        let msg = result.unwrap_err().to_string();
2048        assert!(
2049            msg.contains("parse"),
2050            "error message should mention parse: {msg}"
2051        );
2052    }
2053
2054    #[test]
2055    fn load_registries_valid_file_deserializes() {
2056        let tmp = tempfile::tempdir().unwrap();
2057        let app_dir = AppDir::new(tmp.path().to_path_buf());
2058        let path = app_dir.hub_registries_json();
2059        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2060        let content = r#"{"registries":[{"source":"https://github.com/user/repo","origin":"pkg_install","added_at":"2026-01-01T00:00:00Z"}]}"#;
2061        std::fs::write(&path, content).unwrap();
2062        let result = load_registries(&app_dir);
2063        assert!(result.is_ok(), "valid JSON must parse Ok: {result:?}");
2064        let reg = result.unwrap();
2065        assert_eq!(reg.registries.len(), 1);
2066        assert_eq!(reg.registries[0].source, "https://github.com/user/repo");
2067    }
2068
2069    // ─── default sort verification ────────────────────────────────
2070
2071    #[test]
2072    fn default_sort_is_minus_installed_name() {
2073        let keys = parse_sort("-installed,name").unwrap();
2074        assert_eq!(keys.len(), 2);
2075        assert_eq!(keys[0].key, "installed");
2076        assert!(keys[0].desc, "installed must sort desc (true first)");
2077        assert_eq!(keys[1].key, "name");
2078        assert!(!keys[1].desc);
2079
2080        // Apply it against a small vec and confirm the expected order.
2081        let mut items = vec![
2082            serde_json::json!({"installed": false, "name": "zeta"}),
2083            serde_json::json!({"installed": true, "name": "mu"}),
2084            serde_json::json!({"installed": false, "name": "alpha"}),
2085            serde_json::json!({"installed": true, "name": "beta"}),
2086        ];
2087        apply_sort_by_value(&mut items, &keys);
2088        let names: Vec<&str> = items
2089            .iter()
2090            .map(|v| v.get("name").and_then(|x| x.as_str()).unwrap_or(""))
2091            .collect();
2092        assert_eq!(names, vec!["beta", "mu", "alpha", "zeta"]);
2093    }
2094
2095    // ─── Phase 3 MED batch: error-propagation tests ───────────────
2096
2097    // Site 1: collection_url_from_config
2098
2099    #[test]
2100    fn collection_url_from_config_absent_returns_ok_none() {
2101        let tmp = tempfile::tempdir().unwrap();
2102        let app_dir = AppDir::new(tmp.path().to_path_buf());
2103        // No config.toml created — absent file must be Ok(None), not Err.
2104        let result = collection_url_from_config(&app_dir);
2105        assert!(
2106            matches!(result, Ok(None)),
2107            "absent config.toml must return Ok(None), got {result:?}"
2108        );
2109    }
2110
2111    #[test]
2112    fn collection_url_from_config_corrupt_toml_returns_err() {
2113        let tmp = tempfile::tempdir().unwrap();
2114        let app_dir = AppDir::new(tmp.path().to_path_buf());
2115        let path = app_dir.config_toml();
2116        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2117        std::fs::write(&path, b"[hub\ncollection_url = broken{{{{").unwrap();
2118        let result = collection_url_from_config(&app_dir);
2119        assert!(
2120            result.is_err(),
2121            "corrupt TOML must return Err, got {result:?}"
2122        );
2123    }
2124
2125    #[test]
2126    fn collection_url_from_config_valid_returns_url() {
2127        let tmp = tempfile::tempdir().unwrap();
2128        let app_dir = AppDir::new(tmp.path().to_path_buf());
2129        let path = app_dir.config_toml();
2130        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2131        std::fs::write(
2132            &path,
2133            b"[hub]\ncollection_url = \"https://example.com/hub_index.json\"\n",
2134        )
2135        .unwrap();
2136        let result = collection_url_from_config(&app_dir);
2137        assert_eq!(
2138            result.unwrap(),
2139            Some("https://example.com/hub_index.json".to_string())
2140        );
2141    }
2142
2143    #[test]
2144    fn collection_url_from_config_no_hub_section_returns_none() {
2145        let tmp = tempfile::tempdir().unwrap();
2146        let app_dir = AppDir::new(tmp.path().to_path_buf());
2147        let path = app_dir.config_toml();
2148        std::fs::create_dir_all(path.parent().unwrap()).unwrap();
2149        std::fs::write(&path, b"[some_other_section]\nfoo = \"bar\"\n").unwrap();
2150        let result = collection_url_from_config(&app_dir);
2151        assert!(
2152            matches!(result, Ok(None)),
2153            "config without [hub] must return Ok(None), got {result:?}"
2154        );
2155    }
2156
2157    // Site 2: load_cached
2158
2159    #[test]
2160    fn load_cached_absent_returns_ok_none() {
2161        let tmp = tempfile::tempdir().unwrap();
2162        let app_dir = AppDir::new(tmp.path().to_path_buf());
2163        let result = load_cached(&app_dir, "https://example.com/index.json");
2164        assert!(
2165            matches!(result, Ok(None)),
2166            "absent cache file must return Ok(None), got {result:?}"
2167        );
2168    }
2169
2170    #[test]
2171    fn load_cached_corrupt_json_within_ttl_returns_err() {
2172        let tmp = tempfile::tempdir().unwrap();
2173        let app_dir = AppDir::new(tmp.path().to_path_buf());
2174        let url = "https://example.com/index.json";
2175        let dir = cache_dir(&app_dir);
2176        std::fs::create_dir_all(&dir).unwrap();
2177        let path = dir.join(format!("{}.json", cache_key(url)));
2178        std::fs::write(&path, b"not valid json {{{{").unwrap();
2179        // file is freshly written so within TTL
2180        let result = load_cached(&app_dir, url);
2181        assert!(
2182            result.is_err(),
2183            "corrupt JSON within TTL must return Err, got {result:?}"
2184        );
2185    }
2186
2187    #[test]
2188    fn load_cached_valid_json_within_ttl_returns_index() {
2189        let tmp = tempfile::tempdir().unwrap();
2190        let app_dir = AppDir::new(tmp.path().to_path_buf());
2191        let url = "https://example.com/index.json";
2192        let dir = cache_dir(&app_dir);
2193        std::fs::create_dir_all(&dir).unwrap();
2194        let path = dir.join(format!("{}.json", cache_key(url)));
2195        let index_json = r#"{"schema_version":"hub_index/v0","updated_at":"2026-01-01T00:00:00Z","packages":[]}"#;
2196        std::fs::write(&path, index_json).unwrap();
2197        let result = load_cached(&app_dir, url);
2198        assert!(
2199            matches!(result, Ok(Some(_))),
2200            "valid JSON within TTL must return Ok(Some(_)), got {result:?}"
2201        );
2202    }
2203
2204    /// Helper: backdate a file's mtime by `secs` seconds so it appears stale.
2205    fn backdate_file(path: &std::path::Path, secs: u64) {
2206        let past = std::time::SystemTime::now() - std::time::Duration::from_secs(secs);
2207        let times = std::fs::FileTimes::new()
2208            .set_accessed(past)
2209            .set_modified(past);
2210        let f = std::fs::OpenOptions::new()
2211            .write(true)
2212            .open(path)
2213            .expect("open for backdate");
2214        f.set_times(times).expect("set_times");
2215    }
2216
2217    // L-1: load_cached_full returns Stale (not NotPresent) for outdated cache.
2218    #[test]
2219    fn load_cached_full_stale_file_returns_stale_variant() {
2220        let tmp = tempfile::tempdir().unwrap();
2221        let app_dir = AppDir::new(tmp.path().to_path_buf());
2222        let url = "https://stale.example.com/index.json";
2223        // Write a valid cache entry using the helper to get correct serialization.
2224        write_cache_for_url(&app_dir, url, &make_index(vec![("stale_pkg", "0.1.0")]));
2225        // Backdate by 2× TTL to ensure it's stale.
2226        let path = cache_dir(&app_dir).join(format!("{}.json", cache_key(url)));
2227        backdate_file(&path, CACHE_TTL_SECS * 2);
2228        let result = load_cached_full(&app_dir, url);
2229        assert!(
2230            matches!(result, CacheLookup::Stale(_)),
2231            "backdated cache must return Stale variant"
2232        );
2233    }
2234
2235    // L-1: aggregate_index with stale cache returns data AND emits warning.
2236    #[tokio::test]
2237    async fn aggregate_index_stale_cache_returns_data_and_warning() {
2238        let tmp = tempfile::tempdir().unwrap();
2239        let app_dir_root = tmp.path().to_path_buf();
2240        let app_dir = AppDir::new(app_dir_root.clone());
2241        let url = "https://stale-agg.example.com/index.json";
2242
2243        // Write a valid cache file with one package.
2244        write_cache_for_url(&app_dir, url, &make_index(vec![("stale_pkg", "0.1.0")]));
2245        // Backdate the cache file so it's stale.
2246        let cache_path = cache_dir(&app_dir).join(format!("{}.json", cache_key(url)));
2247        backdate_file(&cache_path, CACHE_TTL_SECS * 2);
2248
2249        // Register the URL in hub_registries.
2250        let reg_path = app_dir.hub_registries_json();
2251        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2252        let reg_json = serde_json::json!({
2253            "registries": [{"source": url, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2254        });
2255        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2256
2257        let svc = super::super::test_support::make_app_service_at(app_dir_root).await;
2258        let (index, warnings) = AppService::aggregate_index(&svc).unwrap();
2259
2260        // Data from stale cache must still be present.
2261        assert!(
2262            index.packages.iter().any(|p| p.entity.name == "stale_pkg"),
2263            "stale package must be included in aggregate, got: {:?}",
2264            index
2265                .packages
2266                .iter()
2267                .map(|p| &p.entity.name)
2268                .collect::<Vec<_>>()
2269        );
2270        // A stale warning must be emitted.
2271        assert!(
2272            warnings
2273                .iter()
2274                .any(|w| w.contains("stale") && w.contains(url)),
2275            "stale cache must emit a warning mentioning the URL, got: {warnings:?}"
2276        );
2277    }
2278
2279    // Site 3: count_evals_for_pkg
2280
2281    #[test]
2282    fn count_evals_for_pkg_absent_dir_returns_zero_no_warnings() {
2283        let tmp = tempfile::tempdir().unwrap();
2284        let app_dir = AppDir::new(tmp.path().to_path_buf());
2285        let mut warnings: Vec<String> = Vec::new();
2286        let count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2287        assert_eq!(count, 0, "absent evals dir must return 0");
2288        assert!(
2289            warnings.is_empty(),
2290            "absent evals dir must produce no warnings, got {warnings:?}"
2291        );
2292    }
2293
2294    #[test]
2295    fn count_evals_for_pkg_corrupt_meta_surfaces_warning() {
2296        let tmp = tempfile::tempdir().unwrap();
2297        let app_dir = AppDir::new(tmp.path().to_path_buf());
2298        let evals_dir = app_dir.evals_dir();
2299        std::fs::create_dir_all(&evals_dir).unwrap();
2300
2301        // Write a result JSON stub so the file is scanned.
2302        std::fs::write(evals_dir.join("cot_9999.json"), b"{}").unwrap();
2303        // Write a corrupt meta.json for the same stem.
2304        std::fs::write(evals_dir.join("cot_9999.meta.json"), b"not json {{{{").unwrap();
2305
2306        let mut warnings: Vec<String> = Vec::new();
2307        let _count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2308        assert!(
2309            !warnings.is_empty(),
2310            "corrupt meta.json must produce at least one warning, got {warnings:?}"
2311        );
2312        assert!(
2313            warnings[0].contains("parse"),
2314            "warning must mention parse: {}",
2315            warnings[0]
2316        );
2317    }
2318
2319    #[test]
2320    fn count_evals_for_pkg_valid_meta_counts_correctly() {
2321        let tmp = tempfile::tempdir().unwrap();
2322        let app_dir = AppDir::new(tmp.path().to_path_buf());
2323        let evals_dir = app_dir.evals_dir();
2324        std::fs::create_dir_all(&evals_dir).unwrap();
2325
2326        // Write a result JSON + valid meta for strategy "cot".
2327        let meta = r#"{"eval_id":"cot_1","strategy":"cot","timestamp":1}"#;
2328        std::fs::write(evals_dir.join("cot_1.json"), b"{}").unwrap();
2329        std::fs::write(evals_dir.join("cot_1.meta.json"), meta).unwrap();
2330
2331        let mut warnings: Vec<String> = Vec::new();
2332        let count = count_evals_for_pkg(&app_dir, "cot", &mut warnings);
2333        assert_eq!(count, 1, "should count 1 valid eval");
2334        assert!(warnings.is_empty(), "no warnings expected: {warnings:?}");
2335    }
2336
2337    // ─── aggregate_index unit tests ───────────────────────────────
2338
2339    /// Write a minimal HubIndex JSON to the per-source cache for a URL.
2340    fn write_cache_for_url(app_dir: &AppDir, url: &str, index: &HubIndex) {
2341        let dir = cache_dir(app_dir);
2342        std::fs::create_dir_all(&dir).unwrap();
2343        let path = dir.join(format!("{}.json", cache_key(url)));
2344        // justification: test helper, panicking on failure is acceptable in tests
2345        std::fs::write(&path, serde_json::to_string_pretty(index).unwrap()).unwrap();
2346    }
2347
2348    fn make_index(packages: Vec<(&str, &str)>) -> HubIndex {
2349        HubIndex {
2350            schema_version: "hub_index/v0".into(),
2351            updated_at: String::new(),
2352            packages: packages
2353                .into_iter()
2354                .map(|(name, version)| IndexEntry {
2355                    entity: PkgEntity {
2356                        name: name.to_string(),
2357                        version: Some(version.to_string()),
2358                        description: None,
2359                        category: None,
2360                        docstring: None,
2361                        tags: None,
2362                        pkg_type: None,
2363                        type_source: None,
2364                    },
2365                    source: PackageSource::Unknown,
2366                    card_count: 0,
2367                    best_card: None,
2368                })
2369                .collect(),
2370        }
2371    }
2372
2373    // T1: empty sources → empty index, no warnings
2374    #[test]
2375    fn aggregate_index_empty_sources_returns_empty() {
2376        let tmp = tempfile::tempdir().unwrap();
2377        let app_dir = AppDir::new(tmp.path().to_path_buf());
2378        // No registries, no manifest, no seeds in cache → no URLs → empty index.
2379        // discover_index_urls will still produce AUTO_INSTALL_SOURCES seeds,
2380        // but their cache files don't exist → Ok(None) for each → empty result.
2381        let (index, warnings) = {
2382            // Build a minimal AppService-like test by calling the free functions
2383            // and replicating the aggregate_index logic directly.
2384            let mut w: Vec<String> = Vec::new();
2385            let urls = discover_index_urls(&app_dir, &mut w).unwrap();
2386            let mut packages: Vec<IndexEntry> = Vec::new();
2387            let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2388            for url in &urls {
2389                if let Ok(Some(idx)) = load_cached(&app_dir, url) {
2390                    for e in idx.packages {
2391                        if seen.insert(e.entity.name.clone()) {
2392                            packages.push(e);
2393                        }
2394                    }
2395                }
2396            }
2397            (
2398                HubIndex {
2399                    schema_version: "hub_index/v0".into(),
2400                    updated_at: String::new(),
2401                    packages,
2402                },
2403                w,
2404            )
2405        };
2406        assert!(
2407            index.packages.is_empty(),
2408            "no cached sources should produce empty packages"
2409        );
2410        assert!(warnings.is_empty(), "no warnings expected for cache misses");
2411    }
2412
2413    // T1: one source in cache → packages returned
2414    #[test]
2415    fn aggregate_index_one_source_returns_packages() {
2416        let tmp = tempfile::tempdir().unwrap();
2417        let app_dir = AppDir::new(tmp.path().to_path_buf());
2418        let url = "https://example.com/test_index.json";
2419        let source_index = make_index(vec![("cot", "0.1.0"), ("ucb", "0.2.0")]);
2420        write_cache_for_url(&app_dir, url, &source_index);
2421
2422        // Register the URL in hub_registries so discover_index_urls finds it.
2423        let reg_path = app_dir.hub_registries_json();
2424        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2425        let reg_json = serde_json::json!({
2426            "registries": [{"source": url, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2427        });
2428        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2429
2430        let mut warnings: Vec<String> = Vec::new();
2431        let urls = discover_index_urls(&app_dir, &mut warnings).unwrap();
2432        let mut packages: Vec<IndexEntry> = Vec::new();
2433        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2434        for u in &urls {
2435            if let Ok(Some(idx)) = load_cached(&app_dir, u) {
2436                for e in idx.packages {
2437                    if seen.insert(e.entity.name.clone()) {
2438                        packages.push(e);
2439                    }
2440                }
2441            }
2442        }
2443
2444        assert!(
2445            packages.iter().any(|p| p.entity.name == "cot"),
2446            "cot expected"
2447        );
2448        assert!(
2449            packages.iter().any(|p| p.entity.name == "ucb"),
2450            "ucb expected"
2451        );
2452    }
2453
2454    // T2: duplicate package across two sources → first source wins
2455    #[test]
2456    fn aggregate_index_deduplicate_by_name_first_wins() {
2457        let tmp = tempfile::tempdir().unwrap();
2458        let app_dir = AppDir::new(tmp.path().to_path_buf());
2459        let url_a = "https://a.example.com/index.json";
2460        let url_b = "https://b.example.com/index.json";
2461
2462        // Both sources have "cot" but different versions.
2463        let idx_a = make_index(vec![("cot", "1.0.0")]);
2464        let idx_b = make_index(vec![("cot", "2.0.0"), ("ucb", "0.1.0")]);
2465        write_cache_for_url(&app_dir, url_a, &idx_a);
2466        write_cache_for_url(&app_dir, url_b, &idx_b);
2467
2468        let reg_path = app_dir.hub_registries_json();
2469        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2470        let reg_json = serde_json::json!({
2471            "registries": [
2472                {"source": url_a, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"},
2473                {"source": url_b, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}
2474            ]
2475        });
2476        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2477
2478        let mut warnings: Vec<String> = Vec::new();
2479        let urls = {
2480            let mut raw = discover_index_urls(&app_dir, &mut warnings).unwrap();
2481            // Restrict to only our two test URLs so seed URLs don't interfere.
2482            raw.retain(|u| u == url_a || u == url_b);
2483            raw
2484        };
2485
2486        let mut packages: Vec<IndexEntry> = Vec::new();
2487        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2488        for u in &urls {
2489            if let Ok(Some(idx)) = load_cached(&app_dir, u) {
2490                for e in idx.packages {
2491                    if seen.insert(e.entity.name.clone()) {
2492                        packages.push(e);
2493                    }
2494                }
2495            }
2496        }
2497
2498        let cot_count = packages.iter().filter(|p| p.entity.name == "cot").count();
2499        assert_eq!(cot_count, 1, "dedup: cot must appear exactly once");
2500        let ucb_count = packages.iter().filter(|p| p.entity.name == "ucb").count();
2501        assert_eq!(ucb_count, 1, "ucb from second source must appear");
2502    }
2503
2504    // T3: corrupt cache file → warning collected, other sources unaffected
2505    #[test]
2506    fn aggregate_index_corrupt_cache_collects_warning() {
2507        let tmp = tempfile::tempdir().unwrap();
2508        let app_dir = AppDir::new(tmp.path().to_path_buf());
2509        let url_corrupt = "https://corrupt.example.com/index.json";
2510
2511        // Write corrupt JSON to the cache slot.
2512        let dir = cache_dir(&app_dir);
2513        std::fs::create_dir_all(&dir).unwrap();
2514        let path = dir.join(format!("{}.json", cache_key(url_corrupt)));
2515        std::fs::write(&path, b"{{{{ not valid json").unwrap();
2516
2517        let reg_path = app_dir.hub_registries_json();
2518        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2519        let reg_json = serde_json::json!({
2520            "registries": [{"source": url_corrupt, "origin": "pkg_install", "added_at": "2026-01-01T00:00:00Z"}]
2521        });
2522        std::fs::write(&reg_path, reg_json.to_string()).unwrap();
2523
2524        let mut warnings: Vec<String> = Vec::new();
2525        let urls = discover_index_urls(&app_dir, &mut warnings).unwrap();
2526        let mut packages: Vec<IndexEntry> = Vec::new();
2527        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
2528        let mut extra_warnings: Vec<String> = Vec::new();
2529        for u in &urls {
2530            match load_cached(&app_dir, u) {
2531                Ok(Some(idx)) => {
2532                    for e in idx.packages {
2533                        if seen.insert(e.entity.name.clone()) {
2534                            packages.push(e);
2535                        }
2536                    }
2537                }
2538                Ok(None) => {}
2539                Err(e) => extra_warnings.push(format!("hub cache read failed for {u}: {e}")),
2540            }
2541        }
2542
2543        assert!(
2544            !extra_warnings.is_empty(),
2545            "corrupt cache must produce a warning"
2546        );
2547        assert!(
2548            extra_warnings[0].contains("hub cache read failed"),
2549            "warning text mismatch: {}",
2550            extra_warnings[0]
2551        );
2552        assert!(packages.is_empty(), "no packages from corrupt source");
2553    }
2554
2555    // M-2: registry-load failure is demoted to a warning; accumulated
2556    // warnings before the failure are preserved in the returned vec.
2557    #[tokio::test]
2558    async fn aggregate_index_registry_failure_returns_ok_with_warning() {
2559        let tmp = tempfile::tempdir().unwrap();
2560        let app_dir_root = tmp.path().to_path_buf();
2561
2562        // Write corrupt hub_registries.json so load_registries fails.
2563        let reg_path = AppDir::new(app_dir_root.clone()).hub_registries_json();
2564        std::fs::create_dir_all(reg_path.parent().unwrap()).unwrap();
2565        std::fs::write(&reg_path, b"{{{{ not valid json").unwrap();
2566
2567        // Also write a corrupt config.toml to generate a pre-registry warning.
2568        // (config.toml hub.collection_url parse warns before the registry step.)
2569        // We skip this to keep the test minimal — just verify registry failure
2570        // demotes to warning and result is Ok.
2571
2572        let svc = super::super::test_support::make_app_service_at(app_dir_root).await;
2573        let result = AppService::aggregate_index(&svc);
2574        assert!(
2575            result.is_ok(),
2576            "aggregate_index must return Ok even on registry-load failure, got: {result:?}"
2577        );
2578        let (index, warnings) = result.unwrap();
2579        assert!(
2580            index.packages.is_empty(),
2581            "degraded response must have empty packages"
2582        );
2583        assert!(
2584            !warnings.is_empty(),
2585            "registry-load failure must produce a warning"
2586        );
2587        assert!(
2588            warnings
2589                .iter()
2590                .any(|w| w.contains("hub registry discovery failed")),
2591            "warning must mention registry discovery failure, got: {warnings:?}"
2592        );
2593    }
2594}