Skip to main content

algocline_app/service/
hub.rs

1//! Hub — package discovery, search, and index management.
2//!
3//! The Hub is algocline's package registry layer.  It aggregates remote
4//! index data with local install state so that users (via AI) can
5//! **discover** packages they haven't installed yet, and **inspect**
6//! installed packages with full Card and eval statistics.
7//!
8//! ## Staged design
9//!
10//! | Stage | Scope | Status |
11//! |-------|-------|--------|
12//! | **1** | Card Collection install, Pkg-bundled cards | Done |
13//! | **2** | Hub MCP tools (`hub_search`, `hub_info`, `hub_reindex`), local index | Done |
14//! | **3** | Aggregated remote collection index, `hub_publish`, LP | Planned |
15//!
16//! ## MCP tools
17//!
18//! | Tool | Description |
19//! |------|-------------|
20//! | `alc_hub_search` | Discover packages across remote + local indices |
21//! | `alc_hub_info` | Detailed single-package view (meta + cards + aliases + stats) |
22//! | `alc_hub_reindex` | Rebuild index from local packages or a repo checkout |
23//!
24//! ## Index schema (`hub_index/v0`)
25//!
26//! ```json
27//! {
28//!   "schema_version": "hub_index/v0",
29//!   "updated_at": "2026-04-12T10:00:00Z",
30//!   "packages": [{
31//!     "name": "cot",
32//!     "version": "0.1.0",
33//!     "description": "Chain-of-Thought prompting",
34//!     "category": "reasoning",
35//!     "source": "https://github.com/...",
36//!     "card_count": 3,
37//!     "best_card": { "card_id": "...", "model": "...", "pass_rate": 0.82, "scenario": "..." }
38//!   }]
39//! }
40//! ```
41//!
42//! Index generation uses `init.lua` M.meta parsing only — no Lua VM
43//! required.  This keeps the index buildable in CI environments.
44//!
45//! ## Index URL discovery (4-tier)
46//!
47//! Sources are checked in priority order; URLs are deduplicated:
48//!
49//!   0. **Collection URL** — `[hub].collection_url` in `~/.algocline/config.toml`.
50//!      Aggregated index containing all known packages (Stage 3).
51//!   1. **Hub registries** — `~/.algocline/hub_registries.json`, auto-populated
52//!      by `pkg_install` and `card_install`.
53//!   2. **Installed manifest** — `~/.algocline/installed.json`, fallback for
54//!      sources registered before registries existed.
55//!   3. **Compiled-in seeds** — `AUTO_INSTALL_SOURCES` for first-run bootstrap.
56//!
57//! GitHub repo URLs are transformed to raw index URLs:
58//!
59//! ```text
60//! https://github.com/{owner}/{repo}
61//!   → https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
62//! ```
63//!
64//! ## Caching
65//!
66//! Remote indices are cached per-source at
67//! `~/.algocline/hub_cache/{hash}.json` where hash is FNV-1a of the
68//! URL.  TTL is 1 hour.
69//!
70//! ## Registry persistence
71//!
72//! `~/.algocline/hub_registries.json` records source URLs from
73//! `pkg_install` and `card_install`.  Written atomically (tempfile +
74//! rename) to avoid corruption on interruption.
75
76use std::collections::{HashMap, HashSet};
77use std::path::PathBuf;
78
79use serde::{Deserialize, Serialize};
80
81use algocline_core::{AppDir, PkgEntity};
82
83use super::list_opts::{
84    apply_sort_by_value, matches_filter, parse_sort, project_fields, resolve_fields, ListOpts,
85    HUB_SEARCH_FULL, HUB_SEARCH_SUMMARY,
86};
87use super::manifest;
88use super::resolve::AUTO_INSTALL_SOURCES;
89use super::source::PackageSource;
90use super::AppService;
91
92// ─── Constants ─────────────────────────────────────────────────
93
/// How long a cached remote index is considered fresh, in seconds (one hour).
const CACHE_TTL_SECS: u64 = 60 * 60;

/// Timeout applied to hub HTTP requests (30 seconds).
const HTTP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
99
100// ─── Index schema ──────────────────────────────────────────────
101
102/// Remote index — same shape as the local index so merge is trivial.
103#[derive(Debug, Clone, Serialize, Deserialize)]
104pub(crate) struct HubIndex {
105    pub schema_version: String,
106    #[serde(default)]
107    pub updated_at: String,
108    #[serde(default)]
109    pub packages: Vec<IndexEntry>,
110}
111
112/// One package in the index.
113///
114/// `entity` carries the canonical Lua `M.meta` projection (name, version,
115/// description, category, docstring) via `#[serde(flatten)]` so the wire
116/// shape is identical to the pre-refactor flat-object layout. `source`
117/// is the typed package source; `card_count` / `best_card` are hub-side
118/// enrichments computed at index-build time.
119#[derive(Debug, Clone, Serialize, Deserialize)]
120pub(crate) struct IndexEntry {
121    #[serde(flatten)]
122    pub entity: PkgEntity,
123    /// How this package was obtained. Typed on write; legacy bare strings
124    /// in pre-migration `hub_index.json` deserialize via the serde shim
125    /// on `PackageSource` (see `service::source`).
126    #[serde(default)]
127    pub source: PackageSource,
128    #[serde(default)]
129    pub card_count: usize,
130    #[serde(default)]
131    pub best_card: Option<BestCard>,
132}
133
134/// Best card summary within a package.
135#[derive(Debug, Clone, Serialize, Deserialize)]
136pub(crate) struct BestCard {
137    pub card_id: String,
138    #[serde(default)]
139    pub model: String,
140    #[serde(default)]
141    pub pass_rate: f64,
142    #[serde(default)]
143    pub scenario: String,
144}
145
146/// Search result — index entry enriched with local install state.
147///
148/// `entity.docstring` is `skip_serializing` (via the `skip_docstring`
149/// custom serializer on the flattened struct) so the default serde output
150/// never exposes the docstring field — docstrings can be large and
151/// dominate payload size. The `hub_search` projection path re-attaches
152/// the docstring to the output object when the resolved field set
153/// contains `"docstring"`, via
154/// [`SearchResult::to_value_with_optional_docstring`].
155///
156/// `docstring_matched` is a query-time signal: it is `Some(true)` only
157/// when the query hit docstring and none of {name, description, category}.
158/// Otherwise (no query, or query hit any of the other fields) it is
159/// `None` and omitted from the output.
160///
161/// Because `#[serde(flatten)]` composes poorly with field-level
162/// `skip_serializing`, we carry the non-docstring part of `PkgEntity`
163/// via a custom `serialize_entity_without_docstring` path rather than a
164/// bare `#[serde(flatten)]`. The struct still holds a full `PkgEntity`
165/// internally for consistency with `IndexEntry`.
166#[derive(Debug, Clone, Serialize)]
167struct SearchResult {
168    #[serde(flatten, serialize_with = "serialize_entity_without_docstring")]
169    entity: PkgEntity,
170    /// Typed source (mirrors `IndexEntry.source`).
171    source: PackageSource,
172    installed: bool,
173    card_count: usize,
174    best_card: Option<BestCard>,
175    #[serde(skip_serializing_if = "Option::is_none")]
176    docstring_matched: Option<bool>,
177}
178
179/// Serialize a `PkgEntity` as a flat JSON object, intentionally dropping
180/// the `docstring` field so large docstrings do not dominate `hub_search`
181/// payloads. The projection path re-attaches docstring via
182/// [`SearchResult::to_value_with_optional_docstring`].
183fn serialize_entity_without_docstring<S>(entity: &PkgEntity, ser: S) -> Result<S::Ok, S::Error>
184where
185    S: serde::Serializer,
186{
187    use serde::ser::SerializeMap;
188    let mut map = ser.serialize_map(Some(4))?;
189    map.serialize_entry("name", &entity.name)?;
190    map.serialize_entry("version", &entity.version)?;
191    map.serialize_entry("description", &entity.description)?;
192    map.serialize_entry("category", &entity.category)?;
193    map.end()
194}
195
196impl SearchResult {
197    /// Serialize `self` to a JSON `Value`, optionally re-attaching
198    /// `docstring` to the resulting object.
199    ///
200    /// `skip_serializing` removes `docstring` from every serde output
201    /// path. When projection selects `docstring` as an output field, we
202    /// need to put it back — this helper bridges that gap by inserting
203    /// the field manually into the resulting `Value::Object`.
204    ///
205    /// Returns the original `Value` unchanged if serialization produced
206    /// a non-object (should not happen for `SearchResult`, but we stay
207    /// defensive because the downstream `project_fields` contract
208    /// tolerates non-objects).
209    fn to_value_with_optional_docstring(&self, include_docstring: bool) -> serde_json::Value {
210        let mut v = serde_json::to_value(self).unwrap_or(serde_json::Value::Null);
211        if include_docstring {
212            if let serde_json::Value::Object(ref mut map) = v {
213                let doc = self.entity.docstring.clone().unwrap_or_default();
214                map.insert("docstring".to_string(), serde_json::Value::String(doc));
215            }
216        }
217        v
218    }
219}
220
221// ─── Hub registries ───────────────────────────────────────────
222//
223// Persistent file (`~/.algocline/hub_registries.json`) that records
224// source URLs from `pkg_install` and `card_install`.  This is the
225// primary source for Hub index URL discovery — the manifest and
226// `AUTO_INSTALL_SOURCES` serve as fallback seeds.
227
228/// One entry in `hub_registries.json`.
229#[derive(Debug, Clone, Serialize, Deserialize)]
230pub(crate) struct RegistryEntry {
231    /// Original source URL (Git repo or local path).
232    pub source: String,
233    /// How it was registered: "pkg_install" or "card_install".
234    pub origin: String,
235    /// ISO 8601 timestamp of when the entry was added.
236    pub added_at: String,
237}
238
239/// Top-level registries file.
240#[derive(Debug, Clone, Serialize, Deserialize, Default)]
241pub(crate) struct HubRegistries {
242    pub registries: Vec<RegistryEntry>,
243}
244
245fn registries_path(app_dir: &AppDir) -> PathBuf {
246    app_dir.hub_registries_json()
247}
248
249/// Load registries from disk.  Returns empty list if file is missing.
250fn load_registries(app_dir: &AppDir) -> HubRegistries {
251    let path = registries_path(app_dir);
252    if !path.exists() {
253        return HubRegistries::default();
254    }
255    std::fs::read_to_string(&path)
256        .ok()
257        .and_then(|c| serde_json::from_str(&c).ok())
258        .unwrap_or_default()
259}
260
261/// Register a source URL.  Deduplicates by normalized URL.
262///
263/// Returns `Ok(())` on success or when the input is skipped (empty /
264/// local path / already registered). Filesystem failures are returned
265/// as `Err(String)` so callers can surface them on the MCP wire
266/// response — the registry is best-effort relative to the `pkg_install`
267/// itself, but the caller still needs to know when it silently failed
268/// (otherwise hub discovery degrades without any signal).
269///
270/// Uses atomic write (tempfile + rename) to avoid partial writes if
271/// the process is interrupted. Read-modify-write is not locked across
272/// processes, but MCP servers are single-process so this is safe in
273/// practice.
274pub(crate) fn register_source(app_dir: &AppDir, source: &str, origin: &str) -> Result<(), String> {
275    let normalized = source.trim_end_matches('/').to_string();
276    if normalized.is_empty() {
277        return Ok(());
278    }
279    // Skip local paths — they can't host a remote index
280    if normalized.starts_with('/') || normalized.starts_with('.') {
281        return Ok(());
282    }
283
284    let path = registries_path(app_dir);
285    if let Some(parent) = path.parent() {
286        std::fs::create_dir_all(parent).map_err(|e| {
287            format!(
288                "failed to create hub registries dir {}: {e}",
289                parent.display()
290            )
291        })?;
292    }
293
294    // Re-read from disk right before write to minimize TOCTOU window
295    let mut reg = load_registries(app_dir);
296
297    // Already registered?
298    if reg
299        .registries
300        .iter()
301        .any(|e| e.source.trim_end_matches('/') == normalized)
302    {
303        return Ok(());
304    }
305
306    reg.registries.push(RegistryEntry {
307        source: normalized,
308        origin: origin.to_string(),
309        added_at: manifest::now_iso8601(),
310    });
311
312    // Atomic write: write to temp file, then rename
313    let json = serde_json::to_string_pretty(&reg)
314        .map_err(|e| format!("failed to serialize hub registries: {e}"))?;
315    let tmp_path = path.with_extension("json.tmp");
316    std::fs::write(&tmp_path, &json).map_err(|e| {
317        format!(
318            "failed to write hub registries tmp {}: {e}",
319            tmp_path.display()
320        )
321    })?;
322    std::fs::rename(&tmp_path, &path).map_err(|e| {
323        // Best-effort cleanup of the stale tmp file on rename failure.
324        let _ = std::fs::remove_file(&tmp_path);
325        format!(
326            "failed to atomically rename hub registries onto {}: {e}",
327            path.display()
328        )
329    })
330}
331
332// ─── Hub config ──────────────────────────────────────────────
333//
334// Optional `[hub]` section in `~/.algocline/config.toml`:
335//
336//   [hub]
337//   collection_url = "https://raw.githubusercontent.com/.../hub_index.json"
338//
339// When set, this is fetched as Tier 0 (the aggregated collection
340// index containing all known packages, including uninstalled ones).
341
342/// Read the `[hub].collection_url` from `~/.algocline/config.toml`.
343fn collection_url_from_config(app_dir: &AppDir) -> Option<String> {
344    let path = app_dir.config_toml();
345    let content = std::fs::read_to_string(&path).ok()?;
346    let doc: toml_edit::DocumentMut = content.parse().ok()?;
347    let url = doc
348        .get("hub")?
349        .get("collection_url")?
350        .as_str()?
351        .trim()
352        .to_string();
353    if url.is_empty() {
354        None
355    } else {
356        Some(url)
357    }
358}
359
360// ─── Index URL discovery ──────────────────────────────────────
361//
362// Derives remote index URLs from:
363//   0. Hub Collection URL (from config.toml) — aggregated index
364//   1. Hub registries (`hub_registries.json`) — primary source
365//   2. Unique `source` fields in the installed-packages manifest
366//   3. `AUTO_INSTALL_SOURCES` as fallback seeds (for first run)
367//
368// GitHub repos are transformed:
369//   https://github.com/{owner}/{repo}  →
370//   https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
371
/// Convert a repository URL into the URL of its `hub_index.json`.
///
/// * `https://github.com/{owner}/{repo}` (optionally with a trailing
///   slash, a `.git` suffix, or further path segments) maps to
///   `https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json`.
/// * Any other URL that ends in `.json` is assumed to already be a direct
///   index URL and is returned as-is (minus trailing slashes).
/// * Everything else yields `None` (future: support other hosts).
///
/// A GitHub URL whose owner or repo segment is empty (for example
/// `https://github.com//repo`) is not turned into a raw URL, since the
/// result could never resolve.
fn repo_to_index_url(repo_url: &str) -> Option<String> {
    let without_slash = repo_url.trim_end_matches('/');
    // `strip_suffix` removes at most one `.git`; `trim_end_matches` would
    // keep stripping repeated suffixes.
    let trimmed = without_slash
        .strip_suffix(".git")
        .unwrap_or(without_slash);

    if let Some(path) = trimmed.strip_prefix("https://github.com/") {
        // path = "owner/repo[/...]"; only the first two segments matter.
        let mut segments = path.split('/');
        if let (Some(owner), Some(repo)) = (segments.next(), segments.next()) {
            if !owner.is_empty() && !repo.is_empty() {
                return Some(format!(
                    "https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json"
                ));
            }
        }
    }

    // Not a usable GitHub repo URL: accept it only if it already points at a JSON file.
    trimmed.ends_with(".json").then(|| trimmed.to_string())
}
393
394/// Collect unique index URLs from config + registries + manifest + bundled seeds.
395///
396/// Returns `Err` if the installed manifest cannot be read (corrupt JSON /
397/// permission denied). The function intentionally surfaces manifest-read
398/// failures rather than silently skipping — callers feed these URLs into
399/// hub resolution, and a partial URL set is indistinguishable from a
400/// corrupt manifest without the signal.
401fn discover_index_urls(app_dir: &AppDir) -> Result<Vec<String>, String> {
402    let mut index_urls: Vec<String> = Vec::new();
403
404    // 0. From config.toml [hub].collection_url (Tier 0 — aggregated collection)
405    if let Some(url) = collection_url_from_config(app_dir) {
406        index_urls.push(url);
407    }
408
409    let mut repo_urls: HashSet<String> = HashSet::new();
410
411    // 1. From hub registries (primary)
412    let reg = load_registries(app_dir);
413    for entry in &reg.registries {
414        let normalized = entry.source.trim_end_matches('/').to_string();
415        if !normalized.is_empty() {
416            repo_urls.insert(normalized);
417        }
418    }
419
420    // 2. From manifest (catch sources registered before hub_registries existed).
421    // Only Git-variant sources can host a remote hub_index.json; other variants
422    // (Path / Installed / Bundled / Unknown) are skipped by `git_url()` returning None.
423    let m = manifest::load_manifest(app_dir)?;
424    for entry in m.packages.values() {
425        if let Some(url) = entry.source.git_url() {
426            let normalized = url.trim_end_matches('/').to_string();
427            if !normalized.is_empty() {
428                repo_urls.insert(normalized);
429            }
430        }
431    }
432
433    // 3. Fallback: bundled sources (ensures at least these are checked)
434    for url in AUTO_INSTALL_SOURCES {
435        repo_urls.insert(url.to_string());
436    }
437
438    // 4. Transform repo URLs → index URLs, dedup against Tier 0
439    let existing: HashSet<String> = index_urls.iter().cloned().collect();
440    let mut derived: Vec<String> = repo_urls
441        .iter()
442        .filter_map(|url| repo_to_index_url(url))
443        .filter(|url| !existing.contains(url))
444        .collect();
445    derived.sort();
446    derived.dedup();
447    index_urls.extend(derived);
448
449    Ok(index_urls)
450}
451
452// ─── Per-source cache ─────────────────────────────────────────
453//
454// Each remote index is cached separately at
455// `~/.algocline/hub_cache/{hash}.json` where hash is derived from
456// the index URL. This avoids mixing data from different registries
457// and allows per-source TTL validation.
458
459fn cache_dir(app_dir: &AppDir) -> PathBuf {
460    app_dir.hub_cache_dir()
461}
462
/// Derive a stable cache file stem from an index URL.
///
/// Computes the 64-bit FNV-1a hash of the URL bytes and renders it as 16
/// lowercase hex digits. Hand-rolled to avoid pulling in a hash crate —
/// good enough for naming cache files.
fn cache_key(url: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    let digest = url.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{digest:016x}")
}
473
474/// Load cached remote index for a specific URL if fresh (within TTL).
475fn load_cached(app_dir: &AppDir, url: &str) -> Option<HubIndex> {
476    let dir = cache_dir(app_dir);
477    let path = dir.join(format!("{}.json", cache_key(url)));
478    if !path.exists() {
479        return None;
480    }
481    let metadata = std::fs::metadata(&path).ok()?;
482    let age = metadata.modified().ok()?.elapsed().ok()?;
483    if age.as_secs() > CACHE_TTL_SECS {
484        return None;
485    }
486    let content = std::fs::read_to_string(&path).ok()?;
487    serde_json::from_str(&content).ok()
488}
489
490/// Save remote index to per-source cache file.
491///
492/// Returns `Ok(())` on success. Cache write failures are returned as
493/// `Err(String)`; the caller (`fetch_one`) carries them out of band so
494/// hub fetch still completes (the index is in memory) but the warning
495/// surfaces to the MCP wire response via the existing `warnings` channel.
496fn save_cached(app_dir: &AppDir, url: &str, index: &HubIndex) -> Result<(), String> {
497    let dir = cache_dir(app_dir);
498    std::fs::create_dir_all(&dir)
499        .map_err(|e| format!("failed to create hub cache dir {}: {e}", dir.display()))?;
500    let path = dir.join(format!("{}.json", cache_key(url)));
501    let json = serde_json::to_string_pretty(index)
502        .map_err(|e| format!("failed to serialize hub cache: {e}"))?;
503    std::fs::write(&path, json)
504        .map_err(|e| format!("failed to write hub cache {}: {e}", path.display()))
505}
506
507// ─── Remote fetch ──────────────────────────────────────────────
508
509/// Fetch a single remote index by URL, using per-source cache.
510///
511/// Returns the index plus an optional cache-write warning. The warning
512/// is `Some(_)` only when the network fetch succeeded but persisting
513/// the cache to disk failed — the data flow is unaffected, but the
514/// caller surfaces the warning so the operator can fix the underlying
515/// disk issue.
516fn fetch_one(app_dir: &AppDir, url: &str) -> Result<(HubIndex, Option<String>), String> {
517    if let Some(cached) = load_cached(app_dir, url) {
518        return Ok((cached, None));
519    }
520
521    let agent = ureq::Agent::new_with_config(
522        ureq::config::Config::builder()
523            .timeout_global(Some(HTTP_TIMEOUT))
524            .build(),
525    );
526    let body: String = agent
527        .get(url)
528        .call()
529        .map_err(|e| format!("Failed to fetch {url}: {e}"))?
530        .body_mut()
531        .read_to_string()
532        .map_err(|e| format!("Failed to read response from {url}: {e}"))?;
533
534    let index: HubIndex = serde_json::from_str(&body)
535        .map_err(|e| format!("Failed to parse index from {url}: {e}"))?;
536
537    let cache_warning = save_cached(app_dir, url, &index)
538        .err()
539        .map(|e| format!("hub cache write for {url}: {e}"));
540    Ok((index, cache_warning))
541}
542
543/// Fetch all discovered remote indices and merge into one.
544/// Falls back gracefully: failed sources are skipped with warnings.
545fn fetch_remote_indices(app_dir: &AppDir) -> Result<(HubIndex, Vec<String>), String> {
546    let urls = discover_index_urls(app_dir)?;
547    let mut all_packages: Vec<IndexEntry> = Vec::new();
548    let mut seen_names: HashSet<String> = HashSet::new();
549    let mut warnings: Vec<String> = Vec::new();
550
551    for url in &urls {
552        match fetch_one(app_dir, url) {
553            Ok((index, cache_warning)) => {
554                for entry in index.packages {
555                    if seen_names.insert(entry.entity.name.clone()) {
556                        all_packages.push(entry);
557                    }
558                    // If duplicate name across sources, first wins
559                }
560                if let Some(w) = cache_warning {
561                    warnings.push(w);
562                }
563            }
564            Err(e) => {
565                warnings.push(e);
566            }
567        }
568    }
569
570    if all_packages.is_empty() && !warnings.is_empty() {
571        warnings.insert(
572            0,
573            "all remote indices unavailable, showing local packages only".to_string(),
574        );
575    }
576
577    let merged = HubIndex {
578        schema_version: "hub_index/v0".into(),
579        updated_at: String::new(),
580        packages: all_packages,
581    };
582    Ok((merged, warnings))
583}
584
585// ─── Local state ───────────────────────────────────────────────
586
587/// Build a set of locally installed package names from `installed.json`
588/// and the `~/.algocline/packages/` directory.
589fn installed_packages(app_dir: &AppDir) -> Result<HashMap<String, Option<String>>, String> {
590    let mut map = HashMap::new();
591
592    // From manifest (has version info)
593    let m = manifest::load_manifest(app_dir)?;
594    for (name, entry) in &m.packages {
595        map.insert(name.clone(), entry.version.clone());
596    }
597
598    // Also scan packages/ dir in case manifest is stale
599    let pkg_dir = app_dir.packages_dir();
600    if let Ok(entries) = std::fs::read_dir(&pkg_dir) {
601        for entry in entries.flatten() {
602            if entry.path().is_dir() {
603                if let Some(name) = entry.file_name().to_str() {
604                    map.entry(name.to_string()).or_insert(None);
605                }
606            }
607        }
608    }
609
610    Ok(map)
611}
612
613/// Count local cards per package from `{app_dir}/cards/{pkg}/`.
614fn local_card_counts(app_dir: &AppDir) -> HashMap<String, usize> {
615    let mut map = HashMap::new();
616    let cards_dir = app_dir.cards_dir();
617    let entries = match std::fs::read_dir(&cards_dir) {
618        Ok(e) => e,
619        Err(_) => return map,
620    };
621    for entry in entries.flatten() {
622        if !entry.path().is_dir() {
623            continue;
624        }
625        let pkg = match entry.file_name().to_str() {
626            Some(n) => n.to_string(),
627            None => continue,
628        };
629        let count = std::fs::read_dir(entry.path())
630            .map(|es| {
631                es.flatten()
632                    .filter(|e| e.path().extension().is_some_and(|ext| ext == "toml"))
633                    .count()
634            })
635            .unwrap_or(0);
636        if count > 0 {
637            map.insert(pkg, count);
638        }
639    }
640    map
641}
642
643/// Count eval results for a specific package by scanning `{app_dir}/evals/`.
644///
645/// Reads only `.meta.json` files (lightweight) to check the strategy field.
646/// Falls back to reading full eval JSON if meta is missing.
647fn count_evals_for_pkg(app_dir: &AppDir, pkg: &str) -> usize {
648    let evals_dir = app_dir.evals_dir();
649    let entries = match std::fs::read_dir(&evals_dir) {
650        Ok(e) => e,
651        Err(_) => return 0,
652    };
653
654    // Collect all filenames first so ordering doesn't matter.
655    // We track stems that have a .meta.json to avoid reading the full eval JSON.
656    let mut meta_stems: HashSet<String> = HashSet::new();
657    let mut meta_matches: usize = 0;
658    let mut non_meta_paths: Vec<(PathBuf, String)> = Vec::new(); // (path, stem)
659
660    for entry in entries.flatten() {
661        let path = entry.path();
662        let name = match path.file_name().and_then(|n| n.to_str()) {
663            Some(n) => n.to_string(),
664            None => continue,
665        };
666
667        if name.ends_with(".meta.json") {
668            let stem = name.trim_end_matches(".meta.json").to_string();
669            meta_stems.insert(stem);
670            if let Ok(content) = std::fs::read_to_string(&path) {
671                if let Ok(val) = serde_json::from_str::<serde_json::Value>(&content) {
672                    if val.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
673                        meta_matches += 1;
674                    }
675                }
676            }
677            continue;
678        }
679
680        // Skip non-json or comparison files
681        if !name.ends_with(".json") || name.starts_with("compare_") {
682            continue;
683        }
684
685        let stem = path
686            .file_stem()
687            .and_then(|s| s.to_str())
688            .unwrap_or("")
689            .to_string();
690        non_meta_paths.push((path, stem));
691    }
692
693    // Only read full eval JSON for entries without a .meta.json
694    let fallback_matches = non_meta_paths
695        .iter()
696        .filter(|(_, stem)| !meta_stems.contains(stem))
697        .filter(|(path, _)| {
698            std::fs::read_to_string(path)
699                .ok()
700                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok())
701                .and_then(|v| v.get("strategy")?.as_str().map(|s| s == pkg))
702                .unwrap_or(false)
703        })
704        .count();
705
706    meta_matches + fallback_matches
707}
708
709// ─── Merge ─────────────────────────────────────────────────────
710
711/// Merge remote index with local install state.
712///
713/// When a package is installed locally and the remote index lacks a
714/// docstring (pre-v0.21 indices), the docstring is extracted from the
715/// local `init.lua` so that full-text search works immediately.
716fn merge(app_dir: &AppDir, remote: &HubIndex) -> Result<Vec<SearchResult>, String> {
717    let installed = installed_packages(app_dir)?;
718    let card_counts = local_card_counts(app_dir);
719    let pkg_dir: Option<PathBuf> = Some(app_dir.packages_dir());
720
721    let mut seen: HashSet<String> = HashSet::new();
722    let mut results: Vec<SearchResult> = Vec::new();
723
724    for entry in &remote.packages {
725        let pkg_name = &entry.entity.name;
726        let is_installed = installed.contains_key(pkg_name);
727        let local_cards = card_counts.get(pkg_name).copied().unwrap_or(0);
728
729        // Supplement empty docstring from local init.lua when installed.
730        // Re-parse via `PkgEntity` so the supplementation path stays
731        // consistent with `build_index`.
732        let docstring = if entry.entity.docstring.as_deref().unwrap_or("").is_empty()
733            && is_installed
734        {
735            pkg_dir
736                .as_ref()
737                .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(pkg_name).join("init.lua")))
738                .and_then(|e| e.docstring)
739        } else {
740            entry.entity.docstring.clone()
741        };
742
743        seen.insert(pkg_name.clone());
744        let mut merged_entity = entry.entity.clone();
745        merged_entity.docstring = docstring;
746        results.push(SearchResult {
747            entity: merged_entity,
748            source: entry.source.clone(),
749            installed: is_installed,
750            card_count: if is_installed && local_cards > entry.card_count {
751                local_cards
752            } else {
753                entry.card_count
754            },
755            best_card: entry.best_card.clone(),
756            docstring_matched: None,
757        });
758    }
759
760    // Add local-only packages (not in remote index).
761    for (name, version) in &installed {
762        if seen.contains(name) {
763            continue;
764        }
765        // Pull full `PkgEntity` from local init.lua when available (keeps the
766        // wire shape consistent with remote entries). When the package does
767        // not parse as a `PkgEntity` (missing `M.meta.name`), fall back to
768        // a minimal entity with just the directory name and the manifest
769        // version — the entry still appears in local-only listings, but the
770        // richer projection fields are simply absent.
771        let parsed_entity = pkg_dir
772            .as_ref()
773            .and_then(|d| PkgEntity::parse_from_init_lua(&d.join(name).join("init.lua")));
774        let entity = parsed_entity.unwrap_or(PkgEntity {
775            name: name.clone(),
776            version: version.clone(),
777            description: None,
778            category: None,
779            docstring: None,
780        });
781        results.push(SearchResult {
782            entity,
783            source: PackageSource::Unknown,
784            installed: true,
785            card_count: card_counts.get(name).copied().unwrap_or(0),
786            best_card: None,
787            docstring_matched: None,
788        });
789    }
790
791    Ok(results)
792}
793
794// ─── Search (filtering) ───────────────────────────────────────
795
796fn matches_query(result: &SearchResult, query: &str) -> bool {
797    let q = query.to_lowercase();
798    let pkg = &result.entity;
799    let empty = String::new();
800    pkg.name.to_lowercase().contains(&q)
801        || pkg
802            .description
803            .as_ref()
804            .unwrap_or(&empty)
805            .to_lowercase()
806            .contains(&q)
807        || pkg
808            .category
809            .as_ref()
810            .unwrap_or(&empty)
811            .to_lowercase()
812            .contains(&q)
813        || pkg
814            .docstring
815            .as_ref()
816            .unwrap_or(&empty)
817            .to_lowercase()
818            .contains(&q)
819}
820
821// ─── Index generation (reindex) ───────────────────────────────
822//
823// The non-Lua-VM parser that used to live here
824// (`parse_meta_from_init_lua` / `extract_docstring`) has moved into
825// `algocline_core::PkgEntity::parse_from_init_lua`, where it is shared
826// with the manifest / lockfile wire format. The parsing tests migrated
827// with it; `hub.rs` now just consumes the typed `PkgEntity` projection.
828
829/// Build a hub index by scanning a packages directory.
830///
831/// When `source_dir` is provided, scans that directory directly
832/// (for generating an index from a repo checkout).  Metadata comes
833/// only from `init.lua` — no manifest lookup, no card counts.
834///
835/// When `source_dir` is `None`, scans `~/.algocline/packages/` and
836/// enriches entries with manifest source and local card counts.
837fn build_index(app_dir: &AppDir, source_dir: Option<&std::path::Path>) -> Result<HubIndex, String> {
838    let empty = || HubIndex {
839        schema_version: "hub_index/v0".into(),
840        updated_at: super::manifest::now_iso8601(),
841        packages: Vec::new(),
842    };
843
844    let pkg_dir = match source_dir {
845        Some(d) => d.to_path_buf(),
846        None => app_dir.packages_dir(),
847    };
848
849    let use_local_state = source_dir.is_none();
850    let card_counts = if use_local_state {
851        local_card_counts(app_dir)
852    } else {
853        HashMap::new()
854    };
855    // Manifest read errors surface as `Err` rather than degrading to an
856    // empty manifest — when building the local hub index, a corrupt
857    // `installed.json` silently turning all package sources into
858    // `PackageSource::Unknown` would be indistinguishable from the
859    // legitimate "no source recorded" state, and would ship into
860    // generated `hub_index.json` files verbatim.
861    let manifest = if use_local_state {
862        manifest::load_manifest(app_dir)?
863    } else {
864        manifest::Manifest::default()
865    };
866
867    let mut entries = Vec::new();
868
869    // Missing / unreadable `pkg_dir` is a legitimate "no packages yet"
870    // state on a fresh install — distinct from manifest corruption
871    // above, and safe to surface as an empty index.
872    let dir_entries = match std::fs::read_dir(&pkg_dir) {
873        Ok(e) => e,
874        Err(_) => return Ok(empty()),
875    };
876
877    for entry in dir_entries.flatten() {
878        if !entry.path().is_dir() {
879            continue;
880        }
881        let dir_name = match entry.file_name().to_str() {
882            Some(n) if !n.starts_with('.') && !n.starts_with('_') => n.to_string(),
883            _ => continue,
884        };
885
886        let init_lua = entry.path().join("init.lua");
887        if !init_lua.exists() {
888            continue;
889        }
890
891        // Silent-exclude gate: `PkgEntity::parse_from_init_lua` returns `None`
892        // when `M.meta` is absent or `M.meta.name` is empty. Directories that
893        // happen to contain an `init.lua` but aren't algocline packages
894        // (e.g. `alc_shapes/`, a type DSL library) are dropped from the index
895        // rather than falling through with a placeholder name — that would
896        // pollute hub_search.
897        let Some(entity) = PkgEntity::parse_from_init_lua(&init_lua) else {
898            continue;
899        };
900
901        // Use manifest source only for local-state mode. When the manifest
902        // has no record for this directory, default to `PackageSource::Unknown`
903        // (via `Default`) — hub consumers see it as "source not recorded".
904        let source = manifest
905            .packages
906            .get(&dir_name)
907            .map(|e| e.source.clone())
908            .unwrap_or_default();
909
910        entries.push(IndexEntry {
911            entity,
912            source,
913            card_count: card_counts.get(&dir_name).copied().unwrap_or(0),
914            best_card: None,
915        });
916    }
917
918    entries.sort_by(|a, b| a.entity.name.cmp(&b.entity.name));
919
920    Ok(HubIndex {
921        schema_version: "hub_index/v0".into(),
922        updated_at: super::manifest::now_iso8601(),
923        packages: entries,
924    })
925}
926
927// ─── Public API ────────────────────────────────────────────────
928
929impl AppService {
930    /// Generate a hub index from a packages directory.
931    ///
932    /// When `source_dir` is provided, scans that directory (e.g. a
933    /// repo checkout) — pure metadata extraction, no manifest or card
934    /// data mixed in.  When omitted, scans `~/.algocline/packages/`.
935    ///
936    /// Writes the index to `output_path` (for CI / publishing).
937    /// Does NOT touch the remote search cache.
938    pub fn hub_reindex(
939        &self,
940        output_path: Option<&str>,
941        source_dir: Option<&str>,
942    ) -> Result<String, String> {
943        let src = source_dir.map(std::path::Path::new);
944        if let Some(d) = src {
945            if !d.is_dir() {
946                return Err(format!("source_dir '{}' is not a directory", d.display()));
947            }
948        }
949        let app_dir = self.log_config.app_dir();
950        let index = build_index(&app_dir, src)?;
951
952        let written_path = if let Some(path) = output_path {
953            let json = serde_json::to_string_pretty(&index)
954                .map_err(|e| format!("Failed to serialize index: {e}"))?;
955            std::fs::write(path, &json)
956                .map_err(|e| format!("Failed to write index to {path}: {e}"))?;
957            Some(path.to_string())
958        } else {
959            None
960        };
961
962        let response = serde_json::json!({
963            "package_count": index.packages.len(),
964            "updated_at": index.updated_at,
965            "output_path": written_path,
966            "source_dir": source_dir,
967        });
968        Ok(response.to_string())
969    }
970
971    /// Show detailed information for a single package.
972    ///
973    /// Aggregates package metadata (from index or local `init.lua`),
974    /// all Cards, aliases, and eval stats into one response.
975    pub fn hub_info(&self, pkg: &str) -> Result<String, String> {
976        use algocline_engine::card;
977
978        // Guard against path traversal
979        if pkg.contains("..") || pkg.contains('/') || pkg.contains('\\') {
980            return Err(format!("Invalid package name: '{pkg}'"));
981        }
982
983        // Package metadata: try remote index first, fall back to local
984        let app_dir = self.log_config.app_dir();
985        let installed = installed_packages(&app_dir)?;
986        let is_installed = installed.contains_key(pkg);
987
988        // Resolve package metadata: try remote index first, fall back to
989        // local init.lua. `version` / `description` / `category` are modelled
990        // as `Option<String>` at the `PkgEntity` layer; at this API surface
991        // we flatten `None` to empty string so the wire shape (non-null
992        // JSON string fields) stays unchanged for existing consumers.
993        let (version, description, category, source) = {
994            let (remote, _) = fetch_remote_indices(&app_dir)?;
995            if let Some(entry) = remote.packages.iter().find(|e| e.entity.name == pkg) {
996                (
997                    entry.entity.version.clone().unwrap_or_default(),
998                    entry.entity.description.clone().unwrap_or_default(),
999                    entry.entity.category.clone().unwrap_or_default(),
1000                    entry.source.clone(),
1001                )
1002            } else if is_installed {
1003                // Fall back to local init.lua parse via `PkgEntity`. When
1004                // the file is not a valid package (no `M.meta.name`), we
1005                // degrade gracefully by returning the manifest-recorded
1006                // version and empty string fields — mirroring the pre-typed
1007                // behaviour.
1008                let init_lua = app_dir.packages_dir().join(pkg).join("init.lua");
1009                let entity = PkgEntity::parse_from_init_lua(&init_lua);
1010                let manifest_source = manifest::load_manifest(&app_dir)?
1011                    .packages
1012                    .get(pkg)
1013                    .map(|e| e.source.clone())
1014                    .unwrap_or_default();
1015                match entity {
1016                    Some(e) => (
1017                        e.version.unwrap_or_default(),
1018                        e.description.unwrap_or_default(),
1019                        e.category.unwrap_or_default(),
1020                        manifest_source,
1021                    ),
1022                    None => (
1023                        installed.get(pkg).cloned().flatten().unwrap_or_default(),
1024                        String::new(),
1025                        String::new(),
1026                        manifest_source,
1027                    ),
1028                }
1029            } else {
1030                return Err(format!(
1031                    "Package '{pkg}' not found in remote indices or locally installed packages"
1032                ));
1033            }
1034        };
1035
1036        // Cards for this package (single call, reused for stats)
1037        let card_rows = self.card_store.list(Some(pkg)).unwrap_or_default();
1038        let cards_json = card::summaries_to_json(&card_rows);
1039
1040        // Aliases for this package
1041        let aliases_json = match self.card_store.alias_list(Some(pkg)) {
1042            Ok(rows) => card::aliases_to_json(&rows),
1043            Err(_) => serde_json::json!([]),
1044        };
1045
1046        // Stats: card count, best pass_rate, eval count
1047        let card_count = card_rows.len();
1048        let best_pass_rate = card_rows
1049            .iter()
1050            .filter_map(|c| c.pass_rate)
1051            .fold(f64::NEG_INFINITY, f64::max);
1052        let best_pass_rate = if best_pass_rate.is_finite() {
1053            Some(best_pass_rate)
1054        } else {
1055            None
1056        };
1057
1058        // Eval count from evals directory
1059        let eval_count = count_evals_for_pkg(&app_dir, pkg);
1060
1061        let response = serde_json::json!({
1062            "pkg": {
1063                "name": pkg,
1064                "version": version,
1065                "description": description,
1066                "category": category,
1067                "source": source,
1068                "installed": is_installed,
1069            },
1070            "cards": cards_json,
1071            "aliases": aliases_json,
1072            "stats": {
1073                "card_count": card_count,
1074                "eval_count": eval_count,
1075                "best_pass_rate": best_pass_rate,
1076            },
1077        });
1078        Ok(response.to_string())
1079    }
1080
1081    /// Search packages across remote indices + local state.
1082    ///
1083    /// Index URLs are discovered from hub registries, manifest sources,
1084    /// and `AUTO_INSTALL_SOURCES`. Each source is cached independently.
1085    ///
1086    /// ## List-tool options (`opts`)
1087    ///
1088    /// The `opts` parameter carries the list-tool primitives
1089    /// (`limit / sort / filter / fields / verbose`) shared with other
1090    /// list-style MCP tools. Defaults:
1091    ///
1092    /// - `limit` — 50 when `None`. `Some(0)` means **no limit** (return
1093    ///   all matching entries — empty-means-all idiom).
1094    /// - `sort` — `"-installed,name"` when `None` (installed first, then
1095    ///   ascending by name).
1096    /// - `filter` — no additional filter. Legacy `category` /
1097    ///   `installed_only` parameters are merged into the filter map when
1098    ///   `filter` does not already contain those keys (explicit
1099    ///   `filter` wins on conflict).
1100    /// - `fields` / `verbose` — projection is applied to every entry in
1101    ///   the `results` array (see
1102    ///   [`super::list_opts::resolve_fields`]). Top-level keys
1103    ///   (`total`, `sources`, `warnings`) are never projected away.
1104    ///
1105    /// ## docstring handling
1106    ///
1107    /// [`SearchResult::docstring`] is `skip_serializing`, so it is
1108    /// absent from the default serialized view. When the resolved
1109    /// projection contains `"docstring"`, it is re-injected into the
1110    /// per-entry JSON via
1111    /// [`SearchResult::to_value_with_optional_docstring`].
1112    pub(crate) fn hub_search(
1113        &self,
1114        query: Option<&str>,
1115        category: Option<&str>,
1116        installed_only: Option<bool>,
1117        opts: ListOpts,
1118    ) -> Result<String, String> {
1119        let app_dir = self.log_config.app_dir();
1120        let (remote, warnings) = fetch_remote_indices(&app_dir)?;
1121        let mut results = merge(&app_dir, &remote)?;
1122
1123        // Filter by query (internal signal covers name/description/
1124        // category/docstring — `matches_query` unchanged).
1125        let query_lower = query.filter(|q| !q.is_empty()).map(|q| q.to_lowercase());
1126        if let Some(ref ql) = query_lower {
1127            results.retain(|r| matches_query(r, ql));
1128        }
1129
1130        // Compute docstring_matched per remaining hit: Some(true) only
1131        // when the query matched docstring and none of {name,
1132        // description, category}; otherwise None.
1133        if let Some(ref ql) = query_lower {
1134            for r in &mut results {
1135                let empty = String::new();
1136                let pkg = &r.entity;
1137                let other_hit = pkg.name.to_lowercase().contains(ql)
1138                    || pkg
1139                        .description
1140                        .as_ref()
1141                        .unwrap_or(&empty)
1142                        .to_lowercase()
1143                        .contains(ql)
1144                    || pkg
1145                        .category
1146                        .as_ref()
1147                        .unwrap_or(&empty)
1148                        .to_lowercase()
1149                        .contains(ql);
1150                let doc_hit = pkg
1151                    .docstring
1152                    .as_ref()
1153                    .unwrap_or(&empty)
1154                    .to_lowercase()
1155                    .contains(ql);
1156                r.docstring_matched = if !other_hit && doc_hit {
1157                    Some(true)
1158                } else {
1159                    None
1160                };
1161            }
1162        }
1163
1164        // Build the effective filter map: start from explicit `opts.filter`,
1165        // then fold legacy `category` / `installed_only` in only if the
1166        // corresponding key is not already set (explicit filter wins).
1167        let mut filter_map: std::collections::HashMap<String, serde_json::Value> =
1168            opts.filter.unwrap_or_default();
1169        if let Some(cat) = category {
1170            filter_map
1171                .entry("category".to_string())
1172                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1173        }
1174        if let Some(only) = installed_only {
1175            // Preserve prior semantic: `installed_only=Some(false)` was a
1176            // no-op (it did not force `installed=false`). Only fold when
1177            // explicitly true.
1178            if only {
1179                filter_map
1180                    .entry("installed".to_string())
1181                    .or_insert(serde_json::Value::Bool(true));
1182            }
1183        }
1184
1185        // Resolve sort keys up-front so an invalid sort string errors out
1186        // before we touch results.
1187        let sort_str = opts.sort.as_deref().unwrap_or("-installed,name");
1188        let sort_keys = parse_sort(sort_str)?;
1189
1190        // Resolve projection fields; this also rejects unknown `verbose`
1191        // values before any heavy work.
1192        let fields = resolve_fields(
1193            opts.verbose.as_deref(),
1194            opts.fields.as_deref(),
1195            HUB_SEARCH_SUMMARY,
1196            HUB_SEARCH_FULL,
1197        )?;
1198        let include_docstring = fields.iter().any(|f| f == "docstring");
1199
1200        // Serialize each result to a Value (docstring optionally attached)
1201        // so filter/sort/projection work uniformly on JSON values.
1202        let mut items: Vec<serde_json::Value> = results
1203            .iter()
1204            .map(|r| r.to_value_with_optional_docstring(include_docstring))
1205            .collect();
1206
1207        // Filter AFTER serialization so filter keys can reference
1208        // projection-level shape (e.g. `category`, `installed`).
1209        if !filter_map.is_empty() {
1210            items.retain(|v| matches_filter(v, &filter_map));
1211        }
1212
1213        // Sort.
1214        apply_sort_by_value(&mut items, &sort_keys);
1215
1216        // Limit. `limit = Some(0)` means "no limit" (return all results)
1217        // — mirrors the `empty=all & some=filter` idiom used across the
1218        // list-tool contract. `None` falls back to the default cap (50).
1219        let total = items.len();
1220        let limit = opts.limit.unwrap_or(50);
1221        if limit > 0 {
1222            items.truncate(limit);
1223        }
1224
1225        // Projection (after truncation — unselected fields are stripped
1226        // from the kept entries only).
1227        let projected: Vec<serde_json::Value> = items
1228            .into_iter()
1229            .map(|v| project_fields(v, &fields))
1230            .collect();
1231
1232        // Collect discovered sources for transparency.
1233        let sources = discover_index_urls(&app_dir)?;
1234
1235        let mut json = serde_json::json!({
1236            "results": projected,
1237            "total": total,
1238            "sources": sources,
1239        });
1240        if !warnings.is_empty() {
1241            json["warnings"] = serde_json::json!(warnings);
1242        }
1243        Ok(json.to_string())
1244    }
1245}
1246
1247#[cfg(test)]
1248mod tests {
1249    use super::*;
1250
1251    #[test]
1252    fn repo_to_index_url_github() {
1253        assert_eq!(
1254            repo_to_index_url("https://github.com/ynishi/algocline-bundled-packages"),
1255            Some(
1256                "https://raw.githubusercontent.com/ynishi/algocline-bundled-packages/main/hub_index.json"
1257                    .to_string()
1258            )
1259        );
1260    }
1261
1262    #[test]
1263    fn repo_to_index_url_github_trailing_slash() {
1264        assert_eq!(
1265            repo_to_index_url("https://github.com/user/repo/"),
1266            Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json".to_string())
1267        );
1268    }
1269
1270    #[test]
1271    fn repo_to_index_url_github_dot_git() {
1272        assert_eq!(
1273            repo_to_index_url("https://github.com/user/repo.git"),
1274            Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json".to_string())
1275        );
1276    }
1277
1278    #[test]
1279    fn repo_to_index_url_direct_json() {
1280        assert_eq!(
1281            repo_to_index_url("https://example.com/my_index.json"),
1282            Some("https://example.com/my_index.json".to_string())
1283        );
1284    }
1285
1286    #[test]
1287    fn repo_to_index_url_unknown_host_no_json() {
1288        assert_eq!(repo_to_index_url("https://example.com/some-repo"), None);
1289    }
1290
1291    #[test]
1292    fn repo_to_index_url_local_path() {
1293        assert_eq!(repo_to_index_url("/home/user/my-pkg"), None);
1294    }
1295
1296    #[test]
1297    fn cache_key_stable() {
1298        let k1 = cache_key("https://example.com/index.json");
1299        let k2 = cache_key("https://example.com/index.json");
1300        assert_eq!(k1, k2);
1301        assert_eq!(k1.len(), 16); // 16 hex chars
1302    }
1303
1304    #[test]
1305    fn cache_key_different_urls() {
1306        let k1 = cache_key("https://a.com/index.json");
1307        let k2 = cache_key("https://b.com/index.json");
1308        assert_ne!(k1, k2);
1309    }
1310
1311    // NOTE: The init.lua meta / docstring parsing tests have moved to
1312    // `algocline_core::pkg::tests` along with the parser itself. The
1313    // `hub.rs` call-path tests now exercise the typed `PkgEntity` via
1314    // `build_index` / `merge` only.
1315
1316    #[test]
1317    fn merge_dedup_uses_hashset() {
1318        // Verify that merge correctly handles local-only packages
1319        // without O(n*m) behavior (structural test).
1320        let tmp = tempfile::tempdir().unwrap();
1321        let app_dir = AppDir::new(tmp.path().to_path_buf());
1322        let remote = HubIndex {
1323            schema_version: "hub_index/v0".into(),
1324            updated_at: String::new(),
1325            packages: vec![IndexEntry {
1326                entity: PkgEntity {
1327                    name: "remote_only".into(),
1328                    version: Some("1.0".into()),
1329                    description: Some("from remote".into()),
1330                    category: Some("test".into()),
1331                    docstring: None,
1332                },
1333                source: PackageSource::Unknown,
1334                card_count: 0,
1335                best_card: None,
1336            }],
1337        };
1338
1339        let results = merge(&app_dir, &remote).expect("merge over empty app_dir should succeed");
1340        // Should include remote_only + any locally installed packages
1341        assert!(results.iter().any(|r| r.entity.name == "remote_only"));
1342    }
1343
1344    #[test]
1345    fn matches_query_searches_docstring() {
1346        let result = SearchResult {
1347            entity: PkgEntity {
1348                name: "cascade".into(),
1349                version: Some("0.1.0".into()),
1350                description: Some("Multi-level routing".into()),
1351                category: Some("meta".into()),
1352                docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
1353            },
1354            source: PackageSource::Unknown,
1355            installed: true,
1356            card_count: 0,
1357            best_card: None,
1358            docstring_matched: None,
1359        };
1360
1361        assert!(matches_query(&result, "thompson"), "docstring match");
1362        assert!(matches_query(&result, "FrugalGPT"), "docstring match case");
1363        assert!(matches_query(&result, "routing"), "description match");
1364        assert!(!matches_query(&result, "bayesian"), "no match");
1365    }
1366
1367    // ─── SearchResult::to_value_with_optional_docstring ────────────
1368    //
1369    // `docstring` is not emitted by the default serde path (via the
1370    // `serialize_entity_without_docstring` custom serializer) and is
1371    // re-attached only when the projection path says so. These tests
1372    // pin the two branches of that helper — they are the hinge that
1373    // `verbose="full"` / `fields=["docstring"]` rely on.
1374
1375    fn sample_search_result() -> SearchResult {
1376        SearchResult {
1377            entity: PkgEntity {
1378                name: "cascade".into(),
1379                version: Some("0.1.0".into()),
1380                description: Some("Multi-level routing".into()),
1381                category: Some("reasoning".into()),
1382                docstring: Some("Based on FrugalGPT. Uses Thompson Sampling.".into()),
1383            },
1384            source: PackageSource::Git {
1385                url: "https://example.com/cascade".into(),
1386                rev: None,
1387            },
1388            installed: true,
1389            card_count: 3,
1390            best_card: None,
1391            docstring_matched: None,
1392        }
1393    }
1394
1395    #[test]
1396    fn to_value_default_omits_docstring() {
1397        let r = sample_search_result();
1398        let v = r.to_value_with_optional_docstring(false);
1399        let obj = v.as_object().expect("object");
1400        assert!(
1401            !obj.contains_key("docstring"),
1402            "default summary must not leak docstring"
1403        );
1404        assert_eq!(obj.get("name").and_then(|x| x.as_str()), Some("cascade"));
1405        // `docstring_matched` is Option<None> → `skip_serializing_if`
1406        // must omit it when the query did not mark a docstring-only hit.
1407        assert!(
1408            !obj.contains_key("docstring_matched"),
1409            "docstring_matched=None must be omitted"
1410        );
1411    }
1412
1413    #[test]
1414    fn to_value_include_reattaches_docstring() {
1415        let r = sample_search_result();
1416        let v = r.to_value_with_optional_docstring(true);
1417        let obj = v.as_object().expect("object");
1418        assert_eq!(
1419            obj.get("docstring").and_then(|x| x.as_str()),
1420            Some("Based on FrugalGPT. Uses Thompson Sampling.")
1421        );
1422    }
1423
1424    #[test]
1425    fn to_value_serializes_docstring_matched_when_set() {
1426        let mut r = sample_search_result();
1427        r.docstring_matched = Some(true);
1428        let v = r.to_value_with_optional_docstring(false);
1429        let obj = v.as_object().expect("object");
1430        assert_eq!(
1431            obj.get("docstring_matched").and_then(|x| x.as_bool()),
1432            Some(true)
1433        );
1434    }
1435
1436    // ─── projection glue ──────────────────────────────────────────
1437    //
1438    // These tests exercise the projection path that `hub_search` uses to
1439    // shape output: `resolve_fields` + `project_fields` applied to a
1440    // `to_value_with_optional_docstring`-serialized entry. They pin the
1441    // wf-sim-verbose contract: `fields` wins over `verbose`, default
1442    // summary preset excludes docstring, `full` preset includes
1443    // docstring, unknown keys silently skipped.
1444
1445    #[test]
1446    fn hub_search_default_summary_excludes_docstring() {
1447        let r = sample_search_result();
1448        let fields = resolve_fields(None, None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
1449        let include_docstring = fields.iter().any(|f| f == "docstring");
1450        let v = project_fields(
1451            r.to_value_with_optional_docstring(include_docstring),
1452            &fields,
1453        );
1454        let obj = v.as_object().expect("object");
1455        assert!(
1456            !obj.contains_key("docstring"),
1457            "summary preset must omit docstring"
1458        );
1459        // summary preset fields that are present on the sample entry
1460        for key in ["name", "version", "description", "category", "installed"] {
1461            assert!(obj.contains_key(key), "summary preset key {key} missing");
1462        }
1463    }
1464
1465    #[test]
1466    fn hub_search_verbose_full_includes_docstring() {
1467        let r = sample_search_result();
1468        let fields =
1469            resolve_fields(Some("full"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
1470        let include_docstring = fields.iter().any(|f| f == "docstring");
1471        let v = project_fields(
1472            r.to_value_with_optional_docstring(include_docstring),
1473            &fields,
1474        );
1475        let obj = v.as_object().expect("object");
1476        assert_eq!(
1477            obj.get("docstring").and_then(|x| x.as_str()),
1478            Some("Based on FrugalGPT. Uses Thompson Sampling.")
1479        );
1480        // full preset superset keys
1481        for key in ["source", "card_count"] {
1482            assert!(obj.contains_key(key), "full preset key {key} missing");
1483        }
1484    }
1485
1486    #[test]
1487    fn hub_search_fields_beats_verbose() {
1488        let r = sample_search_result();
1489        let explicit = vec!["name".to_string(), "docstring".to_string()];
1490        // verbose=summary normally excludes docstring, but explicit
1491        // fields must win.
1492        let fields = resolve_fields(
1493            Some("summary"),
1494            Some(&explicit),
1495            HUB_SEARCH_SUMMARY,
1496            HUB_SEARCH_FULL,
1497        )
1498        .unwrap();
1499        let include_docstring = fields.iter().any(|f| f == "docstring");
1500        let v = project_fields(
1501            r.to_value_with_optional_docstring(include_docstring),
1502            &fields,
1503        );
1504        let obj = v.as_object().expect("object");
1505        assert_eq!(obj.len(), 2, "only the two requested fields");
1506        assert!(obj.contains_key("name"));
1507        assert!(obj.contains_key("docstring"));
1508    }
1509
1510    #[test]
1511    fn hub_search_fields_unknown_key_silently_skipped() {
1512        let r = sample_search_result();
1513        let explicit = vec!["name".to_string(), "bogus".to_string()];
1514        let fields =
1515            resolve_fields(None, Some(&explicit), HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
1516        let v = project_fields(r.to_value_with_optional_docstring(false), &fields);
1517        let obj = v.as_object().expect("object");
1518        assert_eq!(obj.len(), 1, "bogus must not appear");
1519        assert!(obj.contains_key("name"));
1520    }
1521
1522    #[test]
1523    fn hub_search_invalid_verbose_errors() {
1524        let err =
1525            resolve_fields(Some("fat"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap_err();
1526        assert!(
1527            err.contains("fat"),
1528            "error must mention the offending value"
1529        );
1530    }
1531
1532    // ─── docstring_matched classification ─────────────────────────
1533    //
1534    // The query-time classification rule: `docstring_matched = Some(true)`
1535    // only when the query hit docstring AND missed name/description/
1536    // category; otherwise `None` (and therefore omitted from output).
1537    // The logic lives inline in `hub_search`; we re-create it here over a
1538    // tiny local helper so the three cases stay pinned as a contract.
1539
1540    fn classify(r: &SearchResult, query: &str) -> Option<bool> {
1541        let ql = query.to_lowercase();
1542        if query.is_empty() {
1543            return None;
1544        }
1545        let empty = String::new();
1546        let pkg = &r.entity;
1547        let other_hit = pkg.name.to_lowercase().contains(&ql)
1548            || pkg
1549                .description
1550                .as_ref()
1551                .unwrap_or(&empty)
1552                .to_lowercase()
1553                .contains(&ql)
1554            || pkg
1555                .category
1556                .as_ref()
1557                .unwrap_or(&empty)
1558                .to_lowercase()
1559                .contains(&ql);
1560        let doc_hit = pkg
1561            .docstring
1562            .as_ref()
1563            .unwrap_or(&empty)
1564            .to_lowercase()
1565            .contains(&ql);
1566        if !other_hit && doc_hit {
1567            Some(true)
1568        } else {
1569            None
1570        }
1571    }
1572
1573    #[test]
1574    fn docstring_matched_true_when_only_docstring_hits() {
1575        let r = sample_search_result();
1576        // "Thompson" appears only in docstring of the sample entry.
1577        assert_eq!(classify(&r, "thompson"), Some(true));
1578    }
1579
1580    #[test]
1581    fn docstring_matched_none_when_name_also_hits() {
1582        let r = sample_search_result();
1583        // "cascade" hits the name; docstring match is irrelevant now.
1584        assert_eq!(classify(&r, "cascade"), None);
1585    }
1586
1587    #[test]
1588    fn docstring_matched_none_when_description_hits() {
1589        let r = sample_search_result();
1590        // "routing" hits description; should be None.
1591        assert_eq!(classify(&r, "routing"), None);
1592    }
1593
1594    #[test]
1595    fn docstring_matched_none_when_query_empty() {
1596        let r = sample_search_result();
1597        assert_eq!(classify(&r, ""), None);
1598    }
1599
1600    // ─── filter fold (legacy params → filter map) ─────────────────
1601    //
1602    // Behavioural rule: legacy `category` / `installed_only=true` fold
1603    // into the filter map only when the corresponding key is not
1604    // already set (explicit `filter` wins). `installed_only=false` is a
1605    // no-op (preserves prior semantics).
1606
1607    fn build_filter_map(
1608        category: Option<&str>,
1609        installed_only: Option<bool>,
1610        explicit: Option<HashMap<String, serde_json::Value>>,
1611    ) -> HashMap<String, serde_json::Value> {
1612        let mut filter_map = explicit.unwrap_or_default();
1613        if let Some(cat) = category {
1614            filter_map
1615                .entry("category".to_string())
1616                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1617        }
1618        if let Some(only) = installed_only {
1619            if only {
1620                filter_map
1621                    .entry("installed".to_string())
1622                    .or_insert(serde_json::Value::Bool(true));
1623            }
1624        }
1625        filter_map
1626    }
1627
1628    #[test]
1629    fn filter_by_category_via_legacy_param() {
1630        let m = build_filter_map(Some("reasoning"), None, None);
1631        assert_eq!(
1632            m.get("category"),
1633            Some(&serde_json::Value::String("reasoning".to_string()))
1634        );
1635    }
1636
1637    #[test]
1638    fn filter_by_installed_only_via_legacy_param() {
1639        let m = build_filter_map(None, Some(true), None);
1640        assert_eq!(m.get("installed"), Some(&serde_json::Value::Bool(true)));
1641    }
1642
1643    #[test]
1644    fn filter_installed_only_false_is_noop() {
1645        let m = build_filter_map(None, Some(false), None);
1646        assert!(
1647            !m.contains_key("installed"),
1648            "installed_only=false should not fold in"
1649        );
1650    }
1651
1652    #[test]
1653    fn filter_beats_legacy_param_on_conflict() {
1654        // Explicit filter says category=meta; legacy says reasoning.
1655        // Explicit must win.
1656        let mut explicit = HashMap::new();
1657        explicit.insert(
1658            "category".to_string(),
1659            serde_json::Value::String("meta".to_string()),
1660        );
1661        let m = build_filter_map(Some("reasoning"), None, Some(explicit));
1662        assert_eq!(
1663            m.get("category"),
1664            Some(&serde_json::Value::String("meta".to_string()))
1665        );
1666    }
1667
1668    #[test]
1669    fn filter_merges_legacy_when_no_conflict() {
1670        // Explicit sets a different key; legacy category should still
1671        // be folded in.
1672        let mut explicit = HashMap::new();
1673        explicit.insert("installed".to_string(), serde_json::Value::Bool(true));
1674        let m = build_filter_map(Some("reasoning"), None, Some(explicit));
1675        assert_eq!(
1676            m.get("category"),
1677            Some(&serde_json::Value::String("reasoning".to_string()))
1678        );
1679        assert_eq!(m.get("installed"), Some(&serde_json::Value::Bool(true)));
1680    }
1681
1682    // ─── default sort verification ────────────────────────────────
1683
1684    #[test]
1685    fn default_sort_is_minus_installed_name() {
1686        let keys = parse_sort("-installed,name").unwrap();
1687        assert_eq!(keys.len(), 2);
1688        assert_eq!(keys[0].key, "installed");
1689        assert!(keys[0].desc, "installed must sort desc (true first)");
1690        assert_eq!(keys[1].key, "name");
1691        assert!(!keys[1].desc);
1692
1693        // Apply it against a small vec and confirm the expected order.
1694        let mut items = vec![
1695            serde_json::json!({"installed": false, "name": "zeta"}),
1696            serde_json::json!({"installed": true, "name": "mu"}),
1697            serde_json::json!({"installed": false, "name": "alpha"}),
1698            serde_json::json!({"installed": true, "name": "beta"}),
1699        ];
1700        apply_sort_by_value(&mut items, &keys);
1701        let names: Vec<&str> = items
1702            .iter()
1703            .map(|v| v.get("name").and_then(|x| x.as_str()).unwrap_or(""))
1704            .collect();
1705        assert_eq!(names, vec!["beta", "mu", "alpha", "zeta"]);
1706    }
1707}