Skip to main content

algocline_app/service/
hub.rs

1//! Hub — package discovery, search, and index management.
2//!
3//! The Hub is algocline's package registry layer.  It aggregates remote
4//! index data with local install state so that users (via AI) can
5//! **discover** packages they haven't installed yet, and **inspect**
6//! installed packages with full Card and eval statistics.
7//!
8//! ## Staged design
9//!
10//! | Stage | Scope | Status |
11//! |-------|-------|--------|
12//! | **1** | Card Collection install, Pkg-bundled cards | Done |
13//! | **2** | Hub MCP tools (`hub_search`, `hub_info`, `hub_reindex`), local index | Done |
14//! | **3** | Aggregated remote collection index, `hub_publish`, LP | Planned |
15//!
16//! ## MCP tools
17//!
18//! | Tool | Description |
19//! |------|-------------|
20//! | `alc_hub_search` | Discover packages across remote + local indices |
21//! | `alc_hub_info` | Detailed single-package view (meta + cards + aliases + stats) |
22//! | `alc_hub_reindex` | Rebuild index from local packages or a repo checkout |
23//!
24//! ## Index schema (`hub_index/v0`)
25//!
26//! ```json
27//! {
28//!   "schema_version": "hub_index/v0",
29//!   "updated_at": "2026-04-12T10:00:00Z",
30//!   "packages": [{
31//!     "name": "cot",
32//!     "version": "0.1.0",
33//!     "description": "Chain-of-Thought prompting",
34//!     "category": "reasoning",
35//!     "source": "https://github.com/...",
36//!     "card_count": 3,
37//!     "best_card": { "card_id": "...", "model": "...", "pass_rate": 0.82, "scenario": "..." }
38//!   }]
39//! }
40//! ```
41//!
42//! Index generation uses `init.lua` M.meta parsing only — no Lua VM
43//! required.  This keeps the index buildable in CI environments.
44//!
45//! ## Index URL discovery (4-tier)
46//!
47//! Sources are checked in priority order; URLs are deduplicated:
48//!
49//!   0. **Collection URL** — `[hub].collection_url` in `~/.algocline/config.toml`.
50//!      Aggregated index containing all known packages (Stage 3).
51//!   1. **Hub registries** — `~/.algocline/hub_registries.json`, auto-populated
52//!      by `pkg_install` and `card_install`.
53//!   2. **Installed manifest** — `~/.algocline/installed.json`, fallback for
54//!      sources registered before registries existed.
55//!   3. **Compiled-in seeds** — `AUTO_INSTALL_SOURCES` for first-run bootstrap.
56//!
57//! GitHub repo URLs are transformed to raw index URLs:
58//!
59//! ```text
60//! https://github.com/{owner}/{repo}
61//!   → https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
62//! ```
63//!
64//! ## Caching
65//!
66//! Remote indices are cached per-source at
67//! `~/.algocline/hub_cache/{hash}.json` where hash is FNV-1a of the
68//! URL.  TTL is 1 hour.
69//!
70//! ## Registry persistence
71//!
72//! `~/.algocline/hub_registries.json` records source URLs from
73//! `pkg_install` and `card_install`.  Written atomically (tempfile +
74//! rename) to avoid corruption on interruption.
75
76use std::collections::{HashMap, HashSet};
77use std::path::PathBuf;
78
79use serde::{Deserialize, Serialize};
80
81use super::list_opts::{
82    apply_sort_by_value, matches_filter, parse_sort, project_fields, resolve_fields, ListOpts,
83    HUB_SEARCH_FULL, HUB_SEARCH_SUMMARY,
84};
85use super::manifest;
86use super::resolve::AUTO_INSTALL_SOURCES;
87use super::AppService;
88
89// ─── Constants ─────────────────────────────────────────────────
90
/// Maximum age, in seconds, of a cached remote index file before it is
/// treated as stale and re-fetched (3600 s = 1 hour).
const CACHE_TTL_SECS: u64 = 3600;

/// Global timeout applied to each remote index HTTP request (30 seconds).
const HTTP_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(30);
96
97// ─── Index schema ──────────────────────────────────────────────
98
/// A hub index document (`hub_index/v0`).
///
/// Remote indices and the merged in-memory index share this shape, so
/// merging is a matter of concatenating `packages`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct HubIndex {
    /// Schema identifier, e.g. `"hub_index/v0"`. Required when deserializing.
    pub schema_version: String,
    /// Timestamp string of the last index update; empty when absent.
    #[serde(default)]
    pub updated_at: String,
    /// Package entries; empty when absent.
    #[serde(default)]
    pub packages: Vec<IndexEntry>,
}
108
/// One package in the index.
///
/// Only `name` is required when deserializing; every other field falls
/// back to its `Default` value when missing from the JSON.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct IndexEntry {
    /// Package name; used as the deduplication key when indices are merged.
    pub name: String,
    /// Package version string (e.g. `"0.1.0"`).
    #[serde(default)]
    pub version: String,
    /// Short human-readable description.
    #[serde(default)]
    pub description: String,
    /// Category label (e.g. `"reasoning"`).
    #[serde(default)]
    pub category: String,
    /// Where the package comes from (the schema example shows a repository URL).
    #[serde(default)]
    pub source: String,
    /// Number of cards recorded for the package in the index.
    #[serde(default)]
    pub card_count: usize,
    /// Summary of the best card, when the index provides one.
    #[serde(default)]
    pub best_card: Option<BestCard>,
    /// Leading `---` docstring lines from init.lua (for full-text search).
    #[serde(default)]
    pub docstring: String,
}
129
/// Best card summary within a package.
///
/// Only `card_id` is required when deserializing; the remaining fields
/// default to empty / `0.0` when missing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct BestCard {
    /// Identifier of the card.
    pub card_id: String,
    /// Model name associated with the card.
    #[serde(default)]
    pub model: String,
    /// Pass rate of the card (the schema example shows a fraction such as `0.82`).
    #[serde(default)]
    pub pass_rate: f64,
    /// Scenario name associated with the card.
    #[serde(default)]
    pub scenario: String,
}
141
/// Search result — index entry enriched with local install state.
///
/// `docstring` is `skip_serializing` so the default serde output never
/// exposes it (docstrings can be large and dominate payload size). The
/// `hub_search` projection path puts it back into the JSON object when
/// the resolved field set contains `"docstring"`, via
/// [`SearchResult::to_value_with_optional_docstring`].
///
/// `docstring_matched` is a query-time signal: it is `Some(true)` only
/// when the query hit docstring and none of {name, description, category}.
/// Otherwise (no query, or query hit any of the other fields) it is
/// `None` and omitted from the output.
#[derive(Debug, Clone, Serialize)]
struct SearchResult {
    // Fields copied from the index entry (empty for local-only packages,
    // except `name` and, when known, `version`).
    name: String,
    version: String,
    description: String,
    category: String,
    source: String,
    // True when the package was found in the local install state.
    installed: bool,
    // Card count; `merge` may raise it to the local count for installed packages.
    card_count: usize,
    best_card: Option<BestCard>,
    // Never serialized by serde; re-attached manually on request.
    #[serde(skip_serializing)]
    docstring: String,
    // Omitted from the output entirely when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    docstring_matched: Option<bool>,
}
169
170impl SearchResult {
171    /// Serialize `self` to a JSON `Value`, optionally re-attaching
172    /// `docstring` to the resulting object.
173    ///
174    /// `skip_serializing` removes `docstring` from every serde output
175    /// path. When projection selects `docstring` as an output field, we
176    /// need to put it back — this helper bridges that gap by inserting
177    /// the field manually into the resulting `Value::Object`.
178    ///
179    /// Returns the original `Value` unchanged if serialization produced
180    /// a non-object (should not happen for `SearchResult`, but we stay
181    /// defensive because the downstream `project_fields` contract
182    /// tolerates non-objects).
183    fn to_value_with_optional_docstring(&self, include_docstring: bool) -> serde_json::Value {
184        let mut v = serde_json::to_value(self).unwrap_or(serde_json::Value::Null);
185        if include_docstring {
186            if let serde_json::Value::Object(ref mut map) = v {
187                map.insert(
188                    "docstring".to_string(),
189                    serde_json::Value::String(self.docstring.clone()),
190                );
191            }
192        }
193        v
194    }
195}
196
197// ─── Hub registries ───────────────────────────────────────────
198//
199// Persistent file (`~/.algocline/hub_registries.json`) that records
200// source URLs from `pkg_install` and `card_install`.  This is the
201// primary source for Hub index URL discovery — the manifest and
202// `AUTO_INSTALL_SOURCES` serve as fallback seeds.
203
/// One entry in `hub_registries.json`.
///
/// All three fields are required when deserializing.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub(crate) struct RegistryEntry {
    /// Original source URL (Git repo or local path).
    /// `register_source` stores it with trailing slashes removed.
    pub source: String,
    /// How it was registered: "pkg_install" or "card_install".
    pub origin: String,
    /// ISO 8601 timestamp of when the entry was added.
    pub added_at: String,
}
214
/// Top-level registries file (`hub_registries.json`).
///
/// `Default` yields an empty registry list, which is what callers get
/// when the file cannot be loaded.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub(crate) struct HubRegistries {
    /// Registered sources, in the order they were added.
    pub registries: Vec<RegistryEntry>,
}
220
221fn registries_path() -> Result<PathBuf, String> {
222    let home = dirs::home_dir().ok_or("Cannot determine home directory")?;
223    Ok(home.join(".algocline").join("hub_registries.json"))
224}
225
226/// Load registries from disk.  Returns empty list if file is missing.
227fn load_registries() -> HubRegistries {
228    let path = match registries_path() {
229        Ok(p) => p,
230        Err(_) => return HubRegistries::default(),
231    };
232    if !path.exists() {
233        return HubRegistries::default();
234    }
235    std::fs::read_to_string(&path)
236        .ok()
237        .and_then(|c| serde_json::from_str(&c).ok())
238        .unwrap_or_default()
239}
240
241/// Register a source URL.  Deduplicates by normalized URL.
242///
243/// Uses atomic write (tempfile + rename) to avoid partial writes if
244/// the process is interrupted.  Read-modify-write is not locked across
245/// processes, but MCP servers are single-process so this is safe in
246/// practice.
247pub(crate) fn register_source(source: &str, origin: &str) {
248    let normalized = source.trim_end_matches('/').to_string();
249    if normalized.is_empty() {
250        return;
251    }
252    // Skip local paths — they can't host a remote index
253    if normalized.starts_with('/') || normalized.starts_with('.') {
254        return;
255    }
256
257    let path = match registries_path() {
258        Ok(p) => p,
259        Err(_) => return,
260    };
261    if let Some(parent) = path.parent() {
262        let _ = std::fs::create_dir_all(parent);
263    }
264
265    // Re-read from disk right before write to minimize TOCTOU window
266    let mut reg = load_registries();
267
268    // Already registered?
269    if reg
270        .registries
271        .iter()
272        .any(|e| e.source.trim_end_matches('/') == normalized)
273    {
274        return;
275    }
276
277    reg.registries.push(RegistryEntry {
278        source: normalized,
279        origin: origin.to_string(),
280        added_at: manifest::now_iso8601(),
281    });
282
283    // Atomic write: write to temp file, then rename
284    match serde_json::to_string_pretty(&reg) {
285        Ok(json) => {
286            let tmp_path = path.with_extension("json.tmp");
287            if let Err(e) = std::fs::write(&tmp_path, &json) {
288                tracing::warn!("failed to write hub registries tmp: {e}");
289                return;
290            }
291            if let Err(e) = std::fs::rename(&tmp_path, &path) {
292                tracing::warn!("failed to rename hub registries: {e}");
293                // Clean up tmp on failure
294                let _ = std::fs::remove_file(&tmp_path);
295            }
296        }
297        Err(e) => tracing::warn!("failed to serialize hub registries: {e}"),
298    }
299}
300
301// ─── Hub config ──────────────────────────────────────────────
302//
303// Optional `[hub]` section in `~/.algocline/config.toml`:
304//
305//   [hub]
306//   collection_url = "https://raw.githubusercontent.com/.../hub_index.json"
307//
308// When set, this is fetched as Tier 0 (the aggregated collection
309// index containing all known packages, including uninstalled ones).
310
311/// Read the `[hub].collection_url` from `~/.algocline/config.toml`.
312fn collection_url_from_config() -> Option<String> {
313    let home = dirs::home_dir()?;
314    let path = home.join(".algocline").join("config.toml");
315    let content = std::fs::read_to_string(&path).ok()?;
316    let doc: toml_edit::DocumentMut = content.parse().ok()?;
317    let url = doc
318        .get("hub")?
319        .get("collection_url")?
320        .as_str()?
321        .trim()
322        .to_string();
323    if url.is_empty() {
324        None
325    } else {
326        Some(url)
327    }
328}
329
330// ─── Index URL discovery ──────────────────────────────────────
331//
332// Derives remote index URLs from:
333//   0. Hub Collection URL (from config.toml) — aggregated index
334//   1. Hub registries (`hub_registries.json`) — primary source
335//   2. Unique `source` fields in the installed-packages manifest
336//   3. `AUTO_INSTALL_SOURCES` as fallback seeds (for first run)
337//
338// GitHub repos are transformed:
339//   https://github.com/{owner}/{repo}  →
340//   https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json
341
/// Convert a repository URL into the URL of its `hub_index.json`.
///
/// * `https://github.com/{owner}/{repo}` (optionally followed by further
///   path segments, a trailing `/`, or a `.git` suffix) becomes
///   `https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json`.
/// * Any other URL that ends with `.json` is assumed to already be a
///   direct index URL and is returned as-is (minus trailing slashes).
/// * Everything else yields `None` (future: support other hosts).
///
/// A GitHub URL whose owner or repo segment is empty (for example
/// `https://github.com/owner/.git`) is not treated as a repository, so a
/// malformed raw URL containing `//` is never produced.
fn repo_to_index_url(repo_url: &str) -> Option<String> {
    let trimmed = repo_url.trim_end_matches('/').trim_end_matches(".git");

    if let Some(path) = trimmed.strip_prefix("https://github.com/") {
        // path = "owner/repo[/anything/else]"
        let mut segments = path.splitn(3, '/');
        if let (Some(owner), Some(repo)) = (segments.next(), segments.next()) {
            if !owner.is_empty() && !repo.is_empty() {
                return Some(format!(
                    "https://raw.githubusercontent.com/{owner}/{repo}/main/hub_index.json"
                ));
            }
        }
    }

    // Not a usable GitHub repo URL: accept it only if it already points
    // at a JSON document.
    trimmed.ends_with(".json").then(|| trimmed.to_string())
}
363
364/// Collect unique index URLs from config + registries + manifest + bundled seeds.
365fn discover_index_urls() -> Vec<String> {
366    let mut index_urls: Vec<String> = Vec::new();
367
368    // 0. From config.toml [hub].collection_url (Tier 0 — aggregated collection)
369    if let Some(url) = collection_url_from_config() {
370        index_urls.push(url);
371    }
372
373    let mut repo_urls: HashSet<String> = HashSet::new();
374
375    // 1. From hub registries (primary)
376    let reg = load_registries();
377    for entry in &reg.registries {
378        let normalized = entry.source.trim_end_matches('/').to_string();
379        if !normalized.is_empty() {
380            repo_urls.insert(normalized);
381        }
382    }
383
384    // 2. From manifest (catch sources registered before hub_registries existed)
385    if let Ok(m) = manifest::load_manifest() {
386        for entry in m.packages.values() {
387            let normalized = entry.source.trim_end_matches('/').to_string();
388            if !normalized.is_empty() && !normalized.starts_with('/') {
389                repo_urls.insert(normalized);
390            }
391        }
392    }
393
394    // 3. Fallback: bundled sources (ensures at least these are checked)
395    for url in AUTO_INSTALL_SOURCES {
396        repo_urls.insert(url.to_string());
397    }
398
399    // 4. Transform repo URLs → index URLs, dedup against Tier 0
400    let existing: HashSet<String> = index_urls.iter().cloned().collect();
401    let mut derived: Vec<String> = repo_urls
402        .iter()
403        .filter_map(|url| repo_to_index_url(url))
404        .filter(|url| !existing.contains(url))
405        .collect();
406    derived.sort();
407    derived.dedup();
408    index_urls.extend(derived);
409
410    index_urls
411}
412
413// ─── Per-source cache ─────────────────────────────────────────
414//
415// Each remote index is cached separately at
416// `~/.algocline/hub_cache/{hash}.json` where hash is derived from
417// the index URL. This avoids mixing data from different registries
418// and allows per-source TTL validation.
419
420fn cache_dir() -> Result<PathBuf, String> {
421    let home = dirs::home_dir().ok_or("Cannot determine home directory")?;
422    Ok(home.join(".algocline").join("hub_cache"))
423}
424
/// Derive a stable cache file stem from an index URL.
///
/// Computes the 64-bit FNV-1a hash of the URL's UTF-8 bytes and renders
/// it as 16 lowercase, zero-padded hex digits. Hand-rolled to avoid a
/// hash crate dependency — good enough for naming cache files.
fn cache_key(url: &str) -> String {
    const FNV_OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    const FNV_PRIME: u64 = 0x0100_0000_01b3;

    // FNV-1a: xor the byte in first, then multiply by the prime.
    let digest = url.bytes().fold(FNV_OFFSET_BASIS, |acc, byte| {
        (acc ^ u64::from(byte)).wrapping_mul(FNV_PRIME)
    });
    format!("{digest:016x}")
}
435
436/// Load cached remote index for a specific URL if fresh (within TTL).
437fn load_cached(url: &str) -> Option<HubIndex> {
438    let dir = cache_dir().ok()?;
439    let path = dir.join(format!("{}.json", cache_key(url)));
440    if !path.exists() {
441        return None;
442    }
443    let metadata = std::fs::metadata(&path).ok()?;
444    let age = metadata.modified().ok()?.elapsed().ok()?;
445    if age.as_secs() > CACHE_TTL_SECS {
446        return None;
447    }
448    let content = std::fs::read_to_string(&path).ok()?;
449    serde_json::from_str(&content).ok()
450}
451
452/// Save remote index to per-source cache file.
453fn save_cached(url: &str, index: &HubIndex) {
454    let dir = match cache_dir() {
455        Ok(d) => d,
456        Err(e) => {
457            tracing::warn!("hub cache dir unavailable: {e}");
458            return;
459        }
460    };
461    if let Err(e) = std::fs::create_dir_all(&dir) {
462        tracing::warn!("failed to create hub cache dir: {e}");
463        return;
464    }
465    let path = dir.join(format!("{}.json", cache_key(url)));
466    match serde_json::to_string_pretty(index) {
467        Ok(json) => {
468            if let Err(e) = std::fs::write(&path, json) {
469                tracing::warn!("failed to write hub cache {}: {e}", path.display());
470            }
471        }
472        Err(e) => tracing::warn!("failed to serialize hub cache: {e}"),
473    }
474}
475
476// ─── Remote fetch ──────────────────────────────────────────────
477
478/// Fetch a single remote index by URL, using per-source cache.
479fn fetch_one(url: &str) -> Result<HubIndex, String> {
480    if let Some(cached) = load_cached(url) {
481        return Ok(cached);
482    }
483
484    let agent = ureq::Agent::new_with_config(
485        ureq::config::Config::builder()
486            .timeout_global(Some(HTTP_TIMEOUT))
487            .build(),
488    );
489    let body: String = agent
490        .get(url)
491        .call()
492        .map_err(|e| format!("Failed to fetch {url}: {e}"))?
493        .body_mut()
494        .read_to_string()
495        .map_err(|e| format!("Failed to read response from {url}: {e}"))?;
496
497    let index: HubIndex = serde_json::from_str(&body)
498        .map_err(|e| format!("Failed to parse index from {url}: {e}"))?;
499
500    save_cached(url, &index);
501    Ok(index)
502}
503
504/// Fetch all discovered remote indices and merge into one.
505/// Falls back gracefully: failed sources are skipped with warnings.
506fn fetch_remote_indices() -> (HubIndex, Vec<String>) {
507    let urls = discover_index_urls();
508    let mut all_packages: Vec<IndexEntry> = Vec::new();
509    let mut seen_names: HashSet<String> = HashSet::new();
510    let mut warnings: Vec<String> = Vec::new();
511
512    for url in &urls {
513        match fetch_one(url) {
514            Ok(index) => {
515                for entry in index.packages {
516                    if seen_names.insert(entry.name.clone()) {
517                        all_packages.push(entry);
518                    }
519                    // If duplicate name across sources, first wins
520                }
521            }
522            Err(e) => {
523                warnings.push(e);
524            }
525        }
526    }
527
528    if all_packages.is_empty() && !warnings.is_empty() {
529        warnings.insert(
530            0,
531            "all remote indices unavailable, showing local packages only".to_string(),
532        );
533    }
534
535    let merged = HubIndex {
536        schema_version: "hub_index/v0".into(),
537        updated_at: String::new(),
538        packages: all_packages,
539    };
540    (merged, warnings)
541}
542
543// ─── Local state ───────────────────────────────────────────────
544
545/// Build a set of locally installed package names from `installed.json`
546/// and the `~/.algocline/packages/` directory.
547fn installed_packages() -> HashMap<String, Option<String>> {
548    let mut map = HashMap::new();
549
550    // From manifest (has version info)
551    if let Ok(m) = manifest::load_manifest() {
552        for (name, entry) in &m.packages {
553            map.insert(name.clone(), entry.version.clone());
554        }
555    }
556
557    // Also scan packages/ dir in case manifest is stale
558    if let Some(home) = dirs::home_dir() {
559        let pkg_dir = home.join(".algocline").join("packages");
560        if let Ok(entries) = std::fs::read_dir(&pkg_dir) {
561            for entry in entries.flatten() {
562                if entry.path().is_dir() {
563                    if let Some(name) = entry.file_name().to_str() {
564                        map.entry(name.to_string()).or_insert(None);
565                    }
566                }
567            }
568        }
569    }
570
571    map
572}
573
574/// Count local cards per package from `~/.algocline/cards/{pkg}/`.
575fn local_card_counts() -> HashMap<String, usize> {
576    let mut map = HashMap::new();
577    let home = match dirs::home_dir() {
578        Some(h) => h,
579        None => return map,
580    };
581    let cards_dir = home.join(".algocline").join("cards");
582    let entries = match std::fs::read_dir(&cards_dir) {
583        Ok(e) => e,
584        Err(_) => return map,
585    };
586    for entry in entries.flatten() {
587        if !entry.path().is_dir() {
588            continue;
589        }
590        let pkg = match entry.file_name().to_str() {
591            Some(n) => n.to_string(),
592            None => continue,
593        };
594        let count = std::fs::read_dir(entry.path())
595            .map(|es| {
596                es.flatten()
597                    .filter(|e| e.path().extension().is_some_and(|ext| ext == "toml"))
598                    .count()
599            })
600            .unwrap_or(0);
601        if count > 0 {
602            map.insert(pkg, count);
603        }
604    }
605    map
606}
607
608/// Count eval results for a specific package by scanning `~/.algocline/evals/`.
609///
610/// Reads only `.meta.json` files (lightweight) to check the strategy field.
611/// Falls back to reading full eval JSON if meta is missing.
612fn count_evals_for_pkg(pkg: &str) -> usize {
613    let home = match dirs::home_dir() {
614        Some(h) => h,
615        None => return 0,
616    };
617    let evals_dir = home.join(".algocline").join("evals");
618    let entries = match std::fs::read_dir(&evals_dir) {
619        Ok(e) => e,
620        Err(_) => return 0,
621    };
622
623    // Collect all filenames first so ordering doesn't matter.
624    // We track stems that have a .meta.json to avoid reading the full eval JSON.
625    let mut meta_stems: HashSet<String> = HashSet::new();
626    let mut meta_matches: usize = 0;
627    let mut non_meta_paths: Vec<(PathBuf, String)> = Vec::new(); // (path, stem)
628
629    for entry in entries.flatten() {
630        let path = entry.path();
631        let name = match path.file_name().and_then(|n| n.to_str()) {
632            Some(n) => n.to_string(),
633            None => continue,
634        };
635
636        if name.ends_with(".meta.json") {
637            let stem = name.trim_end_matches(".meta.json").to_string();
638            meta_stems.insert(stem);
639            if let Ok(content) = std::fs::read_to_string(&path) {
640                if let Ok(val) = serde_json::from_str::<serde_json::Value>(&content) {
641                    if val.get("strategy").and_then(|s| s.as_str()) == Some(pkg) {
642                        meta_matches += 1;
643                    }
644                }
645            }
646            continue;
647        }
648
649        // Skip non-json or comparison files
650        if !name.ends_with(".json") || name.starts_with("compare_") {
651            continue;
652        }
653
654        let stem = path
655            .file_stem()
656            .and_then(|s| s.to_str())
657            .unwrap_or("")
658            .to_string();
659        non_meta_paths.push((path, stem));
660    }
661
662    // Only read full eval JSON for entries without a .meta.json
663    let fallback_matches = non_meta_paths
664        .iter()
665        .filter(|(_, stem)| !meta_stems.contains(stem))
666        .filter(|(path, _)| {
667            std::fs::read_to_string(path)
668                .ok()
669                .and_then(|c| serde_json::from_str::<serde_json::Value>(&c).ok())
670                .and_then(|v| v.get("strategy")?.as_str().map(|s| s == pkg))
671                .unwrap_or(false)
672        })
673        .count();
674
675    meta_matches + fallback_matches
676}
677
678// ─── Merge ─────────────────────────────────────────────────────
679
680/// Merge remote index with local install state.
681///
682/// When a package is installed locally and the remote index lacks a
683/// docstring (pre-v0.21 indices), the docstring is extracted from the
684/// local `init.lua` so that full-text search works immediately.
685fn merge(remote: &HubIndex) -> Vec<SearchResult> {
686    let installed = installed_packages();
687    let card_counts = local_card_counts();
688    let pkg_dir = dirs::home_dir().map(|h| h.join(".algocline").join("packages"));
689
690    let mut seen: HashSet<String> = HashSet::new();
691    let mut results: Vec<SearchResult> = Vec::new();
692
693    for entry in &remote.packages {
694        let is_installed = installed.contains_key(&entry.name);
695        let local_cards = card_counts.get(&entry.name).copied().unwrap_or(0);
696
697        // Supplement empty docstring from local init.lua when installed
698        let docstring = if entry.docstring.is_empty() && is_installed {
699            pkg_dir
700                .as_ref()
701                .map(|d| extract_docstring(&d.join(&entry.name).join("init.lua")))
702                .unwrap_or_default()
703        } else {
704            entry.docstring.clone()
705        };
706
707        seen.insert(entry.name.clone());
708        results.push(SearchResult {
709            name: entry.name.clone(),
710            version: entry.version.clone(),
711            description: entry.description.clone(),
712            category: entry.category.clone(),
713            source: entry.source.clone(),
714            installed: is_installed,
715            card_count: if is_installed && local_cards > entry.card_count {
716                local_cards
717            } else {
718                entry.card_count
719            },
720            best_card: entry.best_card.clone(),
721            docstring,
722            docstring_matched: None,
723        });
724    }
725
726    // Add local-only packages (not in remote index).
727    for (name, version) in &installed {
728        if seen.contains(name) {
729            continue;
730        }
731        let docstring = pkg_dir
732            .as_ref()
733            .map(|d| extract_docstring(&d.join(name).join("init.lua")))
734            .unwrap_or_default();
735        results.push(SearchResult {
736            name: name.clone(),
737            version: version.clone().unwrap_or_default(),
738            description: String::new(),
739            category: String::new(),
740            source: String::new(),
741            installed: true,
742            card_count: card_counts.get(name).copied().unwrap_or(0),
743            best_card: None,
744            docstring,
745            docstring_matched: None,
746        });
747    }
748
749    results
750}
751
752// ─── Search (filtering) ───────────────────────────────────────
753
754fn matches_query(result: &SearchResult, query: &str) -> bool {
755    let q = query.to_lowercase();
756    result.name.to_lowercase().contains(&q)
757        || result.description.to_lowercase().contains(&q)
758        || result.category.to_lowercase().contains(&q)
759        || result.docstring.to_lowercase().contains(&q)
760}
761
762// ─── Index generation (reindex) ───────────────────────────────
763
/// Read the leading `---` doc-comment block of an `init.lua` file.
///
/// Starting at the top of the file, every line whose first
/// non-whitespace characters are `---` contributes its remaining text
/// (trimmed on both sides). Blank lines are skipped without ending the
/// block; the first line that is neither blank nor a `---` line ends it.
/// The collected lines are joined with `\n`.
///
/// Returns an empty string when the file cannot be read or has no
/// leading doc lines. Used for full-text search in hub_search.
fn extract_docstring(path: &std::path::Path) -> String {
    let Ok(content) = std::fs::read_to_string(path) else {
        return String::new();
    };
    content
        .lines()
        .map(str::trim_start)
        // Blank lines are allowed inside the docstring block.
        .filter(|line| !line.is_empty())
        // Stop at the first line that is not a `---` doc comment.
        .map_while(|line| line.strip_prefix("---"))
        .map(str::trim)
        .collect::<Vec<_>>()
        .join("\n")
}
789
/// Parse `M.meta = { ... }` from an `init.lua` file without Lua VM.
///
/// Returns `(name, version, description, category)` taken from the first
/// `M.meta` table found anywhere in the file. Occurrences of `M.meta`
/// that follow a `--` on the same line (comments, docstrings) are
/// ignored. Returns `None` when the file cannot be read, no table is
/// found, its braces are unbalanced, or `name` is missing or empty.
/// Fields other than `name` default to an empty string.
///
/// A field is recognized only in the form `key = "literal"`, optionally
/// continued with `.. "literal"` concatenations, so
/// `description = "foo " .. "bar"` is collected as `"foo bar"`. An
/// occurrence of the key that is not directly followed by `=` and an
/// opening double quote (for example the word "name" inside another
/// field's text, or a key bound to a variable) is skipped rather than
/// being paired with whatever string happens to come next.
///
/// **Limitations**: only double-quoted literals are understood; escape
/// sequences are not interpreted (a `\"` ends the literal). Braces are
/// counted without regard to strings or comments. Nested tables are
/// tolerated when locating the end of the block, but keys inside them
/// are not distinguished from top-level keys.
fn parse_meta_from_init_lua(path: &std::path::Path) -> Option<(String, String, String, String)> {
    // Body (between the outer braces) of the first uncommented `M.meta` table.
    fn meta_block(src: &str) -> Option<&str> {
        let mut search_from = 0;
        let meta_start = loop {
            let pos = search_from + src[search_from..].find("M.meta")?;
            let line_start = src[..pos].rfind('\n').map_or(0, |i| i + 1);
            // A `--` earlier on the line means this mention is commented out.
            if !src[line_start..pos].contains("--") {
                break pos;
            }
            search_from = pos + "M.meta".len();
        };
        let brace_start = meta_start + src[meta_start..].find('{')?;

        // Track brace depth so nested tables do not end the block early.
        let mut depth = 0i32;
        for (i, ch) in src[brace_start..].char_indices() {
            match ch {
                '{' => depth += 1,
                '}' => {
                    depth -= 1;
                    if depth == 0 {
                        return Some(&src[brace_start + 1..brace_start + i]);
                    }
                }
                _ => {}
            }
        }
        None
    }

    // Parse `"a" [.. "b" ...]` at the start of `value` (after optional
    // whitespace). `None` when no complete literal starts there.
    fn string_value(value: &str) -> Option<String> {
        let mut rest = value.trim_start().strip_prefix('"')?;
        let mut collected = String::new();
        let mut closed_any = false;
        loop {
            let Some(end) = rest.find('"') else {
                // Unterminated literal: keep what was already complete.
                return closed_any.then_some(collected);
            };
            collected.push_str(&rest[..end]);
            closed_any = true;

            // Continue only across `.. "` — anything else ends the value.
            let continuation = rest[end + 1..]
                .trim_start()
                .strip_prefix("..")
                .and_then(|tail| tail.trim_start().strip_prefix('"'));
            match continuation {
                Some(next_literal) => rest = next_literal,
                None => return Some(collected),
            }
        }
    }

    // Value of the first `key = "..."` assignment in `block`, or "".
    fn field(block: &str, key: &str) -> String {
        let mut search_from = 0;
        while let Some(rel) = block[search_from..].find(key) {
            let pos = search_from + rel;
            search_from = pos + key.len();

            // Reject matches inside longer identifiers ("short_description").
            let starts_word = block[..pos]
                .bytes()
                .next_back()
                .map_or(true, |prev| !(prev.is_ascii_alphanumeric() || prev == b'_'));
            if !starts_word {
                continue;
            }

            // The key must be assigned right here: `key = "..."`.
            let Some(value) = block[search_from..].trim_start().strip_prefix('=') else {
                continue;
            };
            if let Some(text) = string_value(value) {
                return text;
            }
        }
        String::new()
    }

    let content = std::fs::read_to_string(path).ok()?;
    let block = meta_block(&content)?;

    let name = field(block, "name");
    if name.is_empty() {
        return None;
    }
    Some((
        name,
        field(block, "version"),
        field(block, "description"),
        field(block, "category"),
    ))
}
901
902/// Build a hub index by scanning a packages directory.
903///
904/// When `source_dir` is provided, scans that directory directly
905/// (for generating an index from a repo checkout).  Metadata comes
906/// only from `init.lua` — no manifest lookup, no card counts.
907///
908/// When `source_dir` is `None`, scans `~/.algocline/packages/` and
909/// enriches entries with manifest source and local card counts.
910fn build_index(source_dir: Option<&std::path::Path>) -> HubIndex {
911    let empty = || HubIndex {
912        schema_version: "hub_index/v0".into(),
913        updated_at: super::manifest::now_iso8601(),
914        packages: Vec::new(),
915    };
916
917    let pkg_dir = match source_dir {
918        Some(d) => d.to_path_buf(),
919        None => {
920            let home = match dirs::home_dir() {
921                Some(h) => h,
922                None => return empty(),
923            };
924            home.join(".algocline").join("packages")
925        }
926    };
927
928    let use_local_state = source_dir.is_none();
929    let card_counts = if use_local_state {
930        local_card_counts()
931    } else {
932        HashMap::new()
933    };
934    let manifest = if use_local_state {
935        manifest::load_manifest().unwrap_or_default()
936    } else {
937        manifest::Manifest::default()
938    };
939
940    let mut entries = Vec::new();
941
942    let dir_entries = match std::fs::read_dir(&pkg_dir) {
943        Ok(e) => e,
944        Err(_) => return empty(),
945    };
946
947    for entry in dir_entries.flatten() {
948        if !entry.path().is_dir() {
949            continue;
950        }
951        let dir_name = match entry.file_name().to_str() {
952            Some(n) if !n.starts_with('.') && !n.starts_with('_') => n.to_string(),
953            _ => continue,
954        };
955
956        let init_lua = entry.path().join("init.lua");
957        if !init_lua.exists() {
958            continue;
959        }
960
961        let (name, version, description, category) = parse_meta_from_init_lua(&init_lua)
962            .unwrap_or_else(|| {
963                (
964                    dir_name.clone(),
965                    String::new(),
966                    String::new(),
967                    String::new(),
968                )
969            });
970
971        let docstring = extract_docstring(&init_lua);
972
973        // Use manifest source only for local-state mode
974        let source = manifest
975            .packages
976            .get(&dir_name)
977            .map(|e| e.source.clone())
978            .unwrap_or_default();
979
980        entries.push(IndexEntry {
981            name,
982            version,
983            description,
984            category,
985            source,
986            card_count: card_counts.get(&dir_name).copied().unwrap_or(0),
987            best_card: None,
988            docstring,
989        });
990    }
991
992    entries.sort_by(|a, b| a.name.cmp(&b.name));
993
994    HubIndex {
995        schema_version: "hub_index/v0".into(),
996        updated_at: super::manifest::now_iso8601(),
997        packages: entries,
998    }
999}
1000
1001// ─── Public API ────────────────────────────────────────────────
1002
1003impl AppService {
1004    /// Generate a hub index from a packages directory.
1005    ///
1006    /// When `source_dir` is provided, scans that directory (e.g. a
1007    /// repo checkout) — pure metadata extraction, no manifest or card
1008    /// data mixed in.  When omitted, scans `~/.algocline/packages/`.
1009    ///
1010    /// Writes the index to `output_path` (for CI / publishing).
1011    /// Does NOT touch the remote search cache.
1012    pub fn hub_reindex(
1013        &self,
1014        output_path: Option<&str>,
1015        source_dir: Option<&str>,
1016    ) -> Result<String, String> {
1017        let src = source_dir.map(std::path::Path::new);
1018        if let Some(d) = src {
1019            if !d.is_dir() {
1020                return Err(format!("source_dir '{}' is not a directory", d.display()));
1021            }
1022        }
1023        let index = build_index(src);
1024
1025        let written_path = if let Some(path) = output_path {
1026            let json = serde_json::to_string_pretty(&index)
1027                .map_err(|e| format!("Failed to serialize index: {e}"))?;
1028            std::fs::write(path, &json)
1029                .map_err(|e| format!("Failed to write index to {path}: {e}"))?;
1030            Some(path.to_string())
1031        } else {
1032            None
1033        };
1034
1035        let response = serde_json::json!({
1036            "package_count": index.packages.len(),
1037            "updated_at": index.updated_at,
1038            "output_path": written_path,
1039            "source_dir": source_dir,
1040        });
1041        Ok(response.to_string())
1042    }
1043
1044    /// Show detailed information for a single package.
1045    ///
1046    /// Aggregates package metadata (from index or local `init.lua`),
1047    /// all Cards, aliases, and eval stats into one response.
1048    pub fn hub_info(&self, pkg: &str) -> Result<String, String> {
1049        use algocline_engine::card;
1050
1051        // Guard against path traversal
1052        if pkg.contains("..") || pkg.contains('/') || pkg.contains('\\') {
1053            return Err(format!("Invalid package name: '{pkg}'"));
1054        }
1055
1056        // Package metadata: try remote index first, fall back to local
1057        let installed = installed_packages();
1058        let is_installed = installed.contains_key(pkg);
1059
1060        let (version, description, category, source) = {
1061            // Try to get from remote index
1062            let (remote, _) = fetch_remote_indices();
1063            if let Some(entry) = remote.packages.iter().find(|e| e.name == pkg) {
1064                (
1065                    entry.version.clone(),
1066                    entry.description.clone(),
1067                    entry.category.clone(),
1068                    entry.source.clone(),
1069                )
1070            } else if is_installed {
1071                // Fall back to local init.lua parse
1072                let home = dirs::home_dir().ok_or("Cannot determine home directory")?;
1073                let init_lua = home
1074                    .join(".algocline")
1075                    .join("packages")
1076                    .join(pkg)
1077                    .join("init.lua");
1078                let meta = parse_meta_from_init_lua(&init_lua);
1079                let manifest_source = manifest::load_manifest()
1080                    .ok()
1081                    .and_then(|m| m.packages.get(pkg).map(|e| e.source.clone()))
1082                    .unwrap_or_default();
1083                match meta {
1084                    Some((_, v, d, c)) => (v, d, c, manifest_source),
1085                    None => (
1086                        installed.get(pkg).cloned().flatten().unwrap_or_default(),
1087                        String::new(),
1088                        String::new(),
1089                        manifest_source,
1090                    ),
1091                }
1092            } else {
1093                return Err(format!(
1094                    "Package '{pkg}' not found in remote indices or locally installed packages"
1095                ));
1096            }
1097        };
1098
1099        // Cards for this package (single call, reused for stats)
1100        let card_rows = card::list(Some(pkg)).unwrap_or_default();
1101        let cards_json = card::summaries_to_json(&card_rows);
1102
1103        // Aliases for this package
1104        let aliases_json = match card::alias_list(Some(pkg)) {
1105            Ok(rows) => card::aliases_to_json(&rows),
1106            Err(_) => serde_json::json!([]),
1107        };
1108
1109        // Stats: card count, best pass_rate, eval count
1110        let card_count = card_rows.len();
1111        let best_pass_rate = card_rows
1112            .iter()
1113            .filter_map(|c| c.pass_rate)
1114            .fold(f64::NEG_INFINITY, f64::max);
1115        let best_pass_rate = if best_pass_rate.is_finite() {
1116            Some(best_pass_rate)
1117        } else {
1118            None
1119        };
1120
1121        // Eval count from evals directory
1122        let eval_count = count_evals_for_pkg(pkg);
1123
1124        let response = serde_json::json!({
1125            "pkg": {
1126                "name": pkg,
1127                "version": version,
1128                "description": description,
1129                "category": category,
1130                "source": source,
1131                "installed": is_installed,
1132            },
1133            "cards": cards_json,
1134            "aliases": aliases_json,
1135            "stats": {
1136                "card_count": card_count,
1137                "eval_count": eval_count,
1138                "best_pass_rate": best_pass_rate,
1139            },
1140        });
1141        Ok(response.to_string())
1142    }
1143
1144    /// Search packages across remote indices + local state.
1145    ///
1146    /// Index URLs are discovered from hub registries, manifest sources,
1147    /// and `AUTO_INSTALL_SOURCES`. Each source is cached independently.
1148    ///
1149    /// ## List-tool options (`opts`)
1150    ///
1151    /// The `opts` parameter carries the list-tool primitives
1152    /// (`limit / sort / filter / fields / verbose`) shared with other
1153    /// list-style MCP tools. Defaults:
1154    ///
1155    /// - `limit` — 50 when `None`. `Some(0)` means **no limit** (return
1156    ///   all matching entries — empty-means-all idiom).
1157    /// - `sort` — `"-installed,name"` when `None` (installed first, then
1158    ///   ascending by name).
1159    /// - `filter` — no additional filter. Legacy `category` /
1160    ///   `installed_only` parameters are merged into the filter map when
1161    ///   `filter` does not already contain those keys (explicit
1162    ///   `filter` wins on conflict).
1163    /// - `fields` / `verbose` — projection is applied to every entry in
1164    ///   the `results` array (see
1165    ///   [`super::list_opts::resolve_fields`]). Top-level keys
1166    ///   (`total`, `sources`, `warnings`) are never projected away.
1167    ///
1168    /// ## docstring handling
1169    ///
1170    /// [`SearchResult::docstring`] is `skip_serializing`, so it is
1171    /// absent from the default serialized view. When the resolved
1172    /// projection contains `"docstring"`, it is re-injected into the
1173    /// per-entry JSON via
1174    /// [`SearchResult::to_value_with_optional_docstring`].
1175    pub(crate) fn hub_search(
1176        &self,
1177        query: Option<&str>,
1178        category: Option<&str>,
1179        installed_only: Option<bool>,
1180        opts: ListOpts,
1181    ) -> Result<String, String> {
1182        let (remote, warnings) = fetch_remote_indices();
1183        let mut results = merge(&remote);
1184
1185        // Filter by query (internal signal covers name/description/
1186        // category/docstring — `matches_query` unchanged).
1187        let query_lower = query.filter(|q| !q.is_empty()).map(|q| q.to_lowercase());
1188        if let Some(ref ql) = query_lower {
1189            results.retain(|r| matches_query(r, ql));
1190        }
1191
1192        // Compute docstring_matched per remaining hit: Some(true) only
1193        // when the query matched docstring and none of {name,
1194        // description, category}; otherwise None.
1195        if let Some(ref ql) = query_lower {
1196            for r in &mut results {
1197                let other_hit = r.name.to_lowercase().contains(ql)
1198                    || r.description.to_lowercase().contains(ql)
1199                    || r.category.to_lowercase().contains(ql);
1200                let doc_hit = r.docstring.to_lowercase().contains(ql);
1201                r.docstring_matched = if !other_hit && doc_hit {
1202                    Some(true)
1203                } else {
1204                    None
1205                };
1206            }
1207        }
1208
1209        // Build the effective filter map: start from explicit `opts.filter`,
1210        // then fold legacy `category` / `installed_only` in only if the
1211        // corresponding key is not already set (explicit filter wins).
1212        let mut filter_map: std::collections::HashMap<String, serde_json::Value> =
1213            opts.filter.unwrap_or_default();
1214        if let Some(cat) = category {
1215            filter_map
1216                .entry("category".to_string())
1217                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1218        }
1219        if let Some(only) = installed_only {
1220            // Preserve prior semantic: `installed_only=Some(false)` was a
1221            // no-op (it did not force `installed=false`). Only fold when
1222            // explicitly true.
1223            if only {
1224                filter_map
1225                    .entry("installed".to_string())
1226                    .or_insert(serde_json::Value::Bool(true));
1227            }
1228        }
1229
1230        // Resolve sort keys up-front so an invalid sort string errors out
1231        // before we touch results.
1232        let sort_str = opts.sort.as_deref().unwrap_or("-installed,name");
1233        let sort_keys = parse_sort(sort_str)?;
1234
1235        // Resolve projection fields; this also rejects unknown `verbose`
1236        // values before any heavy work.
1237        let fields = resolve_fields(
1238            opts.verbose.as_deref(),
1239            opts.fields.as_deref(),
1240            HUB_SEARCH_SUMMARY,
1241            HUB_SEARCH_FULL,
1242        )?;
1243        let include_docstring = fields.iter().any(|f| f == "docstring");
1244
1245        // Serialize each result to a Value (docstring optionally attached)
1246        // so filter/sort/projection work uniformly on JSON values.
1247        let mut items: Vec<serde_json::Value> = results
1248            .iter()
1249            .map(|r| r.to_value_with_optional_docstring(include_docstring))
1250            .collect();
1251
1252        // Filter AFTER serialization so filter keys can reference
1253        // projection-level shape (e.g. `category`, `installed`).
1254        if !filter_map.is_empty() {
1255            items.retain(|v| matches_filter(v, &filter_map));
1256        }
1257
1258        // Sort.
1259        apply_sort_by_value(&mut items, &sort_keys);
1260
1261        // Limit. `limit = Some(0)` means "no limit" (return all results)
1262        // — mirrors the `empty=all & some=filter` idiom used across the
1263        // list-tool contract. `None` falls back to the default cap (50).
1264        let total = items.len();
1265        let limit = opts.limit.unwrap_or(50);
1266        if limit > 0 {
1267            items.truncate(limit);
1268        }
1269
1270        // Projection (after truncation — unselected fields are stripped
1271        // from the kept entries only).
1272        let projected: Vec<serde_json::Value> = items
1273            .into_iter()
1274            .map(|v| project_fields(v, &fields))
1275            .collect();
1276
1277        // Collect discovered sources for transparency.
1278        let sources = discover_index_urls();
1279
1280        let mut json = serde_json::json!({
1281            "results": projected,
1282            "total": total,
1283            "sources": sources,
1284        });
1285        if !warnings.is_empty() {
1286            json["warnings"] = serde_json::json!(warnings);
1287        }
1288        Ok(json.to_string())
1289    }
1290}
1291
1292#[cfg(test)]
1293mod tests {
1294    use super::*;
1295
1296    #[test]
1297    fn repo_to_index_url_github() {
1298        assert_eq!(
1299            repo_to_index_url("https://github.com/ynishi/algocline-bundled-packages"),
1300            Some(
1301                "https://raw.githubusercontent.com/ynishi/algocline-bundled-packages/main/hub_index.json"
1302                    .to_string()
1303            )
1304        );
1305    }
1306
1307    #[test]
1308    fn repo_to_index_url_github_trailing_slash() {
1309        assert_eq!(
1310            repo_to_index_url("https://github.com/user/repo/"),
1311            Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json".to_string())
1312        );
1313    }
1314
1315    #[test]
1316    fn repo_to_index_url_github_dot_git() {
1317        assert_eq!(
1318            repo_to_index_url("https://github.com/user/repo.git"),
1319            Some("https://raw.githubusercontent.com/user/repo/main/hub_index.json".to_string())
1320        );
1321    }
1322
1323    #[test]
1324    fn repo_to_index_url_direct_json() {
1325        assert_eq!(
1326            repo_to_index_url("https://example.com/my_index.json"),
1327            Some("https://example.com/my_index.json".to_string())
1328        );
1329    }
1330
1331    #[test]
1332    fn repo_to_index_url_unknown_host_no_json() {
1333        assert_eq!(repo_to_index_url("https://example.com/some-repo"), None);
1334    }
1335
1336    #[test]
1337    fn repo_to_index_url_local_path() {
1338        assert_eq!(repo_to_index_url("/home/user/my-pkg"), None);
1339    }
1340
1341    #[test]
1342    fn cache_key_stable() {
1343        let k1 = cache_key("https://example.com/index.json");
1344        let k2 = cache_key("https://example.com/index.json");
1345        assert_eq!(k1, k2);
1346        assert_eq!(k1.len(), 16); // 16 hex chars
1347    }
1348
1349    #[test]
1350    fn cache_key_different_urls() {
1351        let k1 = cache_key("https://a.com/index.json");
1352        let k2 = cache_key("https://b.com/index.json");
1353        assert_ne!(k1, k2);
1354    }
1355
1356    #[test]
1357    fn parse_meta_flat() {
1358        let tmp = tempfile::tempdir().unwrap();
1359        let path = tmp.path().join("init.lua");
1360        std::fs::write(
1361            &path,
1362            r#"
1363local M = {}
1364M.meta = {
1365    name = "my_pkg",
1366    version = "1.0.0",
1367    description = "A test package",
1368    category = "reasoning",
1369}
1370return M
1371"#,
1372        )
1373        .unwrap();
1374
1375        let result = parse_meta_from_init_lua(&path).unwrap();
1376        assert_eq!(result.0, "my_pkg");
1377        assert_eq!(result.1, "1.0.0");
1378        assert_eq!(result.2, "A test package");
1379        assert_eq!(result.3, "reasoning");
1380    }
1381
1382    #[test]
1383    fn parse_meta_nested_table() {
1384        let tmp = tempfile::tempdir().unwrap();
1385        let path = tmp.path().join("init.lua");
1386        std::fs::write(
1387            &path,
1388            r#"
1389local M = {}
1390M.meta = {
1391    name = "nested_pkg",
1392    tags = { "a", "b" },
1393    description = "After nested",
1394}
1395return M
1396"#,
1397        )
1398        .unwrap();
1399
1400        let result = parse_meta_from_init_lua(&path).unwrap();
1401        assert_eq!(result.0, "nested_pkg");
1402        assert_eq!(result.2, "After nested");
1403    }
1404
1405    /// End-to-end sanity check against a real bundled-packages checkout.
1406    /// Set `BUNDLED_PACKAGES_DIR` to the repo root and run with
1407    /// `cargo test -- --ignored parse_meta_real_bundled`.
1408    #[test]
1409    #[ignore]
1410    fn parse_meta_real_bundled_packages() {
1411        let Ok(root) = std::env::var("BUNDLED_PACKAGES_DIR") else {
1412            panic!("set BUNDLED_PACKAGES_DIR=/path/to/algocline-bundled-packages");
1413        };
1414        let root = std::path::Path::new(&root);
1415        let mut total = 0usize;
1416        let mut failed_parse: Vec<String> = Vec::new();
1417        let mut empty_desc: Vec<String> = Vec::new();
1418        for entry in std::fs::read_dir(root).unwrap().flatten() {
1419            if !entry.path().is_dir() {
1420                continue;
1421            }
1422            let name = entry.file_name().to_string_lossy().to_string();
1423            if name.starts_with('.') || name.starts_with('_') {
1424                continue;
1425            }
1426            let init_lua = entry.path().join("init.lua");
1427            if !init_lua.exists() {
1428                continue;
1429            }
1430            total += 1;
1431            match parse_meta_from_init_lua(&init_lua) {
1432                Some((_n, _v, desc, _c)) => {
1433                    if desc.is_empty() {
1434                        empty_desc.push(name);
1435                    }
1436                }
1437                None => failed_parse.push(name),
1438            }
1439        }
1440        assert!(total >= 100, "expected ≥100 pkgs, got {total}");
1441        assert!(
1442            failed_parse.is_empty(),
1443            "parse_meta returned None for {} pkgs: {:?}",
1444            failed_parse.len(),
1445            failed_parse
1446        );
1447        assert!(
1448            empty_desc.is_empty(),
1449            "empty description for {} pkgs: {:?}",
1450            empty_desc.len(),
1451            empty_desc
1452        );
1453    }
1454
1455    #[test]
1456    fn parse_meta_concat_string_literals() {
1457        // description = "foo " .. "bar " .. "baz" should produce "foo bar baz"
1458        let tmp = tempfile::tempdir().unwrap();
1459        let path = tmp.path().join("init.lua");
1460        std::fs::write(
1461            &path,
1462            r#"
1463local M = {}
1464M.meta = {
1465    name = "concat_pkg",
1466    version = "0.1.0",
1467    description = "Adaptive Branching MCTS — Thompson Sampling with dynamic "
1468        .. "wider/deeper decisions. GEN node mechanism for principled branching. "
1469        .. "Consistently outperforms standard MCTS and repeated sampling.",
1470    category = "reasoning",
1471}
1472return M
1473"#,
1474        )
1475        .unwrap();
1476
1477        let result = parse_meta_from_init_lua(&path).unwrap();
1478        assert_eq!(result.0, "concat_pkg");
1479        assert_eq!(result.1, "0.1.0");
1480        assert_eq!(
1481            result.2,
1482            "Adaptive Branching MCTS — Thompson Sampling with dynamic \
1483             wider/deeper decisions. GEN node mechanism for principled branching. \
1484             Consistently outperforms standard MCTS and repeated sampling."
1485        );
1486        assert_eq!(result.3, "reasoning");
1487    }
1488
1489    #[test]
1490    fn parse_meta_large_leading_docstring() {
1491        // M.meta located beyond 2KB (long leading --- docstring) must still parse.
1492        let tmp = tempfile::tempdir().unwrap();
1493        let path = tmp.path().join("init.lua");
1494        let mut content = String::new();
1495        // Generate ~4KB of leading comments
1496        for i in 0..120 {
1497            content.push_str(&format!(
1498                "--- line {i}: this is a long documentation comment to push M.meta beyond the old 2KB scan window\n"
1499            ));
1500        }
1501        content.push_str(
1502            r#"
1503local M = {}
1504M.meta = {
1505    name = "late_meta_pkg",
1506    version = "0.2.0",
1507    description = "Located past 2KB",
1508    category = "test",
1509}
1510return M
1511"#,
1512        );
1513        std::fs::write(&path, &content).unwrap();
1514        assert!(content.len() > 2048, "fixture should exceed 2KB");
1515
1516        let result = parse_meta_from_init_lua(&path).unwrap();
1517        assert_eq!(result.0, "late_meta_pkg");
1518        assert_eq!(result.1, "0.2.0");
1519        assert_eq!(result.2, "Located past 2KB");
1520        assert_eq!(result.3, "test");
1521    }
1522
1523    #[test]
1524    fn parse_meta_word_boundary() {
1525        let tmp = tempfile::tempdir().unwrap();
1526        let path = tmp.path().join("init.lua");
1527        std::fs::write(
1528            &path,
1529            r#"
1530local M = {}
1531M.meta = {
1532    name = "wb_pkg",
1533    short_description = "should not match",
1534    description = "correct one",
1535}
1536return M
1537"#,
1538        )
1539        .unwrap();
1540
1541        let result = parse_meta_from_init_lua(&path).unwrap();
1542        assert_eq!(result.0, "wb_pkg");
1543        assert_eq!(result.2, "correct one");
1544    }
1545
1546    #[test]
1547    fn merge_dedup_uses_hashset() {
1548        // Verify that merge correctly handles local-only packages
1549        // without O(n*m) behavior (structural test).
1550        let remote = HubIndex {
1551            schema_version: "hub_index/v0".into(),
1552            updated_at: String::new(),
1553            packages: vec![IndexEntry {
1554                name: "remote_only".into(),
1555                version: "1.0".into(),
1556                description: "from remote".into(),
1557                category: "test".into(),
1558                source: String::new(),
1559                card_count: 0,
1560                best_card: None,
1561                docstring: String::new(),
1562            }],
1563        };
1564
1565        let results = merge(&remote);
1566        // Should include remote_only + any locally installed packages
1567        assert!(results.iter().any(|r| r.name == "remote_only"));
1568    }
1569
1570    #[test]
1571    fn extract_docstring_collects_leading_comments() {
1572        let tmp = tempfile::tempdir().unwrap();
1573        let path = tmp.path().join("init.lua");
1574        std::fs::write(
1575            &path,
1576            r#"--- cascade — Multi-level difficulty routing with confidence gating
1577--- Based on: "FrugalGPT" (Chen et al., 2023)
1578--- Uses Thompson Sampling for budget allocation.
1579
1580local M = {}
1581M.meta = { name = "cascade" }
1582return M
1583"#,
1584        )
1585        .unwrap();
1586
1587        let doc = extract_docstring(&path);
1588        assert!(doc.contains("FrugalGPT"), "should contain paper ref");
1589        assert!(
1590            doc.contains("Thompson Sampling"),
1591            "should contain technique"
1592        );
1593        assert!(!doc.contains("local M"), "should not contain code");
1594    }
1595
1596    #[test]
1597    fn extract_docstring_empty_when_no_comments() {
1598        let tmp = tempfile::tempdir().unwrap();
1599        let path = tmp.path().join("init.lua");
1600        std::fs::write(&path, "local M = {}\nreturn M\n").unwrap();
1601
1602        let doc = extract_docstring(&path);
1603        assert!(doc.is_empty());
1604    }
1605
1606    #[test]
1607    fn matches_query_searches_docstring() {
1608        let result = SearchResult {
1609            name: "cascade".into(),
1610            version: "0.1.0".into(),
1611            description: "Multi-level routing".into(),
1612            category: "meta".into(),
1613            source: String::new(),
1614            installed: true,
1615            card_count: 0,
1616            best_card: None,
1617            docstring: "Based on FrugalGPT. Uses Thompson Sampling.".into(),
1618            docstring_matched: None,
1619        };
1620
1621        assert!(matches_query(&result, "thompson"), "docstring match");
1622        assert!(matches_query(&result, "FrugalGPT"), "docstring match case");
1623        assert!(matches_query(&result, "routing"), "description match");
1624        assert!(!matches_query(&result, "bayesian"), "no match");
1625    }
1626
1627    // ─── SearchResult::to_value_with_optional_docstring ────────────
1628    //
1629    // `docstring` is `skip_serializing` so the default JSON view must
1630    // omit it, and it is re-attached only when the projection path says
1631    // so. These tests pin the two branches of that helper — they are the
1632    // hinge that `verbose="full"` / `fields=["docstring"]` rely on.
1633
1634    fn sample_search_result() -> SearchResult {
1635        SearchResult {
1636            name: "cascade".into(),
1637            version: "0.1.0".into(),
1638            description: "Multi-level routing".into(),
1639            category: "reasoning".into(),
1640            source: "https://example.com/cascade".into(),
1641            installed: true,
1642            card_count: 3,
1643            best_card: None,
1644            docstring: "Based on FrugalGPT. Uses Thompson Sampling.".into(),
1645            docstring_matched: None,
1646        }
1647    }
1648
1649    #[test]
1650    fn to_value_default_omits_docstring() {
1651        let r = sample_search_result();
1652        let v = r.to_value_with_optional_docstring(false);
1653        let obj = v.as_object().expect("object");
1654        assert!(
1655            !obj.contains_key("docstring"),
1656            "default summary must not leak docstring"
1657        );
1658        assert_eq!(obj.get("name").and_then(|x| x.as_str()), Some("cascade"));
1659        // `docstring_matched` is Option<None> → `skip_serializing_if`
1660        // must omit it when the query did not mark a docstring-only hit.
1661        assert!(
1662            !obj.contains_key("docstring_matched"),
1663            "docstring_matched=None must be omitted"
1664        );
1665    }
1666
1667    #[test]
1668    fn to_value_include_reattaches_docstring() {
1669        let r = sample_search_result();
1670        let v = r.to_value_with_optional_docstring(true);
1671        let obj = v.as_object().expect("object");
1672        assert_eq!(
1673            obj.get("docstring").and_then(|x| x.as_str()),
1674            Some("Based on FrugalGPT. Uses Thompson Sampling.")
1675        );
1676    }
1677
1678    #[test]
1679    fn to_value_serializes_docstring_matched_when_set() {
1680        let mut r = sample_search_result();
1681        r.docstring_matched = Some(true);
1682        let v = r.to_value_with_optional_docstring(false);
1683        let obj = v.as_object().expect("object");
1684        assert_eq!(
1685            obj.get("docstring_matched").and_then(|x| x.as_bool()),
1686            Some(true)
1687        );
1688    }
1689
1690    // ─── projection glue ──────────────────────────────────────────
1691    //
1692    // These tests exercise the projection path that `hub_search` uses to
1693    // shape output: `resolve_fields` + `project_fields` applied to a
1694    // `to_value_with_optional_docstring`-serialized entry. They pin the
1695    // wf-sim-verbose contract: `fields` wins over `verbose`, default
1696    // summary preset excludes docstring, `full` preset includes
1697    // docstring, unknown keys silently skipped.
1698
/// With neither `verbose` nor `fields` supplied, the resolved field list
/// must project the sample entry to an object without `docstring` but
/// with the core summary keys.
#[test]
fn hub_search_default_summary_excludes_docstring() {
    // Keys of the summary preset that the sample entry is expected to carry.
    const EXPECTED_KEYS: [&str; 5] = ["name", "version", "description", "category", "installed"];

    let hit = sample_search_result();
    let selected = resolve_fields(None, None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
    let wants_docstring = selected.iter().any(|field| field == "docstring");

    let serialized = hit.to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(serialized, &selected);
    let entry = projected.as_object().expect("object");

    assert!(
        entry.get("docstring").is_none(),
        "summary preset must omit docstring"
    );
    for key in EXPECTED_KEYS {
        assert!(entry.contains_key(key), "summary preset key {key} missing");
    }
}
1718
/// `verbose = "full"` must resolve to a field list that keeps the
/// docstring and the additional `source` / `card_count` keys.
#[test]
fn hub_search_verbose_full_includes_docstring() {
    // Keys the full preset is expected to add for the sample entry.
    const EXPECTED_KEYS: [&str; 2] = ["source", "card_count"];

    let hit = sample_search_result();
    let selected =
        resolve_fields(Some("full"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();
    let wants_docstring = selected.iter().any(|field| field == "docstring");

    let serialized = hit.to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(serialized, &selected);
    let entry = projected.as_object().expect("object");

    let docstring = entry.get("docstring").and_then(serde_json::Value::as_str);
    assert_eq!(
        docstring,
        Some("Based on FrugalGPT. Uses Thompson Sampling.")
    );
    for key in EXPECTED_KEYS {
        assert!(entry.contains_key(key), "full preset key {key} missing");
    }
}
1739
/// An explicit `fields` list takes precedence over `verbose`: even with
/// `verbose = "summary"`, requesting `docstring` by name must return it,
/// and nothing beyond the requested keys.
#[test]
fn hub_search_fields_beats_verbose() {
    let hit = sample_search_result();
    let requested = vec![String::from("name"), String::from("docstring")];

    let resolved = resolve_fields(
        Some("summary"),
        Some(&requested),
        HUB_SEARCH_SUMMARY,
        HUB_SEARCH_FULL,
    );
    let selected = resolved.unwrap();
    let wants_docstring = selected.iter().any(|field| field == "docstring");

    let serialized = hit.to_value_with_optional_docstring(wants_docstring);
    let projected = project_fields(serialized, &selected);
    let entry = projected.as_object().expect("object");

    assert_eq!(entry.len(), 2, "only the two requested fields");
    assert!(entry.get("name").is_some());
    assert!(entry.get("docstring").is_some());
}
1763
/// A requested field that the entry does not have must simply be absent
/// from the projection, leaving only the keys that do exist.
#[test]
fn hub_search_fields_unknown_key_silently_skipped() {
    let hit = sample_search_result();
    let requested = vec![String::from("name"), String::from("bogus")];
    let selected =
        resolve_fields(None, Some(&requested), HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL).unwrap();

    let serialized = hit.to_value_with_optional_docstring(false);
    let projected = project_fields(serialized, &selected);
    let entry = projected.as_object().expect("object");

    assert_eq!(entry.len(), 1, "bogus must not appear");
    assert!(entry.get("name").is_some());
}
1775
/// An unrecognised `verbose` value must be rejected, and the error text
/// must echo the rejected value back.
#[test]
fn hub_search_invalid_verbose_errors() {
    let outcome = resolve_fields(Some("fat"), None, HUB_SEARCH_SUMMARY, HUB_SEARCH_FULL);
    let message = outcome.unwrap_err();
    assert!(
        message.contains("fat"),
        "error must mention the offending value"
    );
}
1785
1786    // ─── docstring_matched classification ─────────────────────────
1787    //
1788    // The query-time classification rule: `docstring_matched = Some(true)`
1789    // only when the query hit docstring AND missed name/description/
1790    // category; otherwise `None` (and therefore omitted from output).
1791    // The logic lives inline in `hub_search`; we re-create it here over a
1792    // tiny local helper so the three cases stay pinned as a contract.
1793
1794    fn classify(r: &SearchResult, query: &str) -> Option<bool> {
1795        let ql = query.to_lowercase();
1796        if query.is_empty() {
1797            return None;
1798        }
1799        let other_hit = r.name.to_lowercase().contains(&ql)
1800            || r.description.to_lowercase().contains(&ql)
1801            || r.category.to_lowercase().contains(&ql);
1802        let doc_hit = r.docstring.to_lowercase().contains(&ql);
1803        if !other_hit && doc_hit {
1804            Some(true)
1805        } else {
1806            None
1807        }
1808    }
1809
/// A query found in the docstring and in no other searched field must be
/// classified as a docstring-only hit.
#[test]
fn docstring_matched_true_when_only_docstring_hits() {
    let hit = sample_search_result();
    // The sample's docstring mentions "Thompson"; the other searched
    // fields are expected not to.
    let verdict = classify(&hit, "thompson");
    assert_eq!(verdict, Some(true));
}
1816
/// A query that matches the package name must not be flagged as a
/// docstring-only hit, whatever the docstring contains.
#[test]
fn docstring_matched_none_when_name_also_hits() {
    let hit = sample_search_result();
    // "cascade" is the sample's name, so the name match takes over.
    let verdict = classify(&hit, "cascade");
    assert_eq!(verdict, None);
}
1823
/// A query that matches the description must not be flagged as a
/// docstring-only hit.
#[test]
fn docstring_matched_none_when_description_hits() {
    let hit = sample_search_result();
    // "routing" is expected to occur in the sample's description.
    let verdict = classify(&hit, "routing");
    assert_eq!(verdict, None);
}
1830
/// An empty query never yields a docstring-only classification.
#[test]
fn docstring_matched_none_when_query_empty() {
    let hit = sample_search_result();
    let verdict = classify(&hit, "");
    assert_eq!(verdict, None);
}
1836
1837    // ─── filter fold (legacy params → filter map) ─────────────────
1838    //
1839    // Behavioural rule: legacy `category` / `installed_only=true` fold
1840    // into the filter map only when the corresponding key is not
1841    // already set (explicit `filter` wins). `installed_only=false` is a
1842    // no-op (preserves prior semantics).
1843
1844    fn build_filter_map(
1845        category: Option<&str>,
1846        installed_only: Option<bool>,
1847        explicit: Option<HashMap<String, serde_json::Value>>,
1848    ) -> HashMap<String, serde_json::Value> {
1849        let mut filter_map = explicit.unwrap_or_default();
1850        if let Some(cat) = category {
1851            filter_map
1852                .entry("category".to_string())
1853                .or_insert_with(|| serde_json::Value::String(cat.to_string()));
1854        }
1855        if let Some(only) = installed_only {
1856            if only {
1857                filter_map
1858                    .entry("installed".to_string())
1859                    .or_insert(serde_json::Value::Bool(true));
1860            }
1861        }
1862        filter_map
1863    }
1864
/// The legacy `category` parameter alone must show up as the
/// `"category"` filter entry.
#[test]
fn filter_by_category_via_legacy_param() {
    let folded = build_filter_map(Some("reasoning"), None, None);
    let expected = serde_json::Value::String("reasoning".to_string());
    assert_eq!(folded.get("category"), Some(&expected));
}
1873
/// `installed_only = true` alone must show up as `"installed": true`.
#[test]
fn filter_by_installed_only_via_legacy_param() {
    let folded = build_filter_map(None, Some(true), None);
    let expected = serde_json::Value::Bool(true);
    assert_eq!(folded.get("installed"), Some(&expected));
}
1879
/// `installed_only = false` must not add an `"installed"` entry.
#[test]
fn filter_installed_only_false_is_noop() {
    let folded = build_filter_map(None, Some(false), None);
    assert!(
        folded.get("installed").is_none(),
        "installed_only=false should not fold in"
    );
}
1888
/// When the explicit filter and the legacy parameter both name a
/// category, the explicit filter's value must survive.
#[test]
fn filter_beats_legacy_param_on_conflict() {
    // Explicit filter: category=meta. Legacy parameter: category=reasoning.
    let explicit: HashMap<String, serde_json::Value> = HashMap::from([(
        "category".to_string(),
        serde_json::Value::String("meta".to_string()),
    )]);

    let folded = build_filter_map(Some("reasoning"), None, Some(explicit));

    let expected = serde_json::Value::String("meta".to_string());
    assert_eq!(folded.get("category"), Some(&expected));
}
1904
/// When the explicit filter sets a different key than the legacy
/// parameter, both entries must end up in the result.
#[test]
fn filter_merges_legacy_when_no_conflict() {
    // Explicit filter only constrains `installed`; category comes from
    // the legacy parameter.
    let explicit: HashMap<String, serde_json::Value> =
        HashMap::from([("installed".to_string(), serde_json::Value::Bool(true))]);

    let folded = build_filter_map(Some("reasoning"), None, Some(explicit));

    let expected_category = serde_json::Value::String("reasoning".to_string());
    let expected_installed = serde_json::Value::Bool(true);
    assert_eq!(folded.get("category"), Some(&expected_category));
    assert_eq!(folded.get("installed"), Some(&expected_installed));
}
1918
1919    // ─── default sort verification ────────────────────────────────
1920
/// The sort spec `-installed,name` must parse into two keys (installed
/// descending, then name ascending) and order rows accordingly.
#[test]
fn default_sort_is_minus_installed_name() {
    let sort_keys = parse_sort("-installed,name").unwrap();
    assert_eq!(sort_keys.len(), 2);

    let (primary, secondary) = (&sort_keys[0], &sort_keys[1]);
    assert_eq!(primary.key, "installed");
    assert!(primary.desc, "installed must sort desc (true first)");
    assert_eq!(secondary.key, "name");
    assert!(!secondary.desc);

    // Sort a small, deliberately shuffled set of rows and check the
    // resulting name order: installed rows first, each group by name.
    let mut rows: Vec<serde_json::Value> = [
        (false, "zeta"),
        (true, "mu"),
        (false, "alpha"),
        (true, "beta"),
    ]
    .iter()
    .map(|&(installed, name)| serde_json::json!({"installed": installed, "name": name}))
    .collect();
    apply_sort_by_value(&mut rows, &sort_keys);

    let mut names: Vec<&str> = Vec::with_capacity(rows.len());
    for row in &rows {
        names.push(row["name"].as_str().unwrap_or(""));
    }
    assert_eq!(names, ["beta", "mu", "alpha", "zeta"]);
}
1944}