rsclaw 2026.5.1

AI Agent Engine Compatible with OpenClaw
Documentation
//! Skill registry abstraction.
//!
//! All registries — clawhub.ai, skillhub (Tencent), skills.sh, iwencai — are
//! variants of the same `Registry` enum so search and install logic is uniform.
//! Callers pick which registries to activate; the concurrent merge is always
//! the same.

use reqwest::Client;
use tracing::debug;

// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------

/// A single skill search result from any registry.
///
/// Most fields are optional because registries do not all report the same
/// data (for example, skills.sh results carry no version or download count).
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Identifier as returned by the registry. skills.sh results use the
    /// `source@skillId` form when a source is present; registries that omit
    /// the identifier yield `"unknown"`.
    pub slug: String,
    /// Version string, when the registry reports one.
    pub version: Option<String>,
    /// Human-readable summary or description, when available.
    pub description: Option<String>,
    /// Download count; counted at half weight when ranking results.
    pub downloads: Option<u64>,
    /// Install count; the primary ranking signal.
    pub installs: Option<u64>,
    /// Star / favorite count; counted at ten times weight when ranking results.
    pub stars: Option<u64>,
    /// Which registry returned this result.
    pub registry: String,
}

// ---------------------------------------------------------------------------
// Registry enum
// ---------------------------------------------------------------------------

/// A single skill registry that can be searched.
///
/// All variants share the same `search()` method so concurrent search and
/// result merging work uniformly regardless of the backend. Each variant
/// carries its own HTTP client plus whatever endpoint configuration that
/// backend needs.
pub enum Registry {
    /// clawhub.ai — default for non-CN locales.
    Clawhub {
        /// HTTP client used for search requests.
        client: Client,
        /// Base URL of the API; `/v1/search` is appended when searching.
        api_base: String,
        /// Optional bearer token attached to search requests when present.
        token: Option<String>,
    },
    /// skillhub (Tencent COS + lightmake.site) — preferred for CN locales.
    Skillhub {
        /// HTTP client used for search requests.
        client: Client,
        /// Keyword-search endpoint; `keyword`, `page` and `pageSize` query
        /// parameters are appended when searching.
        search_url: String,
        /// Index URL. It is passed through to the search routine, which does
        /// not currently use it.
        index_url: String,
    },
    /// skills.sh community directory — always searched, 91K+ skills ranked by installs.
    Skillsh {
        /// HTTP client used for search requests; the endpoint itself is
        /// hard-coded in the search routine.
        client: Client,
    },
    /// iWenCai SkillHub (the Tonghuashun financial skill library). The
    /// upstream gateway returns the full skill list at one endpoint; we
    /// filter client-side because there is no public keyword-search API.
    Iwencai {
        /// HTTP client used for list requests.
        client: Client,
        /// Listing endpoint; `size=100&page=1` is appended when searching.
        list_url: String,
    },
}

impl Registry {
    /// Short display label identifying this registry.
    pub fn name(&self) -> &str {
        match self {
            Self::Clawhub { .. } => "clawhub.ai",
            Self::Skillhub { .. } => "skillhub",
            Self::Skillsh { .. } => "skills.sh",
            Self::Iwencai { .. } => "iwencai",
        }
    }

    /// Run `query` against this registry and return whatever it found.
    ///
    /// Dispatches to the backend-specific search routine for the variant.
    pub async fn search(&self, query: &str) -> Vec<SearchResult> {
        match self {
            Self::Clawhub { client, api_base, token } => {
                let bearer = token.as_deref();
                search_clawhub(client, api_base, bearer, query).await
            }
            Self::Skillhub { client, search_url, index_url } => {
                search_skillhub(client, search_url, index_url, query).await
            }
            Self::Skillsh { client } => search_skillsh(client, query).await,
            Self::Iwencai { client, list_url } => search_iwencai(client, list_url, query).await,
        }
    }
}

// ---------------------------------------------------------------------------
// Concurrent multi-registry search
// ---------------------------------------------------------------------------

/// Search all `registries` concurrently, merge the results, and sort by popularity.
///
/// Deduplication uses the normalized slug (e.g. `"owner/repo@skill"` → `"skill"`).
/// When the same skill appears in more than one registry, the first entry seen
/// keeps its slug and registry label, the higher install count is kept, and any
/// field still missing on it (description, version, downloads, stars) is filled
/// in from the later duplicate.
///
/// The returned list is ordered by descending `popularity_score`; entries with
/// equal scores keep their merge order because the sort is stable.
pub async fn search_concurrent(registries: &[Registry], query: &str) -> Vec<SearchResult> {
    use std::collections::hash_map::Entry;

    // Fire all searches in parallel; results come back in registry order.
    let futures: Vec<_> = registries.iter().map(|r| r.search(query)).collect();
    let all_results: Vec<Vec<SearchResult>> = futures::future::join_all(futures).await;

    debug!(
        registries = registries.iter().map(|r| r.name()).collect::<Vec<_>>().join(", "),
        counts = all_results.iter().map(|v| v.len().to_string()).collect::<Vec<_>>().join("+"),
        "concurrent search complete"
    );

    // Merge and dedup: map each normalized slug to its position in `merged`.
    let mut seen: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
    let mut merged: Vec<SearchResult> = Vec::new();

    for result in all_results.into_iter().flatten() {
        match seen.entry(normalize_slug(&result.slug)) {
            Entry::Occupied(slot) => merge_duplicate(&mut merged[*slot.get()], result),
            Entry::Vacant(slot) => {
                slot.insert(merged.len());
                merged.push(result);
            }
        }
    }

    // Sort by composite popularity score, highest first. `sort_by_key` is
    // stable, so equally-scored entries stay in merge order.
    merged.sort_by_key(|r| std::cmp::Reverse(popularity_score(r)));
    merged
}

/// Fold a later duplicate `dup` into `existing`, the entry already kept for
/// the same normalized slug.
fn merge_duplicate(existing: &mut SearchResult, dup: SearchResult) {
    // `Option` orders `None` below every `Some`, so `max` both keeps the
    // larger install count and fills in a missing one.
    existing.installs = existing.installs.max(dup.installs);

    // Popularity signals the first registry did not report are taken from the
    // duplicate so they still contribute to ranking.
    existing.downloads = existing.downloads.or(dup.downloads);
    existing.stars = existing.stars.or(dup.stars);

    if existing.description.is_none() {
        if let Some(desc) = dup.description {
            // Annotate the description with its source registry when it
            // differs from the registry that owns the slug.
            existing.description = Some(if dup.registry != existing.registry {
                format!("[{}] {}", dup.registry, desc)
            } else {
                desc
            });
        }
    }

    if existing.version.is_none() {
        existing.version = dup.version;
    }
}

// ---------------------------------------------------------------------------
// Per-registry search implementations
// ---------------------------------------------------------------------------

/// Query the clawhub `/v1/search` endpoint under `api_base`.
///
/// Sends `token` as a bearer credential when one is supplied. Any transport
/// failure, non-success status, or undecodable JSON body yields an empty list.
async fn search_clawhub(
    client: &Client,
    api_base: &str,
    token: Option<&str>,
    query: &str,
) -> Vec<SearchResult> {
    let endpoint = format!("{}/v1/search?q={}", api_base, url_encode(query));

    let request = match token {
        Some(bearer) => client.get(&endpoint).bearer_auth(bearer),
        None => client.get(&endpoint),
    };

    let response = match request.send().await {
        Ok(r) if r.status().is_success() => r,
        _ => return Vec::new(),
    };

    match response.json::<serde_json::Value>().await {
        Ok(payload) => parse_standard_response(&payload, "clawhub.ai"),
        Err(_) => Vec::new(),
    }
}

/// Query the skillhub keyword-search endpoint at `search_url`.
///
/// Requests the first page of 20 results and unwraps the
/// `{ data: { skills: [...] } }` envelope. The `description_zh` field is
/// preferred when non-empty, falling back to `description`. Any transport
/// failure, non-success status, or unexpected body shape yields an empty list.
/// `_index_url` is accepted but not used.
async fn search_skillhub(
    client: &Client,
    search_url: &str,
    _index_url: &str,
    query: &str,
) -> Vec<SearchResult> {
    // skillhub.cn API: GET /api/skills?keyword=<q>&page=1&pageSize=20
    // Response: { code: 0, data: { skills: [...], total: N } }
    let endpoint = format!(
        "{}?keyword={}&page=1&pageSize=20",
        search_url,
        url_encode(query)
    );

    let response = match client.get(&endpoint).send().await {
        Ok(r) if r.status().is_success() => r,
        _ => return Vec::new(),
    };
    let payload = match response.json::<serde_json::Value>().await {
        Ok(value) => value,
        Err(_) => return Vec::new(),
    };

    // Unwrap the {code:0, data:{skills:[...]}} envelope.
    let Some(skills) = payload
        .get("data")
        .and_then(|data| data.get("skills"))
        .and_then(|list| list.as_array())
    else {
        return Vec::new();
    };

    let mut found = Vec::with_capacity(skills.len());
    for entry in skills {
        let localized = entry["description_zh"].as_str().filter(|text| !text.is_empty());
        let description = match localized {
            Some(text) => Some(text.to_owned()),
            None => entry["description"].as_str().map(str::to_owned),
        };
        found.push(SearchResult {
            slug: entry["slug"].as_str().unwrap_or("unknown").to_owned(),
            version: entry["version"].as_str().map(str::to_owned),
            description,
            downloads: entry["downloads"].as_u64(),
            installs: entry["installs"].as_u64(),
            stars: entry["stars"].as_u64(),
            registry: "skillhub".to_owned(),
        });
    }
    found
}

/// Search iwencai's skill square.
///
/// The upstream listing endpoint is called with `size=100&page=1` and returns
/// `{ data: { records: [{name, cn_name, description, download_count, ...}] } }`.
/// There is no keyword parameter, so that single page is fetched and filtered
/// client-side: only `hithink-*` records are kept, and a non-empty query must
/// appear (case-insensitively) in the record's `name`, `cn_name`, or
/// `description`. An empty query returns every `hithink-*` record so callers
/// like the agent's "show me everything" flow work without special-casing.
///
/// Any transport failure, non-success status, or unexpected body shape yields
/// an empty list.
async fn search_iwencai(client: &Client, list_url: &str, query: &str) -> Vec<SearchResult> {
    // iwencai's gateway uses `size` (not `pageSize`/`page_size`) and caps
    // somewhere between 100 and 150 — `size=100` returns the full catalogue
    // (~89 skills) in one shot, `size=150` 500's. Pull everything once and
    // filter client-side; expand to true pagination if the catalogue grows.
    let url = if list_url.contains('?') {
        format!("{list_url}&size=100&page=1")
    } else {
        format!("{list_url}?size=100&page=1")
    };
    let Ok(resp) = client.get(&url).send().await else { return vec![] };
    if !resp.status().is_success() { return vec![]; }
    let Ok(body) = resp.json::<serde_json::Value>().await else { return vec![] };

    let needle = query.trim().to_lowercase();
    let records = body
        .get("data")
        .and_then(|d| d.get("records"))
        .and_then(|v| v.as_array());
    let Some(records) = records else { return vec![] };

    records
        .iter()
        .filter(|item| {
            // Hide Tonghuashun internal tooling (sunmao-*, hxkline-*, ths-*,
            // hexin-*, cmdb, alert-analyzer, ...) — only the `hithink-*`
            // line is the curated public finance API surface. Without
            // this filter ~67 of 89 skills are internal devops/scaffolding
            // that nobody outside the company should be installing.
            let name = item["name"].as_str().unwrap_or("");
            if !name.starts_with("hithink-") {
                return false;
            }
            if needle.is_empty() {
                return true;
            }
            // Case-insensitive substring match over the searchable text fields.
            ["name", "cn_name", "description"].iter().any(|field| {
                item[*field]
                    .as_str()
                    .map_or(false, |text| text.to_lowercase().contains(needle.as_str()))
            })
        })
        .map(|item| {
            // iwencai's `cn_name` is more user-friendly than `name`; surface
            // it in the description so users see what each slug is. The two
            // parts are joined with a visible separator, and the separator is
            // omitted when either part is missing.
            let raw_desc = item["description"].as_str().unwrap_or("");
            let cn_name = item["cn_name"].as_str().unwrap_or("");
            let description = match (cn_name.is_empty(), raw_desc.is_empty()) {
                (true, true) => None,
                (true, false) => Some(raw_desc.to_owned()),
                (false, true) => Some(cn_name.to_owned()),
                (false, false) => Some(format!("{cn_name} — {raw_desc}")),
            };
            SearchResult {
                slug: item["name"].as_str().unwrap_or("unknown").to_owned(),
                version: item["version"].as_str().map(|s| s.to_owned()),
                description,
                downloads: item["download_count"].as_u64(),
                installs: item["download_success_count"].as_u64(),
                stars: item["star_count"].as_u64(),
                registry: "iwencai".to_owned(),
            }
        })
        .collect()
}

/// Query the skills.sh search API for up to 20 matches.
///
/// Each result's slug is `source@skillId` when the entry has a non-empty
/// `source`, otherwise just the skill id (falling back to `name`, then
/// `"unknown"`). Any transport failure, non-success status, or unexpected
/// body shape yields an empty list.
async fn search_skillsh(client: &Client, query: &str) -> Vec<SearchResult> {
    let endpoint = format!("https://skills.sh/api/search?q={}&limit=20", url_encode(query));

    let response = match client.get(&endpoint).send().await {
        Ok(r) if r.status().is_success() => r,
        _ => return Vec::new(),
    };
    let payload = match response.json::<serde_json::Value>().await {
        Ok(value) => value,
        Err(_) => return Vec::new(),
    };
    let Some(entries) = payload.get("skills").and_then(|list| list.as_array()) else {
        return Vec::new();
    };

    let mut found = Vec::with_capacity(entries.len());
    for entry in entries {
        // skills.sh: {id, skillId, name, installs, source: "owner/repo"}
        let skill_id = match entry["skillId"].as_str() {
            Some(id) => id,
            None => entry["name"].as_str().unwrap_or("unknown"),
        };
        let slug = match entry["source"].as_str() {
            Some(source) if !source.is_empty() => format!("{source}@{skill_id}"),
            _ => skill_id.to_owned(),
        };
        let description = match entry["description"].as_str() {
            Some(text) => Some(text.to_owned()),
            None => entry["summary"].as_str().map(str::to_owned),
        };
        found.push(SearchResult {
            slug,
            version: None,
            description,
            downloads: None,
            installs: entry["installs"].as_u64(),
            stars: entry["stars"].as_u64(),
            registry: "skills.sh".to_owned(),
        });
    }
    found
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/// Extract search results from a response whose items live under a top-level
/// `skills` key or, when that key is absent, a `results` key.
///
/// Returns an empty list when neither key is present or the selected value is
/// not an array. Every item is converted with `to_result` and tagged with
/// `registry`.
fn parse_standard_response(body: &serde_json::Value, registry: &str) -> Vec<SearchResult> {
    let listing = match body.get("skills") {
        Some(found) => Some(found),
        None => body.get("results"),
    };
    let Some(items) = listing.and_then(|value| value.as_array()) else {
        return Vec::new();
    };

    let mut parsed = Vec::with_capacity(items.len());
    for item in items {
        parsed.push(to_result(item, registry));
    }
    parsed
}

/// Convert one JSON item into a `SearchResult` tagged with `registry`.
///
/// Each field is read from the first key in its fallback list that holds a
/// value of the expected type; the slug defaults to `"unknown"` when no key
/// matches.
fn to_result(item: &serde_json::Value, registry: &str) -> SearchResult {
    /// First string value found under any of `keys`, in order.
    fn first_str<'a>(item: &'a serde_json::Value, keys: &[&str]) -> Option<&'a str> {
        keys.iter().find_map(|key| item[*key].as_str())
    }

    /// First unsigned integer value found under any of `keys`, in order.
    fn first_u64(item: &serde_json::Value, keys: &[&str]) -> Option<u64> {
        keys.iter().find_map(|key| item[*key].as_u64())
    }

    let slug = first_str(item, &["slug", "name"]).unwrap_or("unknown").to_owned();
    let version = first_str(item, &["version"]).map(str::to_owned);
    let description = first_str(item, &["summary", "description"]).map(str::to_owned);

    SearchResult {
        slug,
        version,
        description,
        downloads: first_u64(item, &["downloads", "download_count"]),
        installs: first_u64(item, &["installs", "install_count"]),
        stars: first_u64(item, &["stars", "favorites", "star_count"]),
        registry: registry.to_owned(),
    }
}

/// Ranking score used to order merged search results.
///
/// Computed as `installs + downloads / 2 + stars * 10`, with absent signals
/// counted as zero and all arithmetic saturating at `u64::MAX`. Stars carry
/// the highest per-unit weight as a quality signal, and a result with no
/// signals at all scores zero.
fn popularity_score(r: &SearchResult) -> u64 {
    let half_downloads = r.downloads.map_or(0, |count| count / 2);
    let weighted_stars = r.stars.map_or(0, |count| count.saturating_mul(10));
    r.installs
        .unwrap_or(0)
        .saturating_add(half_downloads)
        .saturating_add(weighted_stars)
}

/// Reduce a slug to the lowercase short name used as the deduplication key.
///
/// Everything after the last `@` is taken when one is present; otherwise
/// everything after the last `/`; otherwise the whole slug.
///
/// `"owner/repo@skill"` → `"skill"`, `"owner/repo"` → `"repo"`, `"skill"` → `"skill"`
pub fn normalize_slug(slug: &str) -> String {
    // Both delimiters are single-byte ASCII, so `index + 1` is always a valid
    // character boundary.
    let short_name = match slug.rfind('@') {
        Some(at) => &slug[at + 1..],
        None => match slug.rfind('/') {
            Some(slash) => &slug[slash + 1..],
            None => slug,
        },
    };
    short_name.to_lowercase()
}

/// Percent-encode `s` for use as a URL query parameter value.
///
/// Bytes in the RFC 3986 unreserved set (ASCII letters, digits, `-`, `_`,
/// `.`, `~`) pass through unchanged; every other byte of the UTF-8 encoding
/// is written as `%XX` with uppercase hex digits.
pub(crate) fn url_encode(s: &str) -> String {
    const HEX_DIGITS: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case every byte expands to three characters.
    let mut encoded = String::with_capacity(s.len() * 3);
    for &byte in s.as_bytes() {
        let unreserved =
            byte.is_ascii_alphanumeric() || matches!(byte, b'-' | b'_' | b'.' | b'~');
        if unreserved {
            encoded.push(char::from(byte));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX_DIGITS[usize::from(byte >> 4)]));
            encoded.push(char::from(HEX_DIGITS[usize::from(byte & 0x0f)]));
        }
    }
    encoded
}