rsclaw-skill 0.1.0

Skill crate for RsClaw — internal workspace crate, not for direct use
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
//! Skill registry abstraction.
//!
//! All registries — clawhub.ai, skillhub (Tencent), skills.sh, iwencai — are
//! variants of the same `Registry` enum so search and install logic is
//! uniform. Callers pick which registries to activate; the concurrent merge is
//! always the same.

use reqwest::Client;
use tracing::debug;

// ---------------------------------------------------------------------------
// Public types
// ---------------------------------------------------------------------------

/// A single skill search result from any registry.
///
/// Only `slug` and `registry` are always populated; the remaining fields are
/// `None` when the originating registry's response did not carry them.
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Registry-provided identifier of the skill. skills.sh results use the
    /// `"owner/repo@skill"` form when a source is known; the per-registry
    /// parsers fall back to `"unknown"` when no identifier is present.
    pub slug: String,
    /// Version string, if the registry reported one.
    pub version: Option<String>,
    /// Free-text description or summary, if the registry reported one.
    pub description: Option<String>,
    /// Download count, if reported.
    pub downloads: Option<u64>,
    /// Install count, if reported.
    pub installs: Option<u64>,
    /// Star (or favorite) count, if reported.
    pub stars: Option<u64>,
    /// Which registry returned this result.
    pub registry: String,
}

// ---------------------------------------------------------------------------
// Registry enum
// ---------------------------------------------------------------------------

/// A single skill registry that can be searched.
///
/// All variants share the same `search()` method so concurrent search and
/// result merging work uniformly regardless of the backend.
pub enum Registry {
    /// clawhub.ai — default for non-CN locales.
    Clawhub {
        /// HTTP client used to issue the search request.
        client: Client,
        /// Base URL of the API; `/v1/search` is appended when searching.
        api_base: String,
        /// Optional bearer token attached to the request when present.
        token: Option<String>,
    },
    /// skillhub (Tencent COS + lightmake.site) — preferred for CN locales.
    Skillhub {
        /// HTTP client used to issue the search request.
        client: Client,
        /// Keyword-search endpoint; query parameters are appended to it.
        search_url: String,
        /// Index URL. It is passed to the search routine, which currently
        /// ignores it.
        index_url: String,
    },
    /// skills.sh community directory — always searched, 91K+ skills ranked by
    /// installs. Holds only the HTTP client; the endpoint is fixed in the
    /// search routine.
    Skillsh { client: Client },
    /// iWenCai SkillHub (Tonghuashun's financial skill library). The upstream
    /// gateway returns the full skill list at one endpoint (`list_url`); we
    /// filter client-side because there is no public keyword-search API.
    Iwencai { client: Client, list_url: String },
}

impl Registry {
    /// Short label identifying this registry in user-facing output.
    ///
    /// The returned string matches the `registry` field that the
    /// corresponding search routine stamps on its results.
    pub fn name(&self) -> &str {
        match self {
            Self::Iwencai { .. } => "iwencai",
            Self::Skillsh { .. } => "skills.sh",
            Self::Skillhub { .. } => "skillhub",
            Self::Clawhub { .. } => "clawhub.ai",
        }
    }

    /// Run a search for `query` against this registry's backend.
    ///
    /// Dispatches to the backend-specific routine for the active variant and
    /// returns whatever results it produced.
    pub async fn search(&self, query: &str) -> Vec<SearchResult> {
        match self {
            Self::Skillsh { client } => search_skillsh(client, query).await,
            Self::Iwencai { client, list_url } => {
                let hits = search_iwencai(client, list_url, query).await;
                hits
            }
            Self::Skillhub {
                client,
                search_url,
                index_url,
            } => {
                let hits = search_skillhub(client, search_url, index_url, query).await;
                hits
            }
            Self::Clawhub {
                client,
                api_base,
                token,
            } => {
                let bearer = token.as_deref();
                search_clawhub(client, api_base, bearer, query).await
            }
        }
    }
}

// ---------------------------------------------------------------------------
// Concurrent multi-registry search
// ---------------------------------------------------------------------------

/// Search all `registries` concurrently, merge the results, and sort them by
/// popularity.
///
/// Deduplication uses the normalized slug (e.g. `"owner/repo@skill"` →
/// `"skill"`). When the same skill appears in multiple registries, the first
/// entry seen is kept and enriched from the later ones: the higher install
/// count wins, and a missing description, version, download count or star
/// count is filled in from the other entry. A description borrowed from a
/// different registry is prefixed with `[registry-name]`.
///
/// The merged list is ordered by descending `popularity_score`. The sort is
/// stable, so entries with equal scores keep the order in which they were
/// first seen.
pub async fn search_concurrent(registries: &[Registry], query: &str) -> Vec<SearchResult> {
    // Fire all searches in parallel.
    let futures: Vec<_> = registries.iter().map(|r| r.search(query)).collect();
    let all_results: Vec<Vec<SearchResult>> = futures::future::join_all(futures).await;

    debug!(
        registries = registries
            .iter()
            .map(|r| r.name())
            .collect::<Vec<_>>()
            .join(", "),
        counts = all_results
            .iter()
            .map(|v| v.len().to_string())
            .collect::<Vec<_>>()
            .join("+"),
        "concurrent search complete"
    );

    // Merge and dedup. `seen` maps a normalized slug to its index in `merged`.
    let mut seen: std::collections::HashMap<String, usize> = std::collections::HashMap::new();
    let mut merged: Vec<SearchResult> = Vec::new();

    for result in all_results.into_iter().flatten() {
        // Single lookup: either register a new slug or fetch the index of the
        // entry that already owns it.
        let idx = match seen.entry(normalize_slug(&result.slug)) {
            std::collections::hash_map::Entry::Occupied(slot) => *slot.get(),
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(merged.len());
                merged.push(result);
                continue;
            }
        };

        let existing = &mut merged[idx];
        if result.installs.unwrap_or(0) > existing.installs.unwrap_or(0) {
            existing.installs = result.installs;
        }
        // Carry over popularity signals the first entry lacked; otherwise they
        // would be lost and the skill would be under-ranked in the final sort.
        if existing.downloads.is_none() {
            existing.downloads = result.downloads;
        }
        if existing.stars.is_none() {
            existing.stars = result.stars;
        }
        if existing.description.is_none() {
            if let Some(desc) = result.description {
                // Annotate description with its source registry when it
                // differs from the registry that owns the slug.
                existing.description = Some(if result.registry != existing.registry {
                    format!("[{}] {}", result.registry, desc)
                } else {
                    desc
                });
            }
        }
        if existing.version.is_none() {
            existing.version = result.version;
        }
    }

    // Sort by composite popularity score: installs (primary) + downloads + stars.
    // Stars are weighted higher than raw downloads as a quality signal.
    merged.sort_by(|a, b| popularity_score(b).cmp(&popularity_score(a)));
    merged
}

// ---------------------------------------------------------------------------
// Per-registry search implementations
// ---------------------------------------------------------------------------

/// Query the clawhub search endpoint (`{api_base}/v1/search?q=<query>`).
///
/// When `token` is present it is sent as a bearer credential. Any transport
/// failure, non-success HTTP status, or body that is not valid JSON yields an
/// empty result list; a valid body is handed to `parse_standard_response`.
async fn search_clawhub(
    client: &Client,
    api_base: &str,
    token: Option<&str>,
    query: &str,
) -> Vec<SearchResult> {
    let endpoint = format!("{api_base}/v1/search?q={}", url_encode(query));

    let request = match token {
        Some(bearer) => client.get(&endpoint).bearer_auth(bearer),
        None => client.get(&endpoint),
    };

    // Treat "could not send" and "server said no" the same way: no results.
    let response = match request.send().await {
        Ok(r) if r.status().is_success() => r,
        _ => return Vec::new(),
    };

    match response.json::<serde_json::Value>().await {
        Ok(payload) => parse_standard_response(&payload, "clawhub.ai"),
        Err(_) => Vec::new(),
    }
}

/// Query the skillhub keyword-search endpoint.
///
/// Issues `GET {search_url}?keyword=<q>&page=1&pageSize=20` and expects a
/// response shaped like `{ code: 0, data: { skills: [...], total: N } }`.
/// Transport errors, non-success statuses, invalid JSON, or a missing
/// `data.skills` array all produce an empty list. `_index_url` is currently
/// unused.
async fn search_skillhub(
    client: &Client,
    search_url: &str,
    _index_url: &str,
    query: &str,
) -> Vec<SearchResult> {
    let endpoint = format!(
        "{search_url}?keyword={}&page=1&pageSize=20",
        url_encode(query)
    );

    let response = match client.get(&endpoint).send().await {
        Ok(r) if r.status().is_success() => r,
        _ => return Vec::new(),
    };
    let payload = match response.json::<serde_json::Value>().await {
        Ok(v) => v,
        Err(_) => return Vec::new(),
    };

    // Peel the {code, data: {skills: [...]}} envelope.
    let skills = match payload
        .get("data")
        .and_then(|data| data.get("skills"))
        .and_then(|list| list.as_array())
    {
        Some(list) => list,
        None => return Vec::new(),
    };

    let mut hits = Vec::with_capacity(skills.len());
    for entry in skills {
        // Prefer the Chinese description unless it is absent or blank.
        let description = match entry["description_zh"].as_str() {
            Some(zh) if !zh.is_empty() => Some(zh.to_owned()),
            _ => entry["description"].as_str().map(str::to_owned),
        };
        hits.push(SearchResult {
            slug: entry["slug"].as_str().unwrap_or("unknown").to_owned(),
            version: entry["version"].as_str().map(str::to_owned),
            description,
            downloads: entry["downloads"].as_u64(),
            installs: entry["installs"].as_u64(),
            stars: entry["stars"].as_u64(),
            registry: "skillhub".to_owned(),
        });
    }
    hits
}

/// Search iwencai's skill square.
///
/// The upstream endpoint has no keyword parameter, so this fetches
/// `GET {list_url}?size=100&page=1` (using `&` instead of `?` when `list_url`
/// already has a query string) and filters client-side. The expected response
/// shape is `{ data: { records: [{name, cn_name, description,
/// download_count, ...}] } }`.
///
/// Only records whose `name` starts with `hithink-` are ever returned. Among
/// those, a record matches when the trimmed, lower-cased `query` is a
/// substring of its `name`, `cn_name` or `description` (case-insensitive).
/// An empty query returns every `hithink-` record so callers like the agent's
/// "show me everything" flow work without special-casing.
///
/// Transport errors, non-success statuses, invalid JSON, or a missing
/// `data.records` array all produce an empty list.
async fn search_iwencai(client: &Client, list_url: &str, query: &str) -> Vec<SearchResult> {
    // iwencai's gateway uses `size` (not `pageSize`/`page_size`) and caps
    // somewhere between 100 and 150 — `size=100` returns the full catalogue
    // (~89 skills) in one shot, `size=150` 500's. Pull everything once and
    // filter client-side; expand to true pagination if the catalogue grows.
    let sep = if list_url.contains('?') { '&' } else { '?' };
    let url = format!("{list_url}{sep}size=100&page=1");
    let Ok(resp) = client.get(&url).send().await else {
        return vec![];
    };
    if !resp.status().is_success() {
        return vec![];
    }
    let Ok(body) = resp.json::<serde_json::Value>().await else {
        return vec![];
    };

    let q = query.trim().to_lowercase();
    let arr = body
        .get("data")
        .and_then(|d| d.get("records"))
        .and_then(|v| v.as_array());
    let Some(arr) = arr else { return vec![] };

    arr.iter()
        .filter(|item| {
            // Hide Tonghuashun-internal tooling (sunmao-*, hxkline-*, ths-*,
            // hexin-*, cmdb, alert-analyzer, ...) — only the `hithink-*`
            // line is the curated public finance API surface. Without
            // this filter ~67 of 89 skills are internal devops/scaffolding
            // that nobody outside the company should be installing.
            let name = item["name"].as_str().unwrap_or("");
            if !name.starts_with("hithink-") {
                return false;
            }
            if q.is_empty() {
                return true;
            }
            let cn_name = item["cn_name"].as_str().unwrap_or("").to_lowercase();
            let desc = item["description"].as_str().unwrap_or("").to_lowercase();
            name.to_lowercase().contains(&q) || cn_name.contains(&q) || desc.contains(&q)
        })
        .map(|item| {
            // iwencai's `cn_name` is more user-friendly than `name`; surface
            // it in the description so users see what each slug is. Keep a
            // visible delimiter between the two so the display name does not
            // run straight into the description text.
            let raw_desc = item["description"].as_str().unwrap_or("");
            let cn_name = item["cn_name"].as_str().unwrap_or("");
            let desc = match (cn_name.is_empty(), raw_desc.is_empty()) {
                (false, false) => format!("{cn_name} — {raw_desc}"),
                (false, true) => cn_name.to_owned(),
                (true, _) => raw_desc.to_owned(),
            };
            SearchResult {
                slug: item["name"].as_str().unwrap_or("unknown").to_owned(),
                version: item["version"].as_str().map(|s| s.to_owned()),
                description: if desc.is_empty() { None } else { Some(desc) },
                downloads: item["download_count"].as_u64(),
                installs: item["download_success_count"].as_u64(),
                stars: item["star_count"].as_u64(),
                registry: "iwencai".to_owned(),
            }
        })
        .collect()
}

/// Query the skills.sh search API (`/api/search?q=<query>&limit=20`).
///
/// Each entry of the response's `skills` array is expected to look like
/// `{id, skillId, name, installs, source: "owner/repo"}`. The slug is
/// `"{source}@{skillId}"` when a non-empty source is present, otherwise just
/// the skill id (falling back to `name`, then `"unknown"`). Transport errors,
/// non-success statuses, invalid JSON, or a missing `skills` array all
/// produce an empty list.
async fn search_skillsh(client: &Client, query: &str) -> Vec<SearchResult> {
    let endpoint = format!(
        "https://skills.sh/api/search?q={}&limit=20",
        url_encode(query)
    );

    let response = match client.get(&endpoint).send().await {
        Ok(r) if r.status().is_success() => r,
        _ => return Vec::new(),
    };
    let payload = match response.json::<serde_json::Value>().await {
        Ok(v) => v,
        Err(_) => return Vec::new(),
    };
    let Some(skills) = payload.get("skills").and_then(|list| list.as_array()) else {
        return Vec::new();
    };

    let mut hits = Vec::with_capacity(skills.len());
    for entry in skills {
        let skill_id = match entry["skillId"].as_str() {
            Some(id) => id,
            None => entry["name"].as_str().unwrap_or("unknown"),
        };
        // Qualify the id with its source repository when one is given.
        let slug = match entry["source"].as_str() {
            Some(source) if !source.is_empty() => format!("{source}@{skill_id}"),
            _ => skill_id.to_owned(),
        };
        let description = match entry["description"].as_str() {
            Some(text) => Some(text.to_owned()),
            None => entry["summary"].as_str().map(str::to_owned),
        };
        hits.push(SearchResult {
            slug,
            version: None,
            description,
            downloads: None,
            installs: entry["installs"].as_u64(),
            stars: entry["stars"].as_u64(),
            registry: "skills.sh".to_owned(),
        });
    }
    hits
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/// Convert a "standard" search response into `SearchResult`s.
///
/// The result list is read from the `skills` key, or from `results` when
/// `skills` is absent. If the chosen value is not an array (or neither key
/// exists) the result is empty. Every item is tagged with `registry`.
fn parse_standard_response(body: &serde_json::Value, registry: &str) -> Vec<SearchResult> {
    let listing = match body.get("skills") {
        Some(found) => Some(found),
        None => body.get("results"),
    };
    let Some(items) = listing.and_then(|value| value.as_array()) else {
        return Vec::new();
    };

    let mut parsed = Vec::with_capacity(items.len());
    for item in items {
        parsed.push(to_result(item, registry));
    }
    parsed
}

/// Build a `SearchResult` from one JSON item, tolerating alternate key names.
///
/// Key precedence (first present value of the right type wins):
/// - slug: `slug`, `name`, else the literal `"unknown"`
/// - description: `summary`, `description`
/// - downloads: `downloads`, `download_count`
/// - installs: `installs`, `install_count`
/// - stars: `stars`, `favorites`, `star_count`
fn to_result(item: &serde_json::Value, registry: &str) -> SearchResult {
    // Small accessors so the precedence chains below read as plain lists.
    let text = |key: &str| item[key].as_str().map(str::to_owned);
    let count = |key: &str| item[key].as_u64();

    let slug = text("slug")
        .or_else(|| text("name"))
        .unwrap_or_else(|| "unknown".to_owned());
    let version = text("version");
    let description = text("summary").or_else(|| text("description"));
    let downloads = count("downloads").or_else(|| count("download_count"));
    let installs = count("installs").or_else(|| count("install_count"));
    let stars = count("stars")
        .or_else(|| count("favorites"))
        .or_else(|| count("star_count"));

    SearchResult {
        slug,
        version,
        description,
        downloads,
        installs,
        stars,
        registry: registry.to_owned(),
    }
}

/// Ranking key used to order merged search results.
///
/// Computed as `installs + downloads / 2 + stars * 10` using integer
/// arithmetic (the halving truncates) with saturating multiplication and
/// addition. Absent signals count as zero, so a result with no signals
/// scores 0 and sorts last.
fn popularity_score(r: &SearchResult) -> u64 {
    let half_downloads = r.downloads.map_or(0, |d| d / 2);
    let weighted_stars = r.stars.map_or(0, |s| s.saturating_mul(10));
    r.installs
        .unwrap_or(0)
        .saturating_add(half_downloads)
        .saturating_add(weighted_stars)
}

/// Reduce a slug to the lower-cased short name used as the dedup key.
///
/// If the slug contains `@`, everything after the last `@` is used as-is;
/// otherwise the segment after the last `/` is used (the whole slug when
/// there is no `/`).
///
/// `"owner/repo@skill"` → `"skill"`, `"owner/repo"` → `"repo"`, `"skill"` →
/// `"skill"`
pub fn normalize_slug(slug: &str) -> String {
    let short_name = match slug.rsplit_once('@') {
        Some((_, skill)) => skill,
        None => slug.rsplit_once('/').map_or(slug, |(_, last)| last),
    };
    short_name.to_lowercase()
}

/// Percent-encode `s` for use as a URL query parameter value.
///
/// Bytes in the RFC 3986 unreserved set (ASCII letters, digits, `-`, `_`,
/// `.`, `~`) pass through unchanged; every other byte of the UTF-8 encoding
/// becomes `%XX` with upper-case hex digits.
pub(crate) fn url_encode(s: &str) -> String {
    const HEX: &[u8; 16] = b"0123456789ABCDEF";

    // Worst case: every input byte expands to three output bytes.
    let mut encoded = String::with_capacity(s.len() * 3);
    for &b in s.as_bytes() {
        if b.is_ascii_alphanumeric() || matches!(b, b'-' | b'_' | b'.' | b'~') {
            encoded.push(char::from(b));
        } else {
            encoded.push('%');
            encoded.push(char::from(HEX[usize::from(b >> 4)]));
            encoded.push(char::from(HEX[usize::from(b & 0x0f)]));
        }
    }
    encoded
}