Skip to main content

skilllite_evolution/skill_synth/
validate.rs

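//! Skill validation: documentation completeness checks, skill directory collection, and batch validation.
//!
//! - **check_skill_md_completeness**: checks SKILL.md quality with the LLM first, falling back to a heuristic
//! - **validate_skills**: for each skill, runs infer → test → doc check and returns the validation results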
5
6use std::path::{Path, PathBuf};
7
8use anyhow::Result;
9
10use crate::EvolutionLlm;
11use crate::EvolutionMessage;
12
13use super::infer;
14
15// ─── SKILL.md 文档完整性检测 ─────────────────────────────────────────────────
16
17/// 通过 LLM 检查 SKILL.md 文档完整性,LLM 失败时回退到启发式检测
18pub(super) async fn check_skill_md_completeness<L: EvolutionLlm>(
19    skill_dir: &Path,
20    llm: &L,
21    model: &str,
22) -> Option<String> {
23    let skill_md_path = skill_dir.join("SKILL.md");
24    let content = match skilllite_fs::read_file(&skill_md_path) {
25        Ok(c) => c,
26        Err(_) => return Some("SKILL.md 不存在或无法读取".to_string()),
27    };
28
29    let prompt = format!(
30        "请判断以下 SKILL.md 是否**同时**包含:\n\
31         1. **使用案例**:至少一个完整的调用示例(含具体输入参数值和预期输出)\n\
32         2. **参数说明**:所有输入参数的名称、类型和用途\n\n\
33         ## SKILL.md\n{}\n\n\
34         只返回 JSON,不要 markdown 包裹:\n\
35         {{\"complete\": true, \"missing\": \"\"}}\n\
36         或\n\
37         {{\"complete\": false, \"missing\": \"缺少内容的简述\"}}",
38        content,
39    );
40
41    let messages = vec![EvolutionMessage::user(&prompt)];
42    match llm.complete(&messages, model, 0.0).await {
43        Ok(response) => {
44            let trimmed = response.trim();
45            if let Some(json_str) = infer::extract_first_json_object(trimmed) {
46                if let Ok(parsed) = serde_json::from_str::<serde_json::Value>(json_str) {
47                    let complete = parsed
48                        .get("complete")
49                        .and_then(|v| v.as_bool())
50                        .unwrap_or(false);
51                    if complete {
52                        return None;
53                    }
54                    let missing = parsed
55                        .get("missing")
56                        .and_then(|v| v.as_str())
57                        .unwrap_or("使用案例和参数说明");
58                    return Some(format!("SKILL.md 文档不完整,缺少: {}", missing));
59                }
60            }
61            if trimmed.contains("\"complete\": true") || trimmed.contains("\"complete\":true") {
62                None
63            } else {
64                Some("SKILL.md 文档不完整(LLM 评估未通过)".to_string())
65            }
66        }
67        Err(e) => {
68            tracing::warn!(
69                "LLM doc quality check failed: {}, falling back to heuristic",
70                e
71            );
72            check_skill_md_completeness_heuristic(&content)
73        }
74    }
75}
76
77/// 启发式检测 SKILL.md 完整性(LLM 不可用时的 fallback)。
78/// 也可在生成阶段对尚未落盘的 skill_md_content 做写入前校验。
79pub(super) fn check_skill_md_completeness_heuristic(content: &str) -> Option<String> {
80    let has_examples = has_section_with_content(content, &["example", "usage", "用法", "示例"]);
81    let has_params = has_section_with_content(
82        content,
83        &["input schema", "parameters", "parameter", "参数"],
84    );
85
86    if has_examples && has_params {
87        return None;
88    }
89
90    let mut missing = Vec::new();
91    if !has_examples {
92        missing.push("使用案例 (Examples/Usage)");
93    }
94    if !has_params {
95        missing.push("参数说明及示例 (Input Schema/Parameters with examples)");
96    }
97
98    Some(format!("SKILL.md 文档不完整,缺少: {}", missing.join("、")))
99}
100
/// Returns `true` when `content` has a heading matching one of `keywords` whose section
/// contains at least one non-empty, non-heading line.
///
/// Any line whose trimmed text starts with `#` is treated as a heading. The heading text
/// is lowercased before the substring test, so `keywords` must already be lowercase.
///
/// A section ends at the next heading of the same or a shallower level. Deeper
/// sub-headings are skipped rather than ending the section, so a section whose body is
/// organised entirely under sub-headings (e.g. `## Parameters` followed by
/// `### Required`) still counts as having content.
fn has_section_with_content(content: &str, keywords: &[&str]) -> bool {
    // Number of leading `#` characters; 0 means the line is not a heading.
    fn heading_level(line: &str) -> usize {
        line.chars().take_while(|&c| c == '#').count()
    }

    let lines: Vec<&str> = content.lines().map(str::trim).collect();
    for (i, line) in lines.iter().enumerate() {
        let level = heading_level(line);
        if level == 0 {
            continue;
        }
        let heading = line.trim_start_matches('#').trim().to_lowercase();
        if !keywords.iter().any(|kw| heading.contains(kw)) {
            continue;
        }
        for next in &lines[i + 1..] {
            if next.is_empty() {
                continue;
            }
            match heading_level(next) {
                // Plain text (or a code fence, list item, ...) inside the section.
                0 => return true,
                // Sub-heading of the current section: keep looking below it.
                deeper if deeper > level => continue,
                // Sibling or parent heading: the section ended without content.
                _ => break,
            }
        }
    }
    false
}
125
126// ─── 技能目录收集 ────────────────────────────────────────────────────────────
127
128/// 收集所有含 scripts 的 skill 目录
129pub(super) fn collect_skill_dirs(skills_root: &Path) -> Vec<(PathBuf, String)> {
130    if !skills_root.exists() {
131        return Vec::new();
132    }
133    let mut dirs: Vec<(PathBuf, String)> = Vec::new();
134    for entry in std::fs::read_dir(skills_root)
135        .ok()
136        .into_iter()
137        .flatten()
138        .filter_map(|e| e.ok())
139    {
140        let path = entry.path();
141        if !path.is_dir() {
142            continue;
143        }
144        let name = entry.file_name().to_string_lossy().into_owned();
145        if name.starts_with('_') {
146            if name == "_evolved" || name == "_pending" {
147                for e in std::fs::read_dir(&path)
148                    .ok()
149                    .into_iter()
150                    .flatten()
151                    .filter_map(|e| e.ok())
152                {
153                    let p = e.path();
154                    let sub = e.file_name().to_string_lossy().into_owned();
155                    if !p.is_dir() {
156                        continue;
157                    }
158                    if p.join("SKILL.md").exists() {
159                        dirs.push((p, sub));
160                    } else if sub == "_pending" {
161                        for e2 in std::fs::read_dir(&p)
162                            .ok()
163                            .into_iter()
164                            .flatten()
165                            .filter_map(|e| e.ok())
166                        {
167                            let p2 = e2.path();
168                            if p2.is_dir() && p2.join("SKILL.md").exists() {
169                                dirs.push((p2, e2.file_name().to_string_lossy().into_owned()));
170                            }
171                        }
172                    }
173                }
174            } else if path.join("SKILL.md").exists() {
175                dirs.push((path, name));
176            }
177            continue;
178        }
179        if path.join("SKILL.md").exists() {
180            dirs.push((path, name));
181        }
182    }
183    dirs.into_iter()
184        .filter(|(p, _)| !infer::list_scripts(p).is_empty())
185        .collect()
186}
187
188// ─── 验证结果 ────────────────────────────────────────────────────────────────
189
/// Validation result for a single skill, as produced by `validate_skills`.
#[derive(Debug, Clone)]
pub struct SkillValidation {
    /// Directory of the validated skill.
    pub skill_dir: PathBuf,
    /// Skill name (the name of the skill's directory).
    pub skill_name: String,
    /// `true` only when the test invocation succeeded and the doc check found nothing missing.
    pub passed: bool,
    /// Inferred entry point; `None` when inference itself failed.
    pub entry_point: Option<String>,
    /// Inferred test input; `None` when inference itself failed.
    pub test_input: Option<String>,
    /// Failure detail (inference error, invocation trace/error, or doc-check message);
    /// empty when the skill passed.
    pub error: String,
}
199
200// ─── 工具函数 ────────────────────────────────────────────────────────────────
201
/// Extracts a short, human-readable summary from an error trace.
///
/// The summary is the first non-blank line (trimmed) after a `stderr:\n` marker, else the
/// first non-blank line after a `stdout:\n` marker, else the first line of the trace.
/// An empty trace yields an empty string.
///
/// Summaries longer than 80 characters are cut to their first 77 characters plus `…`.
/// Length is measured and cut in characters, never bytes, so multi-byte text (such as the
/// Chinese messages produced in this module) cannot be split mid-character.
fn brief_error(trace: &str) -> String {
    // Character-based truncation shared by every path below.
    fn shorten(line: &str) -> String {
        const MAX_CHARS: usize = 80;
        const KEPT_CHARS: usize = 77;
        if line.chars().count() > MAX_CHARS {
            let head: String = line.chars().take(KEPT_CHARS).collect();
            format!("{}…", head)
        } else {
            line.to_string()
        }
    }

    if trace.is_empty() {
        return String::new();
    }
    // stderr is checked before stdout.
    for section in ["stderr:\n", "stdout:\n"] {
        let first_line = trace
            .split(section)
            .nth(1)
            .and_then(|part| part.lines().map(str::trim).find(|l| !l.is_empty()));
        if let Some(line) = first_line {
            return shorten(line);
        }
    }
    shorten(trace.lines().next().unwrap_or(""))
}
230
231// ─── validate_skills ─────────────────────────────────────────────────────────
232
233/// 验证技能:对每个 skill infer → test → doc check,返回结果列表。
234/// 若 `skill_names_filter` 为 `Some` 且非空,仅验证该列表中的技能名(目录名),否则验证全部。
235pub async fn validate_skills<L: EvolutionLlm>(
236    skills_root: &Path,
237    llm: &L,
238    model: &str,
239    skill_names_filter: Option<&[String]>,
240) -> Result<Vec<SkillValidation>> {
241    let mut skill_dirs = collect_skill_dirs(skills_root);
242    if let Some(names) = skill_names_filter {
243        if !names.is_empty() {
244            let set: std::collections::HashSet<&str> = names.iter().map(String::as_str).collect();
245            skill_dirs.retain(|(_, name)| set.contains(name.as_str()));
246            // 同名可能同时存在于 .skills/xxx 与 .skills/_evolved/xxx,只保留一个:优先 _evolved > _pending > 其它
247            let prefer = |p: &PathBuf| {
248                let s = p.to_string_lossy();
249                if s.contains("_evolved") {
250                    2
251                } else if s.contains("_pending") {
252                    1
253                } else {
254                    0
255                }
256            };
257            let mut by_name: std::collections::HashMap<String, (PathBuf, String)> =
258                std::collections::HashMap::new();
259            for (path, name) in skill_dirs {
260                let keep = match by_name.get(&name) {
261                    None => true,
262                    Some((existing, _)) => prefer(&path) > prefer(existing),
263                };
264                if keep {
265                    by_name.insert(name.clone(), (path, name));
266                }
267            }
268            // 按用户传入的筛选顺序输出,保证“只修选的”且顺序一致
269            skill_dirs = names
270                .iter()
271                .filter_map(|n| by_name.get(n.as_str()).cloned())
272                .collect();
273        }
274    }
275    let total = skill_dirs.len();
276    if total == 0 {
277        eprintln!("📋 未找到可验证的技能(无 scripts 的已跳过)");
278        return Ok(Vec::new());
279    }
280
281    eprintln!("📋 验证 {} 个技能...", total);
282    let mut results = Vec::with_capacity(total);
283    for (idx, (skill_dir, skill_name)) in skill_dirs.iter().enumerate() {
284        eprintln!("  [{}/{}] {} ...", idx + 1, total, skill_name);
285
286        let (entry_point, test_input) =
287            match infer::infer_skill_execution(llm, model, skill_dir).await {
288                Ok(ep) => ep,
289                Err(e) => {
290                    let err = format!("推理失败: {}", e);
291                    tracing::warn!("Skill '{}' {}", skill_name, err);
292                    results.push(SkillValidation {
293                        skill_dir: skill_dir.clone(),
294                        skill_name: skill_name.clone(),
295                        passed: false,
296                        entry_point: None,
297                        test_input: None,
298                        error: err,
299                    });
300                    continue;
301                }
302            };
303
304        // 验证前先安装依赖;无 package.json/requirements.txt 时从 SKILL.md compatibility 推断
305        let env_path: Option<PathBuf> = super::env_helper::ensure_skill_deps_and_env(skill_dir);
306
307        let (passed, error) = match infer::test_skill_invoke(
308            skill_dir,
309            &entry_point,
310            &test_input,
311            env_path.as_deref(),
312        ) {
313            Ok((ok, trace)) => {
314                if ok {
315                    match check_skill_md_completeness(skill_dir, llm, model).await {
316                        None => (true, String::new()),
317                        Some(doc_err) => (false, doc_err),
318                    }
319                } else {
320                    (false, trace)
321                }
322            }
323            Err(e) => (false, format!("调用失败: {}", e)),
324        };
325
326        results.push(SkillValidation {
327            skill_dir: skill_dir.clone(),
328            skill_name: skill_name.clone(),
329            passed,
330            entry_point: Some(entry_point),
331            test_input: Some(test_input),
332            error,
333        });
334    }
335
336    let pass = results.iter().filter(|v| v.passed).count();
337    eprintln!("📋 验证完成: {} 通过, {} 失败", pass, total - pass);
338    for v in &results {
339        if v.passed {
340            eprintln!("  ✅ {}", v.skill_name);
341        } else {
342            eprintln!("  ❌ {} → {}", v.skill_name, brief_error(&v.error));
343        }
344    }
345    Ok(results)
346}