1use std::path::Path;
9
10use crate::diagnostics::Diagnostic;
11use crate::models::SkillProperties;
12use crate::parser::read_properties;
13use crate::prompt::estimate_tokens;
14use crate::structure::validate_structure;
15use crate::validator::validate;
16use crate::Result;
17
/// Outcome of checking a single skill directory against one sample query.
///
/// Produced by [`test_skill`] and rendered as text by [`format_test_result`].
#[derive(Debug)]
pub struct TestResult {
    /// Skill name, copied from the parsed properties.
    pub name: String,
    /// Skill description, copied from the parsed properties.
    pub description: String,
    /// The query the skill was tested against, as supplied by the caller.
    pub query: String,
    /// Match category derived from `score`.
    pub query_match: QueryMatch,
    /// Weighted match score combining description overlap, trigger phrase and name hits.
    pub score: f64,
    /// Sum of the token estimates for the skill name and description.
    pub estimated_tokens: usize,
    /// Diagnostics returned by `validate` for the skill directory.
    pub diagnostics: Vec<Diagnostic>,
    /// Diagnostics returned by `validate_structure` for the skill directory.
    pub structure_diagnostics: Vec<Diagnostic>,
    /// The full set of properties read from the skill directory.
    pub properties: SkillProperties,
}
40
/// How well a skill's metadata matches a test query.
///
/// The category is derived from the numeric score computed by
/// `compute_query_match`: `Strong` for a score of at least 0.4, `Weak` for a
/// score of at least 0.15, and `None` otherwise.
///
/// The enum carries no data, so it is `Copy` and `Hash` in addition to the
/// comparison traits; callers can pass it by value and use it as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum QueryMatch {
    /// Score of at least 0.4: the description aligns well with the query.
    Strong,
    /// Score of at least 0.15 but below 0.4: some overlap with the query.
    Weak,
    /// Score below 0.15, or a query with no usable tokens.
    None,
}
51
52pub fn test_skill(dir: &Path, query: &str) -> Result<TestResult> {
68 let properties = read_properties(dir)?;
69
70 let (query_match, score) =
72 compute_query_match(query, &properties.name, &properties.description);
73
74 let estimated_tokens =
76 estimate_tokens(&properties.name) + estimate_tokens(&properties.description);
77
78 let diagnostics = validate(dir);
80
81 let structure_diagnostics = validate_structure(dir);
83
84 Ok(TestResult {
85 name: properties.name.clone(),
86 description: properties.description.clone(),
87 query: query.to_string(),
88 query_match,
89 score,
90 estimated_tokens,
91 diagnostics,
92 structure_diagnostics,
93 properties,
94 })
95}
96
/// Line width, in characters, that `format_test_result` wraps its output to.
const DEFAULT_WIDTH: usize = 80;
99
/// Appends a labelled field to `out`, word-wrapping the value when needed.
///
/// The first line is `label` left-padded to `col` characters, one space, then
/// the value. Continuation lines are indented by `col + 1` spaces so wrapped
/// text lines up under the start of the value. Every emitted line ends with
/// `'\n'`.
///
/// * `out` - buffer the formatted text is appended to.
/// * `label` - field label, e.g. `"Skill:"`.
/// * `value` - field text; measured in `char`s, not bytes, so multi-byte
///   UTF-8 is never split inside a code point.
/// * `col` - width of the label column.
/// * `width` - total line width to wrap at.
///
/// Lines break at the last space that keeps the line within `width`; a word
/// longer than the available space is hard-split. If `width` leaves no room
/// for the value at all, the value is emitted unwrapped on a single line.
fn fmt_field(out: &mut String, label: &str, value: &str, col: usize, width: usize) {
    let prefix = format!("{:<col$} ", label);
    let indent = col + 1;
    let max_val = width.saturating_sub(indent);

    // (byte offset, char) pairs: wrap by character count, slice by byte offset.
    let chars: Vec<(usize, char)> = value.char_indices().collect();

    if max_val == 0 || chars.len() <= max_val {
        out.push_str(&prefix);
        out.push_str(value);
        out.push('\n');
        return;
    }

    let continuation = " ".repeat(indent);
    let mut pos = 0;
    let mut first = true;

    while pos < chars.len() {
        if !first {
            // Drop the space(s) we broke at so continuation lines start on a word.
            while pos < chars.len() && chars[pos].1 == ' ' {
                pos += 1;
            }
            if pos >= chars.len() {
                break;
            }
        }

        out.push_str(if first { &prefix } else { &continuation });

        if chars.len() - pos <= max_val {
            out.push_str(&value[chars[pos].0..]);
            out.push('\n');
            break;
        }

        // More than `max_val` chars remain, so `end` is a valid index. The
        // search is inclusive of `end`: a space sitting exactly at the limit
        // means the preceding `max_val` characters fit on this line.
        let end = pos + max_val;
        let break_at = (pos..=end)
            .rev()
            .find(|&i| chars[i].1 == ' ')
            .unwrap_or(end);

        // Strip spaces left dangling before the break (runs of spaces).
        let segment = value[chars[pos].0..chars[break_at].0].trim_end_matches(' ');
        out.push_str(segment);
        out.push('\n');

        pos = break_at;
        first = false;
    }
}
155
156#[must_use]
158pub fn format_test_result(result: &TestResult) -> String {
159 format_test_result_width(result, DEFAULT_WIDTH)
160}
161
162#[must_use]
164pub(crate) fn format_test_result_width(result: &TestResult, width: usize) -> String {
165 let mut out = String::new();
166
167 const W: usize = 13;
169
170 fmt_field(&mut out, "Skill:", &result.name, W, width);
171 fmt_field(
172 &mut out,
173 "Query:",
174 &format!("\"{}\"", result.query),
175 W,
176 width,
177 );
178 fmt_field(&mut out, "Description:", &result.description, W, width);
179 out.push('\n');
180
181 let match_label = match &result.query_match {
183 QueryMatch::Strong => "STRONG ✓ — description aligns well with query",
184 QueryMatch::Weak => "WEAK ⚠ — some overlap, but description may not trigger reliably",
185 QueryMatch::None => "NONE ✗ — description does not match the test query",
186 };
187 fmt_field(
188 &mut out,
189 "Activation:",
190 &format!("{match_label} (score: {:.2})", result.score),
191 W,
192 width,
193 );
194
195 fmt_field(
197 &mut out,
198 "Tokens:",
199 &format!("~{} tokens", result.estimated_tokens),
200 W,
201 width,
202 );
203 out.push('\n');
204
205 let errors: Vec<_> = result.diagnostics.iter().filter(|d| d.is_error()).collect();
207 let warnings: Vec<_> = result
208 .diagnostics
209 .iter()
210 .filter(|d| d.is_warning())
211 .collect();
212
213 if errors.is_empty() && warnings.is_empty() && result.structure_diagnostics.is_empty() {
214 out.push_str("Validation: PASS — no issues found\n");
215 } else {
216 if !errors.is_empty() {
217 out.push_str(&format!("Validation errors ({}):\n", errors.len()));
218 for d in &errors {
219 out.push_str(&format!(" {d}\n"));
220 }
221 }
222 if !warnings.is_empty() {
223 out.push_str(&format!("Validation warnings ({}):\n", warnings.len()));
224 for d in &warnings {
225 out.push_str(&format!(" {d}\n"));
226 }
227 }
228 if !result.structure_diagnostics.is_empty() {
229 out.push_str(&format!(
230 "Structure issues ({}):\n",
231 result.structure_diagnostics.len()
232 ));
233 for d in &result.structure_diagnostics {
234 out.push_str(&format!(" {d}\n"));
235 }
236 }
237 }
238
239 out
240}
241
/// Common English words that carry no matching signal and are dropped by [`tokenize`].
const STOPWORDS: &[&str] = &[
    "a", "an", "the", "is", "are", "was", "were", "of", "to", "in", "for", "on", "with", "and",
    "or", "but", "not", "it", "this", "that",
];

/// Reduces `word` to a crude lowercase stem by stripping one common English suffix.
///
/// Suffixes are tried in order and the first one that matches wins, so the
/// longer, more specific endings are listed before the generic ones. A suffix
/// is only stripped when at least three bytes of the word would remain, which
/// keeps short words such as "is" or "was" intact. This is a heuristic, not a
/// linguistic stemmer: it only aims to make inflected forms of the same word
/// ("process", "processes", "processing") collapse to the same token.
fn stem(word: &str) -> String {
    const SUFFIXES: [&str; 11] = [
        "ting", "sing", "zing", "ning", "ring", "ses", "ies", "ing", "ed", "es", "s",
    ];

    let lowered = word.to_lowercase();
    for suffix in SUFFIXES.iter() {
        if lowered.len() > suffix.len() + 2 {
            if let Some(root) = lowered.strip_suffix(*suffix) {
                return root.to_string();
            }
        }
    }
    lowered
}

/// Splits `text` into lowercase, stemmed tokens with stopwords removed.
///
/// Each whitespace-separated word has leading and trailing non-alphanumeric
/// characters trimmed and is lowercased. Stopwords are rejected *before*
/// stemming: stemming first would turn "this" into "thi", which is not in
/// [`STOPWORDS`] and would leak through as a token. The stemmed form is
/// checked as well, so words that only become a stopword after stemming
/// (e.g. "thes" -> "the") are still dropped.
fn tokenize(text: &str) -> Vec<String> {
    let is_stopword = |w: &str| STOPWORDS.contains(&w);

    text.split_whitespace()
        .filter_map(|raw| {
            let cleaned = raw
                .trim_matches(|c: char| !c.is_alphanumeric())
                .to_lowercase();
            if cleaned.is_empty() || is_stopword(&cleaned) {
                return None;
            }
            let stemmed = stem(&cleaned);
            if stemmed.is_empty() || is_stopword(&stemmed) {
                None
            } else {
                Some(stemmed)
            }
        })
        .collect()
}
280
/// Finds the first "Use when ..." / "Use this when ..." trigger in `description`.
///
/// Lines are scanned in order. Within a line the phrase is recognised
/// (case-insensitively) at the start of the line or at the start of a
/// sentence, i.e. directly after `.`, `!`, `?`, `;` or `:`. This covers the
/// common single-line form "Does X. Use when Y.". Occurrences in the middle
/// of a sentence ("do not use when ...", "misuse when ...") are ignored.
///
/// Returns the text from the trigger phrase to the end of its line, trimmed,
/// or `None` when the description contains no trigger.
fn extract_trigger(description: &str) -> Option<String> {
    const PHRASES: [&str; 2] = ["use when", "use this when"];

    for line in description.lines() {
        let trimmed = line.trim();
        // ASCII-only lowercasing keeps byte offsets identical to `trimmed`,
        // so indices found in `lower` can be used to slice `trimmed`.
        let lower = trimmed.to_ascii_lowercase();

        let mut earliest: Option<usize> = None;
        for phrase in PHRASES.iter() {
            for (idx, _) in lower.match_indices(*phrase) {
                let before = lower[..idx].trim_end();
                let at_sentence_start = match before.chars().next_back() {
                    None => true,
                    Some(c) => matches!(c, '.' | '!' | '?' | ';' | ':'),
                };
                if at_sentence_start {
                    earliest = Some(earliest.map_or(idx, |e| e.min(idx)));
                    // Matches come in ascending order; the first valid one is the earliest.
                    break;
                }
            }
        }

        if let Some(idx) = earliest {
            return Some(trimmed[idx..].to_string());
        }
    }
    None
}
295
296fn compute_query_match(query: &str, name: &str, description: &str) -> (QueryMatch, f64) {
308 let query_tokens = tokenize(query);
309
310 if query_tokens.is_empty() {
311 return (QueryMatch::None, 0.0);
312 }
313
314 let desc_tokens = tokenize(description);
315
316 let query_set: std::collections::HashSet<&str> =
320 query_tokens.iter().map(|s| s.as_str()).collect();
321 let desc_set: std::collections::HashSet<&str> =
322 desc_tokens.iter().map(|s| s.as_str()).collect();
323 let intersection = query_set.intersection(&desc_set).count();
324 let desc_overlap = if query_set.is_empty() {
325 0.0
326 } else {
327 intersection as f64 / query_set.len() as f64
328 };
329
330 let trigger_score = if let Some(trigger) = extract_trigger(description) {
332 let trigger_lower = trigger.to_lowercase();
333 if query_tokens
334 .iter()
335 .any(|t| trigger_lower.contains(t.as_str()))
336 {
337 1.0
338 } else {
339 0.0
340 }
341 } else {
342 0.0
343 };
344
345 let name_lower = name.to_lowercase();
347 let name_score = if query_tokens.iter().any(|t| name_lower.contains(t.as_str())) {
348 1.0
349 } else {
350 0.0
351 };
352
353 let score = 0.5 * desc_overlap + 0.3 * trigger_score + 0.2 * name_score;
355
356 let category = if score >= 0.4 {
357 QueryMatch::Strong
358 } else if score >= 0.15 {
359 QueryMatch::Weak
360 } else {
361 QueryMatch::None
362 };
363
364 (category, score)
365}
366
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::tempdir;

    /// Creates a temporary skill directory named `name` containing a `SKILL.md`
    /// with the given front-matter and body.
    ///
    /// Returns the temp-dir guard (keep it alive for the duration of the test)
    /// together with the path of the skill directory.
    fn make_skill(
        name: &str,
        description: &str,
        body: &str,
    ) -> (tempfile::TempDir, std::path::PathBuf) {
        let parent = tempdir().unwrap();
        let dir = parent.path().join(name);
        fs::create_dir(&dir).unwrap();
        fs::write(
            dir.join("SKILL.md"),
            format!("---\nname: {name}\ndescription: {description}\n---\n{body}\n"),
        )
        .unwrap();
        (parent, dir)
    }

    /// Asserts that every line starts with `indent` spaces, i.e. that wrapped
    /// text is aligned under the value column.
    fn assert_continuations_aligned(lines: &[&str], indent: usize) {
        let pad = " ".repeat(indent);
        for line in lines {
            assert!(
                line.starts_with(pad.as_str()),
                "continuation not aligned: {line:?}",
            );
        }
    }

    // ── compute_query_match ────────────────────────────────────────────

    #[test]
    fn strong_match_when_query_words_in_description() {
        let (m, score) = compute_query_match(
            "process PDF files",
            "pdf-processor",
            "Processes PDF files and generates detailed reports",
        );
        assert_eq!(m, QueryMatch::Strong);
        assert!(score >= 0.4, "score {score} should be ≥ 0.4");
    }

    #[test]
    fn weak_match_with_partial_overlap() {
        let (m, score) = compute_query_match(
            "generate database migration scripts quickly",
            "pdf-processor",
            "Processes PDF files and generates detailed reports",
        );
        assert!(
            matches!(m, QueryMatch::Weak | QueryMatch::None),
            "expected Weak or None for partial overlap, got {m:?} (score: {score})"
        );
    }

    #[test]
    fn no_match_with_unrelated_query() {
        let (m, score) = compute_query_match(
            "deploy kubernetes cluster",
            "pdf-processor",
            "Processes PDF files and generates detailed reports",
        );
        assert_eq!(m, QueryMatch::None);
        assert!(score < 0.15, "score {score} should be < 0.15");
    }

    #[test]
    fn empty_query_is_no_match() {
        let (m, score) = compute_query_match("", "some-skill", "Some description");
        assert_eq!(m, QueryMatch::None);
        assert_eq!(score, 0.0);
    }

    #[test]
    fn case_insensitive_matching() {
        let (m, _score) = compute_query_match(
            "PDF PROCESSING",
            "pdf-processor",
            "Processes pdf files and generates reports",
        );
        assert!(
            matches!(m, QueryMatch::Strong | QueryMatch::Weak),
            "expected Strong or Weak for case-insensitive match, got {m:?}"
        );
    }

    #[test]
    fn trigger_phrase_boosts_score() {
        let (_, score_with_trigger) = compute_query_match(
            "lint javascript",
            "unrelated-name",
            "Analyzes syntax patterns. Use when you want to lint javascript files.",
        );
        let (_, score_without_trigger) = compute_query_match(
            "lint javascript",
            "unrelated-name",
            "Analyzes syntax patterns in various source files.",
        );
        assert!(
            score_with_trigger > score_without_trigger,
            "trigger phrase should boost score: {score_with_trigger} vs {score_without_trigger}"
        );
    }

    #[test]
    fn name_match_boosts_score() {
        let (_, score_name_match) = compute_query_match(
            "process pdf",
            "pdf-processor",
            "Handles document transformation tasks.",
        );
        let (_, score_no_name) = compute_query_match(
            "process pdf",
            "document-handler",
            "Handles document transformation tasks.",
        );
        assert!(
            score_name_match > score_no_name,
            "name match should boost score: {score_name_match} vs {score_no_name}"
        );
    }

    #[test]
    fn all_zero_inputs_produce_zero_score() {
        let (m, score) = compute_query_match(
            "xylophone zephyr",
            "unrelated-name",
            "Completely unrelated description about cooking pasta.",
        );
        assert_eq!(m, QueryMatch::None);
        assert_eq!(score, 0.0, "totally unrelated query should score 0.0");
    }

    // ── test_skill ─────────────────────────────────────────────────────

    #[test]
    fn test_skill_returns_result_for_valid_skill() {
        let (_parent, dir) = make_skill(
            "pdf-tool",
            "Processes PDF files and extracts text content",
            "Body content here.",
        );
        let result = test_skill(&dir, "process some PDF files").unwrap();
        assert_eq!(result.name, "pdf-tool");
        assert_eq!(result.query_match, QueryMatch::Strong);
        assert!(result.estimated_tokens > 0);
    }

    #[test]
    fn test_skill_reports_validation_issues() {
        // Front-matter without a description: reading the properties fails,
        // so `test_skill` returns an error rather than a result.
        let parent = tempdir().unwrap();
        let dir = parent.path().join("bad-skill");
        fs::create_dir(&dir).unwrap();
        fs::write(dir.join("SKILL.md"), "---\nname: bad-skill\n---\nBody.\n").unwrap();
        let result = test_skill(&dir, "anything");
        assert!(result.is_err());
    }

    #[test]
    fn test_skill_detects_structure_issues() {
        let (_parent, dir) = make_skill(
            "ref-skill",
            "Skill with broken reference",
            "See [guide](nonexistent.md) for details.",
        );
        let result = test_skill(&dir, "guide reference").unwrap();
        assert!(
            !result.structure_diagnostics.is_empty(),
            "expected structure diagnostics for broken reference",
        );
    }

    // ── format_test_result ─────────────────────────────────────────────

    #[test]
    fn format_includes_skill_name_and_query() {
        let (_parent, dir) =
            make_skill("format-test", "A test skill for formatting output", "Body.");
        let result = test_skill(&dir, "test formatting").unwrap();
        let text = format_test_result(&result);
        assert!(text.contains("format-test"));
        assert!(text.contains("test formatting"));
    }

    #[test]
    fn format_shows_activation_status() {
        let (_parent, dir) = make_skill("activation-test", "Processes PDF files quickly", "Body.");
        let result = test_skill(&dir, "deploy kubernetes cluster").unwrap();
        let text = format_test_result(&result);
        assert!(text.contains("NONE"));
    }

    #[test]
    fn format_shows_pass_for_clean_skill() {
        let (_parent, dir) = make_skill(
            "clean-skill",
            "A clean skill that passes validation",
            "Body content.",
        );
        let result = test_skill(&dir, "clean skill").unwrap();
        let text = format_test_result(&result);
        assert!(text.contains("PASS"));
    }

    // ── fmt_field ──────────────────────────────────────────────────────

    #[test]
    fn fmt_field_short_value_no_wrap() {
        let mut out = String::new();
        fmt_field(&mut out, "Label:", "short", 13, 80);
        // Label padded to 13 columns, one separator space, then the value.
        assert_eq!(out, "Label:        short\n");
    }

    #[test]
    fn fmt_field_long_value_wraps_aligned() {
        let mut out = String::new();
        fmt_field(
            &mut out,
            "Description:",
            "Validates AI agent skill definitions against the spec",
            13,
            40,
        );
        let lines: Vec<&str> = out.lines().collect();
        assert!(lines.len() > 1, "expected wrapping, got: {out:?}");
        assert!(lines[0].starts_with("Description:"));
        // Continuation lines are indented by col + 1 = 14 spaces.
        assert_continuations_aligned(&lines[1..], 14);
    }

    #[test]
    fn fmt_field_multibyte_utf8_no_panic() {
        let mut out = String::new();
        fmt_field(
            &mut out,
            "Activation:",
            "WEAK ⚠ — some overlap, but description may not trigger reliably (score: 0.33)",
            13,
            50,
        );
        let lines: Vec<&str> = out.lines().collect();
        assert!(lines.len() > 1, "expected wrapping, got: {out:?}");
        assert_continuations_aligned(&lines[1..], 14);
    }

    #[test]
    fn fmt_field_char_count_not_byte_len() {
        let mut out = String::new();
        fmt_field(&mut out, "Item:", "café latte warm drink", 6, 20);
        assert!(!out.is_empty(), "should produce output without panic",);
        for line in out.lines() {
            assert!(
                line.chars().count() <= 20,
                "line exceeds width: {line:?} ({} chars)",
                line.chars().count(),
            );
        }
    }

    #[test]
    fn fmt_field_consecutive_spaces_no_blank_lines() {
        let mut out = String::new();
        // Runs of spaces must not produce an empty continuation line.
        fmt_field(&mut out, "Label:", "word   word   word   end", 6, 18);
        assert!(out.lines().count() > 1, "expected wrapping, got: {out:?}");
        for line in out.lines() {
            let trimmed = line.trim();
            assert!(!trimmed.is_empty(), "blank continuation line: {out:?}");
        }
    }

    #[test]
    fn format_test_result_wraps_description() {
        let long_desc = "Validates AI agent skill definitions (SKILL.md files) against \
                         the Anthropic agent skill specification and checks all fields";
        let (_parent, dir) = make_skill("wrap-test", long_desc, "Body content.");
        let result = test_skill(&dir, "validate skill").unwrap();
        let text = format_test_result_width(&result, 60);
        let desc_lines: Vec<&str> = text
            .lines()
            .skip_while(|l| !l.starts_with("Description:"))
            .take_while(|l| !l.is_empty())
            .collect();
        assert!(
            desc_lines.len() > 1,
            "description should wrap at width 60: {desc_lines:?}",
        );
        assert_continuations_aligned(&desc_lines[1..], 14);
    }
}