/// Coarse classification of why a skill's tool invocation failed.
///
/// Values are produced either from free-form error text via
/// [`FailureKind::from_error`] or from a structured tool error category via
/// the `From` conversion implemented for this type.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum FailureKind {
    /// The error text mentioned an exit code.
    ExitNonzero,
    /// The operation timed out.
    Timeout,
    /// The error text reported "permission denied".
    PermissionDenied,
    /// The failure points at the chosen approach rather than at its execution.
    WrongApproach,
    /// Partial failure. Never produced by [`FailureKind::from_error`].
    Partial,
    /// The request itself was malformed.
    SyntaxError,
    /// Fallback when nothing more specific matches.
    Unknown,
}

impl FailureKind {
    /// Returns the fixed `snake_case` label for this kind.
    #[must_use]
    pub fn as_str(&self) -> &'static str {
        match *self {
            Self::Unknown => "unknown",
            Self::SyntaxError => "syntax_error",
            Self::Partial => "partial",
            Self::WrongApproach => "wrong_approach",
            Self::PermissionDenied => "permission_denied",
            Self::Timeout => "timeout",
            Self::ExitNonzero => "exit_nonzero",
        }
    }

    /// Classifies a free-form error message by case-insensitive substring match.
    ///
    /// Checks run in priority order: timeout phrases first, then
    /// "permission denied", then "exit code". A message mentioning several of
    /// them is classified by the first one that matches. Anything else is
    /// [`FailureKind::Unknown`].
    #[must_use]
    pub fn from_error(error: &str) -> Self {
        let haystack = error.to_lowercase();
        let mentions = |needle: &str| haystack.contains(needle);

        if mentions("timed out") || mentions("timeout") {
            return Self::Timeout;
        }
        if mentions("permission denied") {
            return Self::PermissionDenied;
        }
        if mentions("exit code") {
            return Self::ExitNonzero;
        }
        Self::Unknown
    }
}
/// Collapses the structured tool error taxonomy into the coarser [`FailureKind`].
impl From<zeph_tools::error_taxonomy::ToolErrorCategory> for FailureKind {
    fn from(category: zeph_tools::error_taxonomy::ToolErrorCategory) -> Self {
        use zeph_tools::error_taxonomy::ToolErrorCategory as Category;

        // Every category is listed explicitly; there is no wildcard arm.
        match category {
            Category::Timeout => Self::Timeout,
            // The call itself was malformed.
            Category::TypeMismatch | Category::InvalidParameters => Self::SyntaxError,
            // The tool could not be used the way the skill tried to use it.
            Category::ToolNotFound | Category::ConfirmationRequired | Category::PolicyBlocked => {
                Self::WrongApproach
            }
            // Everything else carries no more specific classification.
            Category::Cancelled
            | Category::PermanentFailure
            | Category::NetworkError
            | Category::ServerError
            | Category::RateLimited => Self::Unknown,
        }
    }
}
/// Result of one attempt to serve a request with a skill.
#[derive(Debug, Clone)]
pub enum SkillOutcome {
    /// The attempt succeeded; no skill is attributed.
    Success,
    /// A tool invoked on behalf of the skill failed.
    ToolFailure {
        /// Name of the skill that was in use.
        skill_name: String,
        /// Error description associated with the failure.
        error_context: String,
        /// Output captured from the tool.
        tool_output: String,
        /// Coarse classification of the failure.
        kind: FailureKind,
    },
    /// The attempt produced an empty response.
    EmptyResponse {
        /// Name of the skill that was in use.
        skill_name: String,
    },
    /// The user rejected the result.
    UserRejection {
        /// Name of the skill that was in use.
        skill_name: String,
        /// Feedback text supplied with the rejection.
        feedback: String,
    },
}

impl SkillOutcome {
    /// Returns the fixed `snake_case` label for this outcome.
    ///
    /// Every arm yields a string literal, so the result is `&'static str` and
    /// can outlive the `SkillOutcome` it was read from. This matches the
    /// signature of `FailureKind::as_str`.
    #[must_use]
    pub fn outcome_str(&self) -> &'static str {
        match self {
            Self::Success => "success",
            Self::ToolFailure { .. } => "tool_failure",
            Self::EmptyResponse { .. } => "empty_response",
            Self::UserRejection { .. } => "user_rejection",
        }
    }

    /// Returns the name of the skill this outcome is attributed to.
    ///
    /// `None` for [`SkillOutcome::Success`]; every other variant carries a
    /// `skill_name` field, which is returned as a borrow.
    #[must_use]
    pub fn skill_name(&self) -> Option<&str> {
        match self {
            Self::Success => None,
            Self::ToolFailure { skill_name, .. }
            | Self::EmptyResponse { skill_name }
            | Self::UserRejection { skill_name, .. } => Some(skill_name),
        }
    }
}
/// Outcome counters recorded for one version of a skill.
#[derive(Debug, Clone)]
pub struct SkillMetrics {
    /// Name of the skill the counters belong to.
    pub skill_name: String,
    /// Version of the skill the counters belong to.
    pub version: i64,
    /// Denominator used by [`SkillMetrics::success_rate`].
    pub total: i64,
    /// Numerator used by [`SkillMetrics::success_rate`].
    pub successes: i64,
    /// Failure counter; not read by any method in this block.
    pub failures: i64,
}

impl SkillMetrics {
    /// Returns `successes / total` as a floating-point ratio.
    ///
    /// Yields `0.0` when `total` is zero, so the division is never performed
    /// with a zero denominator.
    #[must_use]
    #[allow(clippy::cast_precision_loss)]
    pub fn success_rate(&self) -> f64 {
        match self.total {
            0 => 0.0,
            total => self.successes as f64 / total as f64,
        }
    }
}
/// Prompt template asking the model to reflect on a failed skill attempt and retry.
///
/// Placeholders: `{name}`, `{body}`, `{error_context}`, `{tool_output}`.
pub const REFLECTION_PROMPT_TEMPLATE: &str = "\
You attempted to help the user with their request using the following skill instructions:
<skill name=\"{name}\">
{body}
</skill>
The attempt failed with this error:
{error_context}
Tool output:
{tool_output}
Analyze what went wrong and suggest an improved approach. \
Then attempt to fulfill the original user request using the improved approach.";

/// Renders [`REFLECTION_PROMPT_TEMPLATE`] for one failed attempt.
///
/// * `name` - skill name, inserted into the `<skill name="...">` attribute.
/// * `body` - skill instructions, inserted between the `<skill>` tags.
/// * `error_context` - description of the failure.
/// * `tool_output` - output captured from the failing tool.
///
/// Substitution is a single pass over the template: argument text is copied
/// verbatim and never rescanned. A skill body or tool output that happens to
/// contain text such as `{tool_output}` is therefore not expanded a second
/// time, which chained `str::replace` calls would do.
#[must_use]
pub fn build_reflection_prompt(
    name: &str,
    body: &str,
    error_context: &str,
    tool_output: &str,
) -> String {
    /// Copies `template`, replacing each occurrence of a token from
    /// `substitutions` with its value. Tokens must start with `{`.
    fn fill_placeholders(template: &str, substitutions: &[(&str, &str)]) -> String {
        let extra: usize = substitutions.iter().map(|(_, value)| value.len()).sum();
        let mut rendered = String::with_capacity(template.len() + extra);
        let mut rest = template;
        while let Some(brace) = rest.find('{') {
            rendered.push_str(&rest[..brace]);
            rest = &rest[brace..];
            let hit = substitutions
                .iter()
                .find_map(|&(token, value)| rest.strip_prefix(token).map(|tail| (value, tail)));
            match hit {
                Some((value, tail)) => {
                    rendered.push_str(value);
                    rest = tail;
                }
                None => {
                    // A brace that starts no known token is ordinary text.
                    rendered.push('{');
                    rest = &rest[1..];
                }
            }
        }
        rendered.push_str(rest);
        rendered
    }

    fill_placeholders(
        REFLECTION_PROMPT_TEMPLATE,
        &[
            ("{name}", name),
            ("{body}", body),
            ("{error_context}", error_context),
            ("{tool_output}", tool_output),
        ],
    )
}
/// Prompt template asking the model to rewrite a skill after an alternative
/// approach succeeded.
///
/// Placeholders: `{name}`, `{original_body}`, `{error_context}`,
/// `{successful_response}`, `{user_feedback_section}`.
pub const IMPROVEMENT_PROMPT_TEMPLATE: &str = "\
The original skill instructions failed, but an alternative approach succeeded.
Original skill:
<skill name=\"{name}\">
{original_body}
</skill>
Failed approach error: {error_context}
Successful approach: {successful_response}
{user_feedback_section}
Generate an improved version of the skill instructions that incorporates the lesson \
learned. Keep the same format (markdown with bash code blocks). Be concise.
The improved skill body must contain at most 3 top-level sections (## headers). \
Keep it focused and concise.
Only output the improved skill body (no frontmatter, no explanation).";

/// Renders [`IMPROVEMENT_PROMPT_TEMPLATE`].
///
/// * `name` - skill name, inserted into the `<skill name="...">` attribute.
/// * `original_body` - current skill instructions.
/// * `error_context` - description of the failed approach.
/// * `successful_response` - the approach that worked.
/// * `user_feedback` - optional feedback; when present, a
///   "User feedback on the current skill:" section is inserted, otherwise the
///   `{user_feedback_section}` placeholder is replaced with nothing.
///
/// Substitution is a single pass over the template: argument text is copied
/// verbatim and never rescanned. A skill body or response that happens to
/// contain text such as `{user_feedback_section}` is therefore not expanded a
/// second time, which chained `str::replace` calls would do.
#[must_use]
pub fn build_improvement_prompt(
    name: &str,
    original_body: &str,
    error_context: &str,
    successful_response: &str,
    user_feedback: Option<&str>,
) -> String {
    /// Copies `template`, replacing each occurrence of a token from
    /// `substitutions` with its value. Tokens must start with `{`.
    fn fill_placeholders(template: &str, substitutions: &[(&str, &str)]) -> String {
        let extra: usize = substitutions.iter().map(|(_, value)| value.len()).sum();
        let mut rendered = String::with_capacity(template.len() + extra);
        let mut rest = template;
        while let Some(brace) = rest.find('{') {
            rendered.push_str(&rest[..brace]);
            rest = &rest[brace..];
            let hit = substitutions
                .iter()
                .find_map(|&(token, value)| rest.strip_prefix(token).map(|tail| (value, tail)));
            match hit {
                Some((value, tail)) => {
                    rendered.push_str(value);
                    rest = tail;
                }
                None => {
                    // A brace that starts no known token is ordinary text.
                    rendered.push('{');
                    rest = &rest[1..];
                }
            }
        }
        rendered.push_str(rest);
        rendered
    }

    let feedback_section = user_feedback.map_or_else(String::new, |fb| {
        format!("\nUser feedback on the current skill:\n{fb}\n")
    });

    fill_placeholders(
        IMPROVEMENT_PROMPT_TEMPLATE,
        &[
            ("{name}", name),
            ("{original_body}", original_body),
            ("{error_context}", error_context),
            ("{successful_response}", successful_response),
            ("{user_feedback_section}", feedback_section.as_str()),
        ],
    )
}
/// Deserialized evaluation verdict with the three fields that
/// `EVALUATION_PROMPT_TEMPLATE` asks the model to return as JSON.
///
/// No field has a serde default, so all three must be present in the input.
#[derive(Debug, Clone, serde::Deserialize, schemars::JsonSchema)]
pub struct SkillEvaluation {
/// `true` when the failure is judged a systematic skill problem, `false` for a transient issue.
pub should_improve: bool,
/// Issues reported by the evaluator, as a list of strings.
pub issues: Vec<String>,
/// Severity label; the prompt requests "low", "medium", or "high", but this type does not validate it.
pub severity: String,
}
/// Prompt template asking the model whether a failure reflects a systematic
/// skill problem or a transient issue.
///
/// Placeholders: `{name}`, `{body}`, `{error_context}`, `{tool_output}`,
/// `{success_rate}`.
pub const EVALUATION_PROMPT_TEMPLATE: &str = "\
Evaluate whether the following skill needs improvement based on the error context.
<skill name=\"{name}\">
{body}
</skill>
Error context: {error_context}
Tool output: {tool_output}
Current success rate: {success_rate}%
Determine if this is a systematic skill problem (should_improve: true) \
or a transient issue like network timeout, rate limit, etc. (should_improve: false).
Respond in JSON with fields: should_improve (bool), issues (list of strings), severity (\"low\", \"medium\", or \"high\").";

/// Renders [`EVALUATION_PROMPT_TEMPLATE`].
///
/// * `name` - skill name, inserted into the `<skill name="...">` attribute.
/// * `body` - skill instructions, inserted between the `<skill>` tags.
/// * `error_context` - description of the failure.
/// * `tool_output` - output captured from the failing tool.
/// * `metrics` - its `success_rate()` is multiplied by 100 and formatted with
///   no decimal places for the `{success_rate}` placeholder.
///
/// Substitution is a single pass over the template: argument text is copied
/// verbatim and never rescanned. A skill body or tool output that happens to
/// contain text such as `{success_rate}` is therefore not expanded a second
/// time, which chained `str::replace` calls would do.
#[must_use]
pub fn build_evaluation_prompt(
    name: &str,
    body: &str,
    error_context: &str,
    tool_output: &str,
    metrics: &SkillMetrics,
) -> String {
    /// Copies `template`, replacing each occurrence of a token from
    /// `substitutions` with its value. Tokens must start with `{`.
    fn fill_placeholders(template: &str, substitutions: &[(&str, &str)]) -> String {
        let extra: usize = substitutions.iter().map(|(_, value)| value.len()).sum();
        let mut rendered = String::with_capacity(template.len() + extra);
        let mut rest = template;
        while let Some(brace) = rest.find('{') {
            rendered.push_str(&rest[..brace]);
            rest = &rest[brace..];
            let hit = substitutions
                .iter()
                .find_map(|&(token, value)| rest.strip_prefix(token).map(|tail| (value, tail)));
            match hit {
                Some((value, tail)) => {
                    rendered.push_str(value);
                    rest = tail;
                }
                None => {
                    // A brace that starts no known token is ordinary text.
                    rendered.push('{');
                    rest = &rest[1..];
                }
            }
        }
        rendered.push_str(rest);
        rendered
    }

    // Whole-number percentage, e.g. a rate of 0.7 renders as "70".
    let rate = format!("{:.0}", metrics.success_rate() * 100.0);

    fill_placeholders(
        EVALUATION_PROMPT_TEMPLATE,
        &[
            ("{name}", name),
            ("{body}", body),
            ("{error_context}", error_context),
            ("{tool_output}", tool_output),
            ("{success_rate}", rate.as_str()),
        ],
    )
}
/// Prompt template asking the model whether an auto-generated skill body stays
/// within the domain of the original skill.
///
/// Placeholders: `{description}`, `{name}`, `{body}`. The braces in the JSON
/// example line are literal prompt text, not placeholders.
pub const DOMAIN_GATE_PROMPT_TEMPLATE: &str = "\
Evaluate whether the following auto-generated skill version stays within \
the domain of the original skill.
Original skill description: {description}
Original skill name: {name}
Generated skill body:
<skill>
{body}
</skill>
Respond in JSON: {\"domain_relevant\": bool, \"reasoning\": string}
Return domain_relevant=true only if the generated body is focused on the \
same domain as the original skill description. Return false if it drifts \
into unrelated topics or adds capabilities beyond the original scope.";
/// Deserialized domain-gate verdict with the two fields that
/// `DOMAIN_GATE_PROMPT_TEMPLATE` asks the model to return as JSON.
#[derive(Debug, Clone, serde::Deserialize, schemars::JsonSchema)]
pub struct DomainGateResult {
/// `true` when the generated body is judged to stay in the original skill's domain.
pub domain_relevant: bool,
/// Free-form explanation accompanying the verdict.
pub reasoning: String,
}
/// Renders `DOMAIN_GATE_PROMPT_TEMPLATE`.
///
/// * `name` - name of the original skill.
/// * `description` - description of the original skill.
/// * `body` - generated skill body to be judged.
///
/// Substitution is a single pass over the template: argument text is copied
/// verbatim and never rescanned. A description or body that happens to contain
/// text such as `{name}` or `{body}` is therefore not expanded a second time,
/// which chained `str::replace` calls would do. Braces in the template that do
/// not start a known placeholder (the JSON example) are left as they are.
#[must_use]
pub fn build_domain_gate_prompt(name: &str, description: &str, body: &str) -> String {
    /// Copies `template`, replacing each occurrence of a token from
    /// `substitutions` with its value. Tokens must start with `{`.
    fn fill_placeholders(template: &str, substitutions: &[(&str, &str)]) -> String {
        let extra: usize = substitutions.iter().map(|(_, value)| value.len()).sum();
        let mut rendered = String::with_capacity(template.len() + extra);
        let mut rest = template;
        while let Some(brace) = rest.find('{') {
            rendered.push_str(&rest[..brace]);
            rest = &rest[brace..];
            let hit = substitutions
                .iter()
                .find_map(|&(token, value)| rest.strip_prefix(token).map(|tail| (value, tail)));
            match hit {
                Some((value, tail)) => {
                    rendered.push_str(value);
                    rest = tail;
                }
                None => {
                    // A brace that starts no known token is ordinary text.
                    rendered.push('{');
                    rest = &rest[1..];
                }
            }
        }
        rendered.push_str(rest);
        rendered
    }

    fill_placeholders(
        DOMAIN_GATE_PROMPT_TEMPLATE,
        &[
            ("{description}", description),
            ("{name}", name),
            ("{body}", body),
        ],
    )
}
/// Absolute upper bound, in bytes, on a generated skill body.
pub const MAX_BODY_BYTES: usize = 65_536;

/// Checks that `generated` has not grown unreasonably relative to `original`.
///
/// Returns `true` only when the byte length of `generated` is at most twice
/// the byte length of `original` and at most [`MAX_BODY_BYTES`]. Both bounds
/// are inclusive.
#[must_use]
pub fn validate_body_size(original: &str, generated: &str) -> bool {
    let relative_cap = original.len() * 2;
    let size = generated.len();
    size <= MAX_BODY_BYTES && size <= relative_cap
}
/// Checks that `body` has no more than `max_sections` second-level headers.
///
/// A header is any line that begins with the exact prefix `"## "`. Lines
/// starting with `"# "` or `"### "` do not match, and neither do lines where
/// the prefix is preceded by whitespace.
#[must_use]
pub fn validate_body_sections(body: &str, max_sections: u32) -> bool {
    let mut headers = 0usize;
    for line in body.lines() {
        if line.starts_with("## ") {
            headers += 1;
        }
    }
    headers <= max_sections as usize
}
// Unit and property tests for the outcome types, prompt builders, JSON result
// structs and body validators defined earlier in this file.
#[cfg(test)]
mod tests {
use super::*;
// Each `SkillOutcome` variant maps to its fixed outcome label.
#[test]
fn outcome_str_variants() {
assert_eq!(SkillOutcome::Success.outcome_str(), "success");
assert_eq!(
SkillOutcome::ToolFailure {
skill_name: "git".into(),
error_context: "err".into(),
tool_output: "out".into(),
kind: FailureKind::Unknown,
}
.outcome_str(),
"tool_failure"
);
assert_eq!(
SkillOutcome::EmptyResponse {
skill_name: "git".into(),
}
.outcome_str(),
"empty_response"
);
assert_eq!(
SkillOutcome::UserRejection {
skill_name: "git".into(),
feedback: "bad".into(),
}
.outcome_str(),
"user_rejection"
);
}
// Only `Success` carries no skill name; every other variant exposes its own.
#[test]
fn skill_name_extraction() {
assert!(SkillOutcome::Success.skill_name().is_none());
assert_eq!(
SkillOutcome::ToolFailure {
skill_name: "docker".into(),
error_context: String::new(),
tool_output: String::new(),
kind: FailureKind::Unknown,
}
.skill_name(),
Some("docker")
);
assert_eq!(
SkillOutcome::EmptyResponse {
skill_name: "git".into(),
}
.skill_name(),
Some("git")
);
assert_eq!(
SkillOutcome::UserRejection {
skill_name: "sql".into(),
feedback: String::new(),
}
.skill_name(),
Some("sql")
);
}
// A zero total must yield 0.0 instead of dividing by zero.
#[test]
fn success_rate_zero_total() {
let m = SkillMetrics {
skill_name: "x".into(),
version: 1,
total: 0,
successes: 0,
failures: 0,
};
assert!((m.success_rate() - 0.0).abs() < f64::EPSILON);
}
#[test]
fn success_rate_all_success() {
let m = SkillMetrics {
skill_name: "x".into(),
version: 1,
total: 10,
successes: 10,
failures: 0,
};
assert!((m.success_rate() - 1.0).abs() < f64::EPSILON);
}
#[test]
fn success_rate_all_failures() {
let m = SkillMetrics {
skill_name: "x".into(),
version: 1,
total: 5,
successes: 0,
failures: 5,
};
assert!((m.success_rate() - 0.0).abs() < f64::EPSILON);
}
#[test]
fn success_rate_mixed() {
let m = SkillMetrics {
skill_name: "x".into(),
version: 1,
total: 4,
successes: 3,
failures: 1,
};
assert!((m.success_rate() - 0.75).abs() < f64::EPSILON);
}
#[test]
fn build_reflection_prompt_substitutes() {
let result = build_reflection_prompt("git", "do git stuff", "exit code 1", "fatal: error");
assert!(result.contains("<skill name=\"git\">"));
assert!(result.contains("do git stuff"));
assert!(result.contains("exit code 1"));
assert!(result.contains("fatal: error"));
}
// Without feedback the optional "User feedback" section must be absent.
#[test]
fn build_improvement_prompt_without_feedback() {
let result = build_improvement_prompt("git", "original body", "the error", "the fix", None);
assert!(result.contains("<skill name=\"git\">"));
assert!(result.contains("original body"));
assert!(result.contains("the error"));
assert!(result.contains("the fix"));
assert!(!result.contains("User feedback"));
}
#[test]
fn build_improvement_prompt_with_feedback() {
let result = build_improvement_prompt(
"git",
"original body",
"the error",
"the fix",
Some("please fix the commit flow"),
);
assert!(result.contains("User feedback on the current skill:"));
assert!(result.contains("please fix the commit flow"));
}
// 10 bytes generated against 5 bytes original: exactly at the 2x bound.
#[test]
fn validate_body_size_within_limit() {
assert!(validate_body_size("12345", "1234567890"));
}
// 11 bytes generated against 5 bytes original: one byte over the 2x bound.
#[test]
fn validate_body_size_exceeds_limit() {
assert!(!validate_body_size("12345", "12345678901"));
}
// With an empty original the 2x bound is 0, so only an empty body passes.
#[test]
fn validate_body_size_empty_original() {
assert!(validate_body_size("", ""));
assert!(!validate_body_size("", "x"));
}
#[test]
fn build_evaluation_prompt_substitutes() {
let metrics = SkillMetrics {
skill_name: "git".into(),
version: 1,
total: 10,
successes: 7,
failures: 3,
};
let result =
build_evaluation_prompt("git", "do git stuff", "exit code 1", "fatal", &metrics);
assert!(result.contains("<skill name=\"git\">"));
assert!(result.contains("do git stuff"));
assert!(result.contains("exit code 1"));
assert!(result.contains("fatal"));
assert!(result.contains("70%"));
}
#[test]
fn skill_evaluation_deserialize() {
let json = r#"{"should_improve": true, "issues": ["bad pattern"], "severity": "high"}"#;
let eval: SkillEvaluation = serde_json::from_str(json).unwrap();
assert!(eval.should_improve);
assert_eq!(eval.issues.len(), 1);
assert_eq!(eval.severity, "high");
}
#[test]
fn skill_evaluation_skip() {
let json = r#"{"should_improve": false, "issues": [], "severity": "low"}"#;
let eval: SkillEvaluation = serde_json::from_str(json).unwrap();
assert!(!eval.should_improve);
assert!(eval.issues.is_empty());
}
// 70_000 bytes is within 2x of 40_000 but above MAX_BODY_BYTES (65_536).
#[test]
fn validate_body_size_absolute_cap() {
let large_original = "x".repeat(40_000);
let large_generated = "x".repeat(70_000);
assert!(!validate_body_size(&large_original, &large_generated));
}
#[test]
fn validate_body_sections_within_limit() {
let body = "## Setup\ndo stuff\n## Usage\nmore stuff\n";
assert!(validate_body_sections(body, 3));
}
// The limit is inclusive: exactly `max_sections` headers are accepted.
#[test]
fn validate_body_sections_at_limit() {
let body = "## Setup\n## Usage\n## Tips\n";
assert!(validate_body_sections(body, 3));
}
#[test]
fn validate_body_sections_exceeds_limit() {
let body = "## A\n## B\n## C\n## D\n";
assert!(!validate_body_sections(body, 3));
}
#[test]
fn validate_body_sections_no_sections() {
let body = "Just some text without any headers.\n";
assert!(validate_body_sections(body, 3));
}
// Only the single `## ` line counts; the `# ` and `### ` lines do not.
#[test]
fn validate_body_sections_h1_not_counted() {
let body = "# Title\n## Section\n### Subsection\n";
assert!(validate_body_sections(body, 1));
}
#[test]
fn domain_gate_result_deserialize() {
let json = r#"{"domain_relevant": true, "reasoning": "matches original domain"}"#;
let result: DomainGateResult = serde_json::from_str(json).unwrap();
assert!(result.domain_relevant);
assert_eq!(result.reasoning, "matches original domain");
}
#[test]
fn domain_gate_result_false() {
let json = r#"{"domain_relevant": false, "reasoning": "drifted to unrelated topic"}"#;
let result: DomainGateResult = serde_json::from_str(json).unwrap();
assert!(!result.domain_relevant);
}
// The last assertion checks that the literal JSON example braces in the
// template survive placeholder substitution.
#[test]
fn build_domain_gate_prompt_substitutes() {
let result = build_domain_gate_prompt(
"git-helper",
"Git workflow assistant",
"## Usage\nRun git commands",
);
assert!(result.contains("git-helper"));
assert!(result.contains("Git workflow assistant"));
assert!(result.contains("## Usage\nRun git commands"));
assert!(result.contains("{\"domain_relevant\""));
}
// Guards the wording that tells the model about the section limit.
#[test]
fn improvement_prompt_includes_section_limit() {
assert!(
IMPROVEMENT_PROMPT_TEMPLATE.contains("at most 3 top-level sections"),
"IMPROVEMENT_PROMPT_TEMPLATE must mention the section limit"
);
}
// `severity` has no serde default, so omitting it is a deserialization error.
#[test]
fn skill_evaluation_missing_severity_fails() {
let json = r#"{"should_improve": true, "issues": ["bad pattern"]}"#;
let result: Result<SkillEvaluation, _> = serde_json::from_str(json);
assert!(
result.is_err(),
"expected error when severity field is missing"
);
}
// `should_improve` is a `bool`; a JSON string is not coerced.
#[test]
fn skill_evaluation_should_improve_as_string_fails() {
let json = r#"{"should_improve": "true", "issues": [], "severity": "low"}"#;
let result: Result<SkillEvaluation, _> = serde_json::from_str(json);
assert!(
result.is_err(),
"expected error when should_improve is a string"
);
}
// The struct does not opt into `deny_unknown_fields`, so extra keys are ignored.
#[test]
fn skill_evaluation_extra_unknown_fields_succeeds() {
let json =
r#"{"should_improve": false, "issues": [], "severity": "low", "extra_field": 42}"#;
let result: SkillEvaluation = serde_json::from_str(json).unwrap();
assert!(!result.should_improve);
assert_eq!(result.severity, "low");
}
// Presumably provides the `proptest!` macro used at the end of this module.
use proptest::prelude::*;
#[test]
fn failure_kind_from_error_timeout() {
assert_eq!(
FailureKind::from_error("operation timed out"),
FailureKind::Timeout
);
assert_eq!(
FailureKind::from_error("timeout after 30s"),
FailureKind::Timeout
);
}
#[test]
fn failure_kind_from_error_permission_denied() {
assert_eq!(
FailureKind::from_error("error: permission denied"),
FailureKind::PermissionDenied
);
}
#[test]
fn failure_kind_from_error_exit_nonzero() {
assert_eq!(
FailureKind::from_error("command failed [exit code 1]"),
FailureKind::ExitNonzero
);
assert_eq!(
FailureKind::from_error("exit code 128"),
FailureKind::ExitNonzero
);
}
// Unrecognized text and the empty string both fall back to `Unknown`.
#[test]
fn failure_kind_from_error_unknown() {
assert_eq!(
FailureKind::from_error("something went wrong"),
FailureKind::Unknown
);
assert_eq!(FailureKind::from_error(""), FailureKind::Unknown);
}
// Pins the label of every variant (one direction only; there is no parser).
#[test]
fn failure_kind_as_str_roundtrip() {
assert_eq!(FailureKind::ExitNonzero.as_str(), "exit_nonzero");
assert_eq!(FailureKind::Timeout.as_str(), "timeout");
assert_eq!(FailureKind::PermissionDenied.as_str(), "permission_denied");
assert_eq!(FailureKind::WrongApproach.as_str(), "wrong_approach");
assert_eq!(FailureKind::Partial.as_str(), "partial");
assert_eq!(FailureKind::SyntaxError.as_str(), "syntax_error");
assert_eq!(FailureKind::Unknown.as_str(), "unknown");
}
// Covers every category handled by the `From<ToolErrorCategory>` conversion.
#[test]
fn failure_kind_from_tool_error_category_key_mappings() {
use zeph_tools::error_taxonomy::ToolErrorCategory as C;
assert_eq!(FailureKind::from(C::Timeout), FailureKind::Timeout);
assert_eq!(
FailureKind::from(C::PolicyBlocked),
FailureKind::WrongApproach
);
assert_eq!(
FailureKind::from(C::ToolNotFound),
FailureKind::WrongApproach
);
assert_eq!(
FailureKind::from(C::InvalidParameters),
FailureKind::SyntaxError
);
assert_eq!(FailureKind::from(C::TypeMismatch), FailureKind::SyntaxError);
assert_eq!(FailureKind::from(C::RateLimited), FailureKind::Unknown);
assert_eq!(FailureKind::from(C::ServerError), FailureKind::Unknown);
assert_eq!(FailureKind::from(C::NetworkError), FailureKind::Unknown);
assert_eq!(FailureKind::from(C::PermanentFailure), FailureKind::Unknown);
assert_eq!(
FailureKind::from(C::ConfirmationRequired),
FailureKind::WrongApproach
);
assert_eq!(FailureKind::from(C::Cancelled), FailureKind::Unknown);
}
// Property test: arbitrary strings must never make the prompt builder panic.
// `successes` is clamped to `total`, so `failures` is never negative; `desc`
// is passed as the error context and the tool output is left empty.
proptest! {
#[test]
fn build_evaluation_prompt_never_panics(
name in ".*",
body in ".*",
desc in ".*",
total in 0i64..=1000,
successes in 0i64..=1000,
) {
let failures = total - successes.min(total);
let metrics = SkillMetrics {
skill_name: name.clone(),
version: 1,
total,
successes: successes.min(total),
failures,
};
let _ = build_evaluation_prompt(&name, &body, &desc, "", &metrics);
}
}
}