use std::collections::HashMap;
use anyhow::Result;
use async_trait::async_trait;
use crate::api::models::{ChatRequest, Message};
use crate::api::provider::OpenAiCompatibleProvider;
use crate::evolution::config::EvolveConfig;
use crate::evolution::engine::EvolutionEngine;
use crate::evolution::history::EvolutionHistory;
use crate::evolution::trial::TrialRunner;
use crate::evolution::types::{Observation, StepResult};
use crate::evolution::workspace::AgentWorkspace;
/// Evolution engine that asks an LLM to analyse agent observations and
/// propose workspace mutations (prompt, fragments, skills, memories).
pub struct SkillforgeEngine {
/// Evolution settings: supplies the evolver model name, its token limit and
/// the `evolve_*` switches that gate each kind of mutation.
config: EvolveConfig,
/// Chat client used to send the analysis prompt to the evolver model.
client: OpenAiCompatibleProvider,
/// One entry per finished cycle, appended by `on_cycle_end`; `true` means the
/// cycle was accepted. Nothing in this file reads it back yet.
accept_history: Vec<bool>,
}
impl SkillforgeEngine {
/// Creates an engine that talks to the evolver model through `client` and is
/// governed by `config`.
///
/// The per-cycle acceptance log starts out empty.
pub fn new(config: EvolveConfig, client: OpenAiCompatibleProvider) -> Self {
    let accept_history = Vec::new();
    Self {
        config,
        client,
        accept_history,
    }
}
/// Builds the analysis prompt that is sent to the evolver model.
///
/// The prompt is assembled in a fixed order: a two-line preamble, a snapshot
/// of the workspace, the evolution history, the current observation batch,
/// the list of allowed mutation kinds, and the required JSON output schema.
/// Workspace and history sections whose data cannot be read or is empty are
/// left out; the batch summary, the allowed-mutations line and the output
/// schema are always present.
///
/// All `truncate` limits below are byte budgets, not character counts.
fn build_analysis_prompt(
    &self,
    workspace: &AgentWorkspace,
    observations: &[Observation],
    history: &EvolutionHistory,
) -> String {
    // The section helpers are nested so they stay private to this method and
    // add no new names to the surrounding `impl`.

    // Appends the workspace snapshot: system prompt, skills, fragments,
    // memory counts and evolution bookkeeping.
    fn push_workspace_sections(prompt: &mut String, workspace: &AgentWorkspace) {
        let sys_prompt = workspace.read_prompt().unwrap_or_default();
        if !sys_prompt.is_empty() {
            prompt.push_str("## System Prompt (current)\n```\n");
            prompt.push_str(truncate(&sys_prompt, 2000));
            prompt.push_str("\n```\n\n");
        }

        let skills = workspace.list_skills();
        if !skills.is_empty() {
            prompt.push_str("## Skills\n");
            for s in &skills {
                prompt.push_str(&format!("- **{}**: {}\n", s.name, s.description));
            }
            // Every skill is listed above, but only the first three are
            // quoted in full to keep the prompt small.
            for s in skills.iter().take(3) {
                if let Ok(content) = workspace.read_skill(&s.name)
                    && !content.is_empty()
                {
                    prompt.push_str(&format!(
                        "\n### Skill: {}\n```\n{}\n```\n",
                        s.name,
                        truncate(&content, 600)
                    ));
                }
            }
            prompt.push('\n');
        }

        if let Ok(fragments) = workspace.list_fragments()
            && !fragments.is_empty()
        {
            prompt.push_str(&format!(
                "## Prompt Fragments: {} defined\n",
                fragments.len()
            ));
            // Only the first fragment is shown as a sample.
            if let Some(frag_name) = fragments.first()
                && let Ok(frag_content) = workspace.read_fragment(frag_name)
                && !frag_content.is_empty()
            {
                prompt.push_str(&format!(
                    "### Fragment `{frag_name}`\n```\n{}\n```\n",
                    truncate(&frag_content, 400)
                ));
            }
            prompt.push('\n');
        }

        // The remaining workspace sections report counts or availability only.
        if let Ok(episodic) = workspace.read_memories("episodic", 5)
            && !episodic.is_empty()
        {
            prompt.push_str(&format!(
                "## Episodic Memories: {} recent entries\n\n",
                episodic.len()
            ));
        }
        if let Ok(memories) = workspace.read_all_memories(20)
            && !memories.is_empty()
        {
            prompt.push_str(&format!(
                "## Total Memories: {} entries across categories\n\n",
                memories.len()
            ));
        }
        if let Ok(evo_history) = workspace.read_evolution_history()
            && !evo_history.is_empty()
        {
            prompt.push_str(&format!(
                "## Evolution History: {} cycles logged\n\n",
                evo_history.len()
            ));
        }
        if let Ok(metrics) = workspace.read_evolution_metrics()
            && metrics != serde_json::json!({})
        {
            prompt.push_str("## Evolution Metrics: available\n\n");
        }
    }

    // Appends the history view: score curve, best-scoring prompt, version
    // tags and log, recent historical failures and aggregate stats.
    fn push_history_sections(prompt: &mut String, history: &EvolutionHistory) {
        let scores = history.get_score_curve();
        if !scores.is_empty() {
            let curve: Vec<String> = scores
                .iter()
                .enumerate()
                .map(|(i, s)| format!("Cycle {}: {:.3}", i + 1, s))
                .collect();
            prompt.push_str(&format!("## Score History\n{}\n\n", curve.join(", ")));

            // Incomparable scores (e.g. NaN) are treated as equal; on ties
            // `max_by` keeps the later cycle.
            if let Some((best_idx, _)) = scores
                .iter()
                .enumerate()
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            {
                // NOTE(review): assumes cycle N is stored under the label
                // `evo-N` — confirm against `EvolutionHistory`.
                let best_label = format!("evo-{}", best_idx + 1);
                if let Ok(best_prompt) = history.read_file_at(&best_label, "prompt.md")
                    && !best_prompt.is_empty()
                {
                    prompt.push_str(&format!(
                        "## Best Prompt (cycle {})\n```\n{}\n```\n\n",
                        best_idx + 1,
                        truncate(&best_prompt, 1000)
                    ));
                }
            }
        }

        if let Ok(versions) = history.list_versions()
            && !versions.is_empty()
        {
            prompt.push_str(&format!(
                "## Version Tags: {} total (e.g. {})\n\n",
                versions.len(),
                versions[0]
            ));
        }
        if let Ok(log) = history.get_version_log(3)
            && !log.is_empty()
        {
            prompt.push_str(&format!(
                "## Recent Version Log\n```\n{}\n```\n\n",
                truncate(&log, 400)
            ));
        }
        if let Ok(hist_failures) = history.get_observations(3, true)
            && !hist_failures.is_empty()
        {
            prompt.push_str(&format!(
                "## Recent Historical Failures (last 3 cycles): {}\n\n",
                hist_failures.len()
            ));
        }
        if let Ok(stats) = history.get_summary_stats()
            && let Some(total) = stats.get("total_observations").and_then(|v| v.as_u64())
        {
            prompt.push_str(&format!(
                "## Aggregate Stats: {total} total observations\n\n"
            ));
        }
    }

    // Appends the summary of the current batch followed by up to five
    // failed observations.
    fn push_batch_sections(prompt: &mut String, observations: &[Observation]) {
        let total = observations.len();
        let successes = observations.iter().filter(|o| o.feedback.success).count();
        // An empty batch reports an average of 0.0 instead of dividing by zero.
        let avg = if total > 0 {
            observations.iter().map(|o| o.feedback.score).sum::<f64>() / total as f64
        } else {
            0.0
        };
        prompt.push_str(&format!("## Current Batch\nTotal: {total}, Success: {successes}/{total}, Avg Score: {avg:.3}\n\n"));

        // NOTE(review): this keeps the FIRST five failures in slice order;
        // that matches the "most recent" header only if the caller passes
        // observations newest-first — confirm.
        let mut failures = observations
            .iter()
            .filter(|o| !o.feedback.success)
            .take(5)
            .peekable();
        if failures.peek().is_some() {
            prompt.push_str("## Failures (most recent, up to 5)\n\n");
            for (i, obs) in failures.enumerate() {
                prompt.push_str(&format!(
                    "### Failure {}\n- Task: {}\n- Output: {}\n- Feedback: {}\n\n",
                    i + 1,
                    truncate(&obs.task.input, 400),
                    truncate(&obs.trajectory.output, 400),
                    truncate(&obs.feedback.detail, 400),
                ));
            }
        }
    }

    let mut prompt = String::new();
    prompt.push_str("You are an evolution engine analyzing an AI agent's performance.\n");
    prompt.push_str(
        "Your task: identify failure patterns and propose targeted workspace mutations.\n\n",
    );

    push_workspace_sections(&mut prompt, workspace);
    push_history_sections(&mut prompt, history);
    push_batch_sections(&mut prompt, observations);

    // Advertise only the mutation kinds that the configuration enables.
    let allowed: Vec<&str> = [
        (self.config.evolve_prompts, "system prompt"),
        (self.config.evolve_skills, "skills (create/modify/delete)"),
        (self.config.evolve_memory, "memory entries"),
        (self.config.evolve_tools, "tool definitions"),
    ]
    .into_iter()
    .filter_map(|(enabled, label)| enabled.then_some(label))
    .collect();
    prompt.push_str(&format!("## Allowed Mutations\n{}\n\n", allowed.join(", ")));

    // The schema is a raw string, so its lines must stay exactly as written.
    prompt.push_str(r#"## Output Format
Respond with a JSON object (and nothing else) following this schema:
```json
{
"analysis": "Brief explanation of identified failure patterns",
"mutations": {
"prompt": "New system prompt content (omit key if no change)",
"skills": {
"skill-name": "Full SKILL.md content with YAML frontmatter (use \"__DELETE__\" to remove)"
},
"memories": [
{ "category": "episodic", "content": "...", "source": "evolution" }
]
}
}
```
Focus on targeted, evidence-based mutations. If no mutation is warranted, return `{"analysis": "...", "mutations": {}}`.
"#);
    prompt
}
/// Sends `prompt` to the configured evolver model and returns the reply text.
///
/// The request is non-streaming, uses temperature 0.3 and carries a fixed
/// system message asking for JSON-only output. Only the first choice is
/// read: plain text is returned as is, multi-part content is reduced to its
/// text parts concatenated in order. A reply without choices or without
/// content yields an empty string.
///
/// # Errors
/// Returns the error produced by the chat client.
async fn call_llm(&self, prompt: &str) -> Result<String> {
    use crate::api::content::{Content, ContentPart};

    // Both messages are plain text without tool or reasoning data.
    let text_message = |role: &str, body: &str| Message {
        role: role.to_string(),
        content: Some(Content::Text(body.to_string())),
        reasoning_content: None,
        tool_calls: None,
        tool_call_id: None,
    };

    let request = ChatRequest {
        model: self.config.evolver_model.clone(),
        messages: vec![
            text_message(
                "system",
                "You are a precise evolution engine. Output only valid JSON.",
            ),
            text_message("user", prompt),
        ],
        tools: None,
        tool_choice: None,
        max_tokens: self.config.evolver_max_tokens,
        stream: false,
        temperature: Some(0.3),
        thinking_budget_tokens: None,
        reasoning_effort: None,
    };

    let response = self.client.chat(&request).await?;

    let Some(content) = response
        .choices
        .first()
        .and_then(|choice| choice.message.content.as_ref())
    else {
        return Ok(String::new());
    };

    let reply = match content {
        Content::Text(body) => body.clone(),
        Content::Parts(parts) => {
            // Non-text parts are dropped.
            let pieces: Vec<_> = parts
                .iter()
                .filter_map(|part| match part {
                    ContentPart::Text { text } => Some(text.clone()),
                    _ => None,
                })
                .collect();
            pieces.concat()
        }
    };
    Ok(reply)
}
/// Extracts the JSON document from the model's reply and parses it.
///
/// Extraction strategy, in order:
/// 1. If the reply contains a fenced block tagged `json`, use the text
///    between the opening fence and the next closing fence (or the rest of
///    the reply when the fence is never closed), trimmed.
/// 2. Otherwise use the span from the first `{` to the last `}` that
///    follows it.
/// 3. Otherwise there is nothing to parse.
///
/// Parsing is deliberately best-effort: any failure yields an empty JSON
/// object, which the caller treats as "no mutations". This function never
/// panics, whatever the reply looks like.
fn parse_mutations(text: &str) -> serde_json::Value {
    const FENCE_OPEN: &str = "```json";
    const FENCE_CLOSE: &str = "```";

    let candidate: &str = if let Some(start) = text.find(FENCE_OPEN) {
        let inner = &text[start + FENCE_OPEN.len()..];
        match inner.find(FENCE_CLOSE) {
            Some(end) => inner[..end].trim(),
            None => inner.trim(),
        }
    } else if let Some(start) = text.find('{') {
        // Look for the closing brace only AFTER the opening one. Searching
        // the whole reply could find a `}` that precedes the first `{`
        // (e.g. "} {"), and slicing `start..=end` would then panic.
        let tail = &text[start..];
        match tail.rfind('}') {
            Some(end) => &tail[..=end],
            None => "",
        }
    } else {
        ""
    };

    serde_json::from_str(candidate).unwrap_or_else(|_| serde_json::json!({}))
}
/// Applies the parsed mutation document to `workspace`.
///
/// The mutation payload is the `"mutations"` member of `mutations`, or the
/// whole document when that member is absent. Each kind of change is applied
/// only when the matching `evolve_*` switch is on, in this order: system
/// prompt, prompt fragments, skills, memories. A skill whose content is
/// exactly `__DELETE__` is deleted; any other content is staged as a draft
/// and then written.
///
/// Returns `(mutated, summary)`: whether anything was written, and a summary
/// listing the applied changes followed by the model's `"analysis"` text. If
/// nothing was applied the summary is the analysis alone, or
/// `"no mutations applied"` when there is none.
///
/// # Errors
/// Stops at the first failing workspace write or delete and returns its
/// error; changes applied before that point are not rolled back here.
/// Failures while clearing or staging drafts are ignored.
fn apply_mutations(
    &self,
    workspace: &AgentWorkspace,
    mutations: &serde_json::Value,
) -> Result<(bool, String)> {
    let payload = mutations.get("mutations").unwrap_or(mutations);
    let analysis = mutations.get("analysis").and_then(|v| v.as_str());

    let mut changed = false;
    let mut notes: Vec<String> = Vec::new();

    // Best effort: leftover drafts must not block this round.
    let _ = workspace.clear_drafts();

    if self.config.evolve_prompts {
        let new_prompt = payload
            .get("prompt")
            .and_then(|v| v.as_str())
            .filter(|text| !text.is_empty());
        if let Some(text) = new_prompt {
            workspace.write_prompt(text)?;
            changed = true;
            notes.push("updated system prompt".to_string());
        }

        if let Some(fragments) = payload.get("fragments").and_then(|v| v.as_object()) {
            for (name, value) in fragments {
                // Non-string and empty fragment bodies are skipped.
                let Some(body) = value.as_str().filter(|b| !b.is_empty()) else {
                    continue;
                };
                workspace.write_fragment(name, body)?;
                changed = true;
                notes.push(format!("wrote fragment '{name}'"));
            }
        }
    }

    if self.config.evolve_skills
        && let Some(skills) = payload.get("skills").and_then(|v| v.as_object())
    {
        for (name, value) in skills {
            let Some(body) = value.as_str() else {
                continue;
            };
            match body {
                "__DELETE__" => {
                    workspace.delete_skill(name)?;
                    notes.push(format!("deleted skill '{name}'"));
                }
                _ => {
                    // Staging the draft is best effort; the real write is not.
                    let _ = workspace.write_draft(name, body);
                    workspace.write_skill(name, body)?;
                    notes.push(format!("wrote skill '{name}'"));
                }
            }
            changed = true;
        }

        let draft_count = workspace.list_drafts().len();
        if draft_count > 0 {
            tracing::debug!(draft_count, "Staged skill drafts before applying mutations");
        }
    }

    if self.config.evolve_memory
        && let Some(entries) = payload.get("memories").and_then(|v| v.as_array())
    {
        for entry in entries {
            // Entries without a string category are filed as episodic.
            let category = match entry.get("category").and_then(|v| v.as_str()) {
                Some(given) => given,
                None => "episodic",
            };
            workspace.add_memory(entry, category)?;
            changed = true;
        }
        if !entries.is_empty() {
            notes.push(format!("added {} memory entries", entries.len()));
        }
    }

    let summary = if notes.is_empty() {
        analysis.unwrap_or("no mutations applied").to_string()
    } else {
        format!("{} — {}", notes.join("; "), analysis.unwrap_or(""))
    };
    Ok((changed, summary))
}
}
#[async_trait]
impl EvolutionEngine for SkillforgeEngine {
    /// Runs one evolution step: build the analysis prompt, query the evolver
    /// model, parse its reply and apply the proposed mutations.
    ///
    /// The trial runner is not used by this engine. The returned
    /// `StepResult` carries the mutation flag, the summary and empty
    /// metadata.
    ///
    /// # Errors
    /// Fails when the model call fails or when applying a mutation to the
    /// workspace fails.
    async fn step(
        &mut self,
        workspace: &AgentWorkspace,
        observations: &[Observation],
        history: &EvolutionHistory,
        _trial: &TrialRunner,
    ) -> Result<StepResult> {
        tracing::info!(
            engine = self.name(),
            observations = observations.len(),
            "Running skillforge step"
        );

        let analysis_prompt = self.build_analysis_prompt(workspace, observations, history);
        let reply = self.call_llm(&analysis_prompt).await?;
        tracing::debug!(response_len = reply.len(), "LLM response received");

        let proposal = Self::parse_mutations(&reply);
        let (mutated, summary) = self.apply_mutations(workspace, &proposal)?;
        tracing::info!(mutated, summary = %summary, "Skillforge step complete");

        let metadata = HashMap::new();
        Ok(StepResult {
            mutated,
            summary,
            metadata,
        })
    }

    /// Records whether the finished cycle was accepted; the score is ignored.
    fn on_cycle_end(&mut self, accepted: bool, _score: f64) {
        self.accept_history.push(accepted);
    }

    /// Identifier of this engine, as used in log output.
    fn name(&self) -> &str {
        "skillforge"
    }
}
/// Returns the longest prefix of `s` that is at most `max_len` bytes long and
/// ends on a UTF-8 character boundary.
///
/// `s` is returned unchanged when it already fits. The limit is in bytes, so
/// a multi-byte character straddling the limit is dropped entirely.
fn truncate(s: &str, max_len: usize) -> &str {
    if s.len() <= max_len {
        return s;
    }
    // Walk back from the byte limit to the nearest boundary. Index 0 is always
    // a boundary, so the search cannot come up empty.
    let cut = (0..=max_len)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}