// collet 0.1.0 - Docs.rs

//! Skillforge — default evolution engine.
//!
//! LLM-driven workspace mutation with EGL gating.  Analyzes observation
//! batches and proposes mutations to the workspace: new or improved skills,
//! prompt refinements, and memory entries.
//!
//! Uses Collet's existing `OpenAiCompatibleProvider` for LLM calls.

use std::collections::HashMap;

use anyhow::Result;
use async_trait::async_trait;

use crate::api::models::{ChatRequest, Message};
use crate::api::provider::OpenAiCompatibleProvider;
use crate::evolution::config::EvolveConfig;
use crate::evolution::engine::EvolutionEngine;
use crate::evolution::history::EvolutionHistory;
use crate::evolution::trial::TrialRunner;
use crate::evolution::types::{Observation, StepResult};
use crate::evolution::workspace::AgentWorkspace;

/// Skillforge evolution engine — LLM-driven workspace mutation.
///
/// Each step builds an analysis prompt from the workspace, the observation
/// batch and the evolution history, sends it to the evolver LLM, and applies
/// the JSON mutations found in the reply.
pub struct SkillforgeEngine {
    /// Evolution settings: supplies the evolver model name, the token limit
    /// and the `evolve_*` switches that gate each kind of mutation.
    config: EvolveConfig,
    /// Provider used to send the chat request to the evolver LLM.
    client: OpenAiCompatibleProvider,
    /// One entry per finished cycle recording whether it was accepted;
    /// appended to by `on_cycle_end`.
    accept_history: Vec<bool>,
}

impl SkillforgeEngine {
    /// Create an engine from its configuration and LLM client.
    ///
    /// The acceptance history starts out empty.
    pub fn new(config: EvolveConfig, client: OpenAiCompatibleProvider) -> Self {
        let accept_history = Vec::new();
        Self {
            config,
            client,
            accept_history,
        }
    }

    /// Build an analysis prompt from observations for the evolver LLM.
    ///
    /// The prompt is assembled section by section, in this order:
    ///
    /// 1. Workspace state: current system prompt, skill list (with the body
    ///    of the first three skills), prompt fragments, memory counts,
    ///    evolution history and metrics markers.
    /// 2. History: score curve, the prompt stored for the best-scoring cycle,
    ///    version tags, recent version log, past failures and aggregate stats.
    /// 3. The current observation batch: totals plus up to five failures.
    /// 4. The mutation kinds enabled in the configuration.
    /// 5. The JSON output schema the LLM must follow.
    ///
    /// Every workspace/history read is best-effort: a read that returns an
    /// error (or an empty value) simply omits its section. Long texts are cut
    /// with `truncate` so a single large file cannot dominate the prompt.
    ///
    /// # Parameters
    /// - `workspace`: source of the current prompt, skills, fragments and memories.
    /// - `observations`: the batch being analyzed.
    /// - `history`: source of scores, versions and past observations.
    ///
    /// # Returns
    /// The complete prompt text.
    fn build_analysis_prompt(
        &self,
        workspace: &AgentWorkspace,
        observations: &[Observation],
        history: &EvolutionHistory,
    ) -> String {
        let mut prompt = String::new();

        prompt.push_str("You are an evolution engine analyzing an AI agent's performance.\n");
        prompt.push_str(
            "Your task: identify failure patterns and propose targeted workspace mutations.\n\n",
        );

        // Current workspace state
        let sys_prompt = workspace.read_prompt().unwrap_or_default();
        if !sys_prompt.is_empty() {
            prompt.push_str("## System Prompt (current)\n```\n");
            prompt.push_str(truncate(&sys_prompt, 2000));
            prompt.push_str("\n```\n\n");
        }

        let skills = workspace.list_skills();
        if !skills.is_empty() {
            prompt.push_str("## Skills\n");
            for s in &skills {
                prompt.push_str(&format!("- **{}**: {}\n", s.name, s.description));
            }
            // Include content of first 3 skills for mutation context.
            for s in skills.iter().take(3) {
                if let Ok(content) = workspace.read_skill(&s.name)
                    && !content.is_empty()
                {
                    prompt.push_str(&format!(
                        "\n### Skill: {}\n```\n{}\n```\n",
                        s.name,
                        truncate(&content, 600)
                    ));
                }
            }
            prompt.push('\n');
        }

        // Prompt fragments in use.
        if let Ok(fragments) = workspace.list_fragments()
            && !fragments.is_empty()
        {
            prompt.push_str(&format!(
                "## Prompt Fragments: {} defined\n",
                fragments.len()
            ));
            // Read first fragment for context (e.g. "rules").
            if let Some(frag_name) = fragments.first()
                && let Ok(frag_content) = workspace.read_fragment(frag_name)
                && !frag_content.is_empty()
            {
                prompt.push_str(&format!(
                    "### Fragment `{frag_name}`\n```\n{}\n```\n",
                    truncate(&frag_content, 400)
                ));
            }
            prompt.push('\n');
        }

        // Recent episodic memories (only the count is reported).
        if let Ok(episodic) = workspace.read_memories("episodic", 5)
            && !episodic.is_empty()
        {
            prompt.push_str(&format!(
                "## Episodic Memories: {} recent entries\n\n",
                episodic.len()
            ));
        }

        // Recent memories across all categories (only the count is reported).
        if let Ok(memories) = workspace.read_all_memories(20)
            && !memories.is_empty()
        {
            prompt.push_str(&format!(
                "## Total Memories: {} entries across categories\n\n",
                memories.len()
            ));
        }

        // Evolution history entries.
        if let Ok(evo_history) = workspace.read_evolution_history()
            && !evo_history.is_empty()
        {
            prompt.push_str(&format!(
                "## Evolution History: {} cycles logged\n\n",
                evo_history.len()
            ));
        }

        // Evolution metrics: only flag their presence when they are not `{}`.
        if let Ok(metrics) = workspace.read_evolution_metrics()
            && metrics != serde_json::json!({})
        {
            prompt.push_str("## Evolution Metrics: available\n\n");
        }

        let scores = history.get_score_curve();
        if !scores.is_empty() {
            let curve: Vec<String> = scores
                .iter()
                .enumerate()
                .map(|(i, s)| format!("Cycle {}: {:.3}", i + 1, s))
                .collect();
            prompt.push_str(&format!("## Score History\n{}\n\n", curve.join(", ")));

            // Show the system prompt at the best-scoring cycle as a reference.
            // Incomparable scores (e.g. NaN) are treated as equal.
            if let Some((best_idx, _)) = scores
                .iter()
                .enumerate()
                .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal))
            {
                // NOTE(review): assumes cycle N is stored under the label
                // `evo-N` — confirm against the history implementation.
                let best_label = format!("evo-{}", best_idx + 1);
                if let Ok(best_prompt) = history.read_file_at(&best_label, "prompt.md")
                    && !best_prompt.is_empty()
                {
                    prompt.push_str(&format!(
                        "## Best Prompt (cycle {})\n```\n{}\n```\n\n",
                        best_idx + 1,
                        truncate(&best_prompt, 1000)
                    ));
                }
            }
        }

        // Version tags and recent git log.
        if let Ok(versions) = history.list_versions()
            && !versions.is_empty()
        {
            prompt.push_str(&format!(
                "## Version Tags: {} total (e.g. {})\n\n",
                versions.len(),
                versions[0]
            ));
        }
        if let Ok(log) = history.get_version_log(3)
            && !log.is_empty()
        {
            prompt.push_str(&format!(
                "## Recent Version Log\n```\n{}\n```\n\n",
                truncate(&log, 400)
            ));
        }

        // Historical failure summary from observer logs.
        if let Ok(hist_failures) = history.get_observations(3, true)
            && !hist_failures.is_empty()
        {
            prompt.push_str(&format!(
                "## Recent Historical Failures (last 3 cycles): {}\n\n",
                hist_failures.len()
            ));
        }

        // Aggregate stats for context.
        if let Ok(stats) = history.get_summary_stats()
            && let Some(total) = stats.get("total_observations").and_then(|v| v.as_u64())
        {
            prompt.push_str(&format!(
                "## Aggregate Stats: {total} total observations\n\n"
            ));
        }

        // Observation batch summary
        let total = observations.len();
        let successes = observations.iter().filter(|o| o.feedback.success).count();
        // Guard the division so an empty batch reports 0.0 instead of NaN.
        let avg = if total > 0 {
            observations.iter().map(|o| o.feedback.score).sum::<f64>() / total as f64
        } else {
            0.0
        };

        prompt.push_str(&format!("## Current Batch\nTotal: {total}, Success: {successes}/{total}, Avg Score: {avg:.3}\n\n"));

        // Failed task details (most useful for mutation decisions)
        let failures: Vec<_> = observations
            .iter()
            .filter(|o| !o.feedback.success)
            .collect();
        if !failures.is_empty() {
            prompt.push_str("## Failures (most recent, up to 5)\n\n");
            for (i, obs) in failures.iter().enumerate().take(5) {
                prompt.push_str(&format!(
                    "### Failure {}\n- Task: {}\n- Output: {}\n- Feedback: {}\n\n",
                    i + 1,
                    truncate(&obs.task.input, 400),
                    truncate(&obs.trajectory.output, 400),
                    truncate(&obs.feedback.detail, 400),
                ));
            }
        }

        // Mutation constraints
        let mut allowed = Vec::new();
        if self.config.evolve_prompts {
            // `apply_mutations` gates both the system prompt and prompt
            // fragments on this flag, so advertise both.
            allowed.push("system prompt and prompt fragments");
        }
        if self.config.evolve_skills {
            allowed.push("skills (create/modify/delete)");
        }
        if self.config.evolve_memory {
            allowed.push("memory entries");
        }
        if self.config.evolve_tools {
            allowed.push("tool definitions");
        }
        prompt.push_str(&format!("## Allowed Mutations\n{}\n\n", allowed.join(", ")));

        // The schema lists every key `apply_mutations` consumes, including
        // `fragments`; a key missing here can never be proposed by the LLM.
        prompt.push_str(r#"## Output Format

Respond with a JSON object (and nothing else) following this schema:

```json
{
  "analysis": "Brief explanation of identified failure patterns",
  "mutations": {
    "prompt": "New system prompt content (omit key if no change)",
    "fragments": {
      "fragment-name": "Full prompt fragment content (omit key if no change)"
    },
    "skills": {
      "skill-name": "Full SKILL.md content with YAML frontmatter (use \"__DELETE__\" to remove)"
    },
    "memories": [
      { "category": "episodic", "content": "...", "source": "evolution" }
    ]
  }
}
```

Focus on targeted, evidence-based mutations. If no mutation is warranted, return `{"analysis": "...", "mutations": {}}`.
"#);

        prompt
    }

    /// Send `prompt` to the evolver LLM and return the text of its reply.
    ///
    /// The request carries a fixed system instruction followed by `prompt`
    /// as the user message, is non-streaming, and uses temperature 0.3.
    /// Only the first choice is read: plain text is returned as-is, and a
    /// multi-part reply is reduced to its text parts concatenated in order.
    /// A reply with no choice or no content yields an empty string.
    ///
    /// # Errors
    /// Propagates any error returned by the provider's `chat` call.
    async fn call_llm(&self, prompt: &str) -> Result<String> {
        use crate::api::content::{Content, ContentPart};

        // Both messages differ only in role and text.
        let text_message = |role: &str, body: &str| Message {
            role: role.to_string(),
            content: Some(Content::Text(body.to_string())),
            reasoning_content: None,
            tool_calls: None,
            tool_call_id: None,
        };

        let messages = vec![
            text_message(
                "system",
                "You are a precise evolution engine. Output only valid JSON.",
            ),
            text_message("user", prompt),
        ];

        let request = ChatRequest {
            model: self.config.evolver_model.clone(),
            messages,
            tools: None,
            tool_choice: None,
            max_tokens: self.config.evolver_max_tokens,
            stream: false,
            temperature: Some(0.3),
            thinking_budget_tokens: None,
            reasoning_effort: None,
        };

        let response = self.client.chat(&request).await?;

        // No choice or no content: report an empty reply rather than failing.
        let Some(content) = response
            .choices
            .first()
            .and_then(|choice| choice.message.content.as_ref())
        else {
            return Ok(String::new());
        };

        let reply = match content {
            Content::Text(body) => body.clone(),
            Content::Parts(parts) => {
                let mut joined = String::new();
                for part in parts {
                    // Non-text parts are ignored.
                    if let ContentPart::Text { text } = part {
                        joined.push_str(text);
                    }
                }
                joined
            }
        };

        Ok(reply)
    }

    /// Extract and parse the mutation JSON object from a raw LLM response.
    ///
    /// Extraction rules, tried in order:
    ///
    /// 1. If the text contains a Markdown code fence tagged `json`, the text
    ///    between that opening fence and the next closing fence (or the end
    ///    of the text when the fence is never closed) is used, trimmed.
    /// 2. Otherwise the span from the first `{` to the last `}` is used,
    ///    provided the `{` comes before the `}`.
    ///
    /// # Returns
    /// The parsed JSON value, or an empty JSON object (`{}`) when no
    /// candidate span is found or the candidate is not valid JSON. This
    /// function never panics on malformed input.
    fn parse_mutations(text: &str) -> serde_json::Value {
        const FENCE_OPEN: &str = "```json";
        const FENCE_CLOSE: &str = "```";

        let candidate = if let Some(start) = text.find(FENCE_OPEN) {
            let inner = &text[start + FENCE_OPEN.len()..];
            inner
                .find(FENCE_CLOSE)
                .map_or(inner, |end| &inner[..end])
                .trim()
        } else {
            match (text.find('{'), text.rfind('}')) {
                // `start < end` rejects text whose last `}` precedes its first
                // `{` (e.g. "} oops {"); slicing that range would panic.
                (Some(start), Some(end)) if start < end => &text[start..=end],
                _ => "",
            }
        };

        // An empty or malformed candidate degrades to "no mutations".
        serde_json::from_str(candidate).unwrap_or_else(|_| serde_json::json!({}))
    }

    /// Apply parsed mutations to the workspace.
    ///
    /// `mutations` is the JSON value produced by `parse_mutations`. The
    /// mutation set is read from its `"mutations"` key when present,
    /// otherwise from the value itself. Each kind is applied only when the
    /// matching configuration switch is on:
    ///
    /// - `prompt` and `fragments` require `evolve_prompts`;
    /// - `skills` requires `evolve_skills` (the content `"__DELETE__"`
    ///   removes the skill, anything else overwrites it);
    /// - `memories` requires `evolve_memory` (entries without a string
    ///   `category` default to `"episodic"`).
    ///
    /// Empty string content is ignored for prompt, fragments and skills, so
    /// a blank value from the LLM cannot wipe existing workspace content.
    ///
    /// # Returns
    /// `(mutated, summary)`: whether anything was written, and a
    /// human-readable summary made of the applied changes followed by the
    /// LLM's `analysis` text when it is non-empty. With no changes and no
    /// analysis the summary is `"no mutations applied"`.
    ///
    /// # Errors
    /// Returns the first error from a workspace write, delete or memory
    /// insert. Mutations applied before the failure are not rolled back here.
    fn apply_mutations(
        &self,
        workspace: &AgentWorkspace,
        mutations: &serde_json::Value,
    ) -> Result<(bool, String)> {
        let mut mutated = false;
        let mut summaries: Vec<String> = Vec::new();

        let mutation_obj = mutations.get("mutations").unwrap_or(mutations);

        // Clear drafts at start of each mutation cycle so stale staged content
        // from a previous cycle does not accumulate. Best-effort: a failure
        // here must not block the real mutations.
        let _ = workspace.clear_drafts();

        // Apply prompt mutation
        if self.config.evolve_prompts
            && let Some(new_prompt) = mutation_obj.get("prompt").and_then(|v| v.as_str())
            && !new_prompt.is_empty()
        {
            workspace.write_prompt(new_prompt)?;
            mutated = true;
            summaries.push("updated system prompt".to_string());
        }

        // Apply prompt fragment mutations.
        if self.config.evolve_prompts
            && let Some(frags) = mutation_obj.get("fragments").and_then(|v| v.as_object())
        {
            for (name, content) in frags {
                if let Some(content_str) = content.as_str()
                    && !content_str.is_empty()
                {
                    workspace.write_fragment(name, content_str)?;
                    mutated = true;
                    summaries.push(format!("wrote fragment '{name}'"));
                }
            }
        }

        // Apply skill mutations
        if self.config.evolve_skills
            && let Some(skills) = mutation_obj.get("skills").and_then(|v| v.as_object())
        {
            for (name, content) in skills {
                // Skip non-string values and empty content, matching the
                // guards used for the prompt and fragments above; an empty
                // string would otherwise overwrite the skill with nothing.
                if let Some(content_str) = content.as_str()
                    && !content_str.is_empty()
                {
                    if content_str == "__DELETE__" {
                        workspace.delete_skill(name)?;
                        summaries.push(format!("deleted skill '{name}'"));
                    } else {
                        // Stage the draft before committing to workspace.
                        // Staging is best-effort; the real write below is not.
                        let _ = workspace.write_draft(name, content_str);
                        workspace.write_skill(name, content_str)?;
                        summaries.push(format!("wrote skill '{name}'"));
                    }
                    mutated = true;
                }
            }
            // Log how many skills were staged as drafts.
            let draft_count = workspace.list_drafts().len();
            if draft_count > 0 {
                tracing::debug!(draft_count, "Staged skill drafts before applying mutations");
            }
        }

        // Apply memory mutations
        if self.config.evolve_memory
            && let Some(mems) = mutation_obj.get("memories").and_then(|v| v.as_array())
        {
            for entry in mems {
                let category = entry
                    .get("category")
                    .and_then(|v| v.as_str())
                    .unwrap_or("episodic");
                workspace.add_memory(entry, category)?;
                mutated = true;
            }
            if !mems.is_empty() {
                summaries.push(format!("added {} memory entries", mems.len()));
            }
        }

        // Treat a missing and an empty analysis alike so the summary never
        // ends in a dangling separator or comes out blank.
        let analysis = mutations
            .get("analysis")
            .and_then(|v| v.as_str())
            .filter(|text| !text.is_empty());

        let summary = match (summaries.is_empty(), analysis) {
            (true, Some(text)) => text.to_string(),
            (true, None) => "no mutations applied".to_string(),
            (false, Some(text)) => format!("{} — {}", summaries.join("; "), text),
            (false, None) => summaries.join("; "),
        };

        Ok((mutated, summary))
    }
}

#[async_trait]
impl EvolutionEngine for SkillforgeEngine {
    /// Run one evolution step: build the analysis prompt, query the evolver
    /// LLM, parse its reply and apply the resulting mutations.
    ///
    /// The trial runner is not used by this engine. The returned
    /// `StepResult` carries the mutation flag, the summary text and an
    /// empty metadata map.
    ///
    /// # Errors
    /// Fails when the LLM call or applying a mutation fails.
    async fn step(
        &mut self,
        workspace: &AgentWorkspace,
        observations: &[Observation],
        history: &EvolutionHistory,
        _trial: &TrialRunner,
    ) -> Result<StepResult> {
        let batch_size = observations.len();
        tracing::info!(
            engine = self.name(),
            observations = batch_size,
            "Running skillforge step"
        );

        let analysis_prompt = self.build_analysis_prompt(workspace, observations, history);
        let reply = self.call_llm(&analysis_prompt).await?;

        tracing::debug!(response_len = reply.len(), "LLM response received");

        let proposal = Self::parse_mutations(&reply);
        let (mutated, summary) = self.apply_mutations(workspace, &proposal)?;

        tracing::info!(mutated, summary = %summary, "Skillforge step complete");

        let metadata = HashMap::new();
        Ok(StepResult {
            mutated,
            summary,
            metadata,
        })
    }

    /// Record whether the finished cycle was accepted; the score is ignored.
    fn on_cycle_end(&mut self, accepted: bool, _score: f64) {
        self.accept_history.push(accepted);
    }

    /// Engine identifier.
    fn name(&self) -> &str {
        "skillforge"
    }
}

/// Return the longest prefix of `s` that is at most `max_len` bytes long
/// and ends on a UTF-8 character boundary.
///
/// `s` is returned unchanged when it already fits. No marker is appended
/// to indicate that text was cut.
fn truncate(s: &str, max_len: usize) -> &str {
    if s.len() <= max_len {
        return s;
    }
    // Walk back from `max_len` to the nearest boundary; index 0 always is one.
    let cut = (0..=max_len)
        .rev()
        .find(|&idx| s.is_char_boundary(idx))
        .unwrap_or(0);
    &s[..cut]
}