// swarm-engine-eval 0.1.6
//
// Evaluation framework for SwarmEngine
// Documentation
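//! CodeEnvironment - code exploration environment
//!
//! A simulated environment for exploring a virtual codebase.
//!
//! # Actions
//!
//! - `Grep`: search for files whose path or content matches a pattern
//! - `Read`: read the contents of a file
//! - `List`: list every file path in the codebase
//! - `Done`: declare the task complete (not dispatched by `step` in this file;
//!   presumably handled outside this environment)
//!
//! # Design
//!
//! - Virtual file system (`HashMap<path, content>`)
//! - Explore files with `Grep`, inspect their contents with `Read`
//! - Reading the goal file counts as success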

use std::collections::HashMap;
use std::sync::RwLock;

use swarm_engine_core::actions::ParamResolver;
use swarm_engine_core::agent::WorkResult;
use swarm_engine_core::environment::Environment;
use swarm_engine_core::types::{Action, WorkerId};

// ============================================================================
// CodeEnvironment
// ============================================================================

/// Code exploration environment backed by an in-memory map of files.
pub struct CodeEnvironment {
    /// Virtual file system (path -> content).
    files: HashMap<String, String>,
    /// Path of the goal file; a successful `Read` of this path counts as success.
    goal_file: String,
    /// Internal bookkeeping, behind a lock because the handlers mutate it through `&self`.
    state: RwLock<CodeState>,
}

/// Bookkeeping that is updated as workers read files.
#[derive(Debug, Default)]
struct CodeState {
    /// Paths each worker has read, appended in the order the reads happened.
    read_files: HashMap<WorkerId, Vec<String>>,
    /// Workers that have reached the goal file.
    completed: Vec<WorkerId>,
}

impl CodeEnvironment {
    /// Creates a new code environment.
    ///
    /// * `files` - the virtual file system, mapping each path to its content.
    /// * `goal_file` - the path whose successful `Read` counts as reaching the goal.
    ///   It is stored as given; no check is made that it exists in `files`.
    pub fn new(files: HashMap<String, String>, goal_file: impl Into<String>) -> Self {
        Self {
            files,
            goal_file: goal_file.into(),
            state: RwLock::new(CodeState::default()),
        }
    }

    /// Builds a small authentication-code exploration scenario.
    ///
    /// The virtual codebase holds six files and the goal file is `src/auth.rs`.
    pub fn auth_scenario() -> Self {
        let mut files = HashMap::new();

        // Project layout
        files.insert(
            "src/main.rs".into(),
            r#"
mod auth;
mod handlers;
mod db;

fn main() {
    let app = create_app();
    app.run();
}
"#
            .into(),
        );

        files.insert(
            "src/handlers/mod.rs".into(),
            r#"
pub mod user;
pub mod admin;

pub use user::*;
pub use admin::*;
"#
            .into(),
        );

        files.insert(
            "src/handlers/user.rs".into(),
            r#"
use crate::auth::verify_token;

pub fn get_profile(token: &str) -> Result<User, Error> {
    verify_token(token)?;
    // ...
}
"#
            .into(),
        );

        files.insert(
            "src/auth.rs".into(),
            r#"
//! Authentication module
//!
//! Handles user authentication and token verification.

pub fn verify_token(token: &str) -> Result<Claims, AuthError> {
    let decoded = decode_jwt(token)?;
    validate_claims(&decoded)?;
    Ok(decoded.claims)
}

pub fn login(username: &str, password: &str) -> Result<String, AuthError> {
    let user = find_user(username)?;
    if verify_password(password, &user.password_hash) {
        Ok(generate_jwt(&user))
    } else {
        Err(AuthError::InvalidPassword)
    }
}

fn decode_jwt(token: &str) -> Result<TokenData, AuthError> {
    // JWT decoding logic
}
"#
            .into(),
        );

        files.insert(
            "src/db.rs".into(),
            r#"
pub struct Database {
    pool: Pool,
}

impl Database {
    pub fn connect(url: &str) -> Self {
        // ...
    }
}
"#
            .into(),
        );

        files.insert(
            "config/settings.toml".into(),
            r#"
[server]
port = 8080
host = "0.0.0.0"

[database]
url = "postgres://localhost/app"
"#
            .into(),
        );

        Self::new(files, "src/auth.rs")
    }

    // ------------------------------------------------------------------------
    // Action Handlers
    // ------------------------------------------------------------------------

    /// Handles `Grep`: a case-insensitive substring search over file paths and contents.
    ///
    /// The required `pattern` parameter is lowercased and tested against the
    /// lowercased path and content of every file. Matching paths are reported in
    /// sorted order. Returns an environment failure when `pattern` cannot be
    /// resolved; an empty result set is still reported as a success.
    fn handle_grep(&self, _worker_id: WorkerId, action: &Action) -> WorkResult {
        let resolver = ParamResolver::new(action);
        let pattern = match resolver.require("pattern") {
            Ok(s) => s.to_lowercase(),
            Err(e) => return WorkResult::env_failure(format!("Grep: {}", e)),
        };

        // Collect every file whose path or content contains the pattern.
        let mut matches: Vec<&str> = self
            .files
            .iter()
            .filter(|(path, content)| {
                path.to_lowercase().contains(&pattern) || content.to_lowercase().contains(&pattern)
            })
            .map(|(path, _)| path.as_str())
            .collect();

        // HashMap iteration order is arbitrary; sort so that identical queries
        // always produce identical output.
        matches.sort_unstable();

        if matches.is_empty() {
            WorkResult::env_success(format!("No files found matching '{}'", pattern))
        } else {
            let result = format!(
                "Found {} file(s) matching '{}':\n{}",
                matches.len(),
                pattern,
                matches.join("\n")
            );
            WorkResult::env_success(result)
        }
    }

    /// Handles `Read`: returns the trimmed content of the file at the required `path`.
    ///
    /// A successful read is recorded for `worker_id`. Reading the goal file also
    /// marks the worker as completed (at most once) and yields a `done_success`
    /// result; any other existing file yields `env_success`. Returns an
    /// environment failure when `path` cannot be resolved or no such file exists.
    fn handle_read(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
        let resolver = ParamResolver::new(action);
        let path = match resolver.require("path") {
            Ok(s) => s,
            Err(e) => return WorkResult::env_failure(format!("Read: {}", e)),
        };

        // Look the file up; unknown paths fail without touching the state.
        let content = match self.files.get(path) {
            Some(content) => content,
            None => return WorkResult::env_failure(format!("File not found: {}", path)),
        };

        let is_goal = path == self.goal_file;

        // Record the read and the completion under a single lock acquisition so
        // the two updates are applied together.
        {
            let mut state = self
                .state
                .write()
                .expect("CodeState lock poisoned: a handler panicked while updating it");
            state
                .read_files
                .entry(worker_id)
                .or_default()
                .push(path.to_string());
            if is_goal && !state.completed.contains(&worker_id) {
                state.completed.push(worker_id);
            }
        }

        if is_goal {
            WorkResult::done_success(format!(
                "Found target file: {}\n\n{}",
                path,
                content.trim()
            ))
        } else {
            WorkResult::env_success(format!("=== {} ===\n{}", path, content.trim()))
        }
    }

    /// Handles `List`: reports every path in the virtual file system, sorted.
    fn handle_list(&self, _worker_id: WorkerId) -> WorkResult {
        let mut paths: Vec<&str> = self.files.keys().map(|s| s.as_str()).collect();
        // HashMap key order is arbitrary; sort for a stable listing.
        paths.sort_unstable();
        let result = format!("Files in codebase:\n{}", paths.join("\n"));
        WorkResult::env_success(result)
    }
}

impl Environment for CodeEnvironment {
    fn step(&self, worker_id: WorkerId, action: &Action) -> WorkResult {
        match action.name.as_str() {
            "Grep" | "grep" => self.handle_grep(worker_id, action),
            "Read" | "read" => self.handle_read(worker_id, action),
            "List" | "list" | "ls" => self.handle_list(worker_id),
            _ => WorkResult::unsupported(&action.name),
        }
    }

    fn reset(&self) {
        let mut state = self.state.write().unwrap();
        state.read_files.clear();
        state.completed.clear();
    }

    fn name(&self) -> &str {
        "CodeEnvironment"
    }
}

// ============================================================================
// Tests
// ============================================================================

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds an action that carries only a target, the shape every test here uses.
    fn targeted(name: &'static str, target: &'static str) -> Action {
        Action {
            name: name.into(),
            params: swarm_engine_core::types::ActionParams {
                target: Some(target.into()),
                args: HashMap::new(),
                data: vec![],
            },
        }
    }

    /// Runs a single step as worker 0 against a fresh auth scenario.
    fn step_once(name: &'static str, target: &'static str) -> WorkResult {
        let env = CodeEnvironment::auth_scenario();
        let worker = WorkerId(0);
        env.step(worker, &targeted(name, target))
    }

    /// True when the result reports success, whether it acted or finished.
    fn is_success(result: &WorkResult) -> bool {
        if let WorkResult::Acted { action_result, .. } = result {
            action_result.success
        } else if let WorkResult::Done { success, .. } = result {
            *success
        } else {
            false
        }
    }

    /// True when the result is the `Done` variant.
    fn is_done(result: &WorkResult) -> bool {
        matches!(result, WorkResult::Done { .. })
    }

    #[test]
    fn test_grep_finds_files() {
        let outcome = step_once("Grep", "auth");
        assert!(is_success(&outcome));
    }

    #[test]
    fn test_read_existing_file() {
        let outcome = step_once("Read", "src/main.rs");
        assert!(is_success(&outcome));
        assert!(!is_done(&outcome)); // main.rs is not the goal file
    }

    #[test]
    fn test_read_goal_file_completes() {
        let outcome = step_once("Read", "src/auth.rs");
        assert!(is_success(&outcome));
        assert!(is_done(&outcome)); // auth.rs is the goal file
    }

    #[test]
    fn test_read_nonexistent_file() {
        let outcome = step_once("Read", "src/notfound.rs");
        assert!(!is_success(&outcome));
    }
}