vibe-tests 0.0.1

Integration test framework for MCP servers with LLM-powered tool calling.
Documentation
//! Test report structures.
//! Collected during test execution and available in on_stop callback.

use serde::Serialize;

/// Event name that opens the trace message written when a test starts.
const TRACE_TEST_START: &str = "test_start";
/// Event name that opens the trace message written when a test passes.
const TRACE_TEST_OK: &str = "test_ok";
/// Event name that opens the trace message written when a test fails.
const TRACE_TEST_FAIL: &str = "test_fail";
/// Field key for query text. Fields are logged as `key=§|value|§`.
const KEY_QUERY: &str = "query";
/// Field key for model name.
const KEY_MODEL: &str = "model";
/// Field key for tool name.
const KEY_TOOL: &str = "tool";
/// Field key for tool arguments.
const KEY_ARGS: &str = "args";
/// Field key for model's text response.
const KEY_MODEL_RESPONSE: &str = "model_response";
/// Field key for raw MCP tool result.
const KEY_TOOL_RESPONSE: &str = "tool_response";
/// Field key for error message.
const KEY_ERROR: &str = "error";
/// Field key for the timestamp written with the test-start event.
const KEY_TIMESTAMP: &str = "timestamp";
/// Field key for MCP error code.
const KEY_CODE: &str = "code";
/// Field key for duration in milliseconds.
const KEY_DURATION_MS: &str = "duration_ms";

/// Single test query report.
///
/// `EngineReport::from_log` builds one of these for each test-start event that
/// is followed by a pass or fail event in the log. Every `Option` field is
/// omitted from the serialized output when it is `None`.
#[derive(Debug, Serialize)]
pub struct EngineTestReport {
    /// Model used for this test (taken from the test-start event).
    pub model: String,
    /// Query text sent to the model (taken from the test-start event).
    pub query: String,
    /// Tool name recorded in the pass/fail event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool: Option<String>,
    /// Tool arguments recorded in the pass/fail event.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub args: Option<String>,
    /// RFC 3339 timestamp recorded by the test-start event.
    pub timestamp: String,
    /// Text response from the model. Only read from pass events; `None` for failures.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub model_response: Option<String>,
    /// Raw result from MCP tool. Only read from pass events; `None` for failures.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub tool_response: Option<String>,
    /// MCP error code. Only read from fail events; `None` for passed tests.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub code: Option<i32>,
    /// Query execution duration in milliseconds; `None` when the logged value
    /// is missing or does not parse as an `i32`.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub duration_ms: Option<i32>,
    /// Whether the test passed (`true` for pass events, `false` for fail events).
    pub success: bool,
}

/// Complete test report for all queries.
///
/// Built by `EngineReport::from_log` from the trace events found in a log file.
#[derive(Debug, Serialize)]
pub struct EngineReport {
    /// MCP server host being tested (copied from the `host` argument of `from_log`).
    pub host: String,
    /// Total duration in milliseconds, computed as the sum of the per-test
    /// `duration_ms` values that are present.
    pub duration_ms: u64,
    /// Average query duration in milliseconds: the total divided by the number
    /// of tests using integer division, or 0 when there are no tests.
    pub avg_duration_ms: u64,
    /// Individual test results, in the order their events appear in the log.
    pub tests: Vec<EngineTestReport>,
}

impl EngineReport {
    /// Log test start event.
    pub fn trace_start(query: &str, model: &str) {
        tracing::trace!(
            "{} {}=§|{}|§ {}=§|{}|§ {}=§|{}|§",
            TRACE_TEST_START,
            KEY_QUERY,
            query,
            KEY_MODEL,
            model,
            KEY_TIMESTAMP,
            chrono::Utc::now().to_rfc3339()
        );
    }

    /// Emit the `test_ok` trace event for a query that passed.
    ///
    /// `args`, `model_response` and `tool_response` are JSON-encoded as strings
    /// before being written; if encoding fails the raw text is written instead.
    /// `query`, `model`, `tool` and `duration_ms` are written as given. Every
    /// field is wrapped as `key=§|value|§`.
    pub fn trace_ok(
        query: &str,
        model: &str,
        tool: &str,
        args: &str,
        model_response: &str,
        tool_response: &str,
        duration_ms: u64,
    ) {
        // One encoder for all free-form values: JSON string, or raw text on failure.
        let encode =
            |text: &str| serde_json::to_string(text).unwrap_or_else(|_| text.to_string());
        let encoded_args = encode(args);
        let encoded_model_response = encode(model_response);
        let encoded_tool_response = encode(tool_response);
        tracing::trace!(
            "{} {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§",
            TRACE_TEST_OK,
            KEY_QUERY,
            query,
            KEY_MODEL,
            model,
            KEY_TOOL,
            tool,
            KEY_ARGS,
            encoded_args,
            KEY_MODEL_RESPONSE,
            encoded_model_response,
            KEY_TOOL_RESPONSE,
            encoded_tool_response,
            KEY_DURATION_MS,
            duration_ms
        );
    }

    /// Emit the `test_fail` trace event for a query that failed.
    ///
    /// `error` and, when present, `args` are JSON-encoded as strings before
    /// being written (raw text is used if encoding fails). A missing `tool` or
    /// `args` is written as an empty value. Every field is wrapped as
    /// `key=§|value|§`.
    pub fn trace_fail(
        query: &str,
        model: &str,
        tool: Option<&str>,
        args: Option<&str>,
        error: &str,
        code: i32,
        duration_ms: u64,
    ) {
        // One encoder for all free-form values: JSON string, or raw text on failure.
        let encode =
            |text: &str| serde_json::to_string(text).unwrap_or_else(|_| text.to_string());
        let encoded_error = encode(error);
        let tool_name = tool.unwrap_or_default();
        let encoded_args = match args {
            Some(given) => encode(given),
            None => String::new(),
        };
        tracing::trace!(
            "{} {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§ {}=§|{}|§",
            TRACE_TEST_FAIL,
            KEY_QUERY,
            query,
            KEY_MODEL,
            model,
            KEY_TOOL,
            tool_name,
            KEY_ARGS,
            encoded_args,
            KEY_ERROR,
            encoded_error,
            KEY_CODE,
            code,
            KEY_DURATION_MS,
            duration_ms
        );
    }

    /// Parse log file and build report from tracing events.
    pub fn from_log(path: &str, host: &str) -> Self {
        let content = std::fs::read_to_string(path).unwrap_or_default();
        let mut tests = Vec::new();
        let mut pending: Option<(String, String, String)> = None; // query, model, timestamp

        for line in content.lines() {
            if line.contains(TRACE_TEST_START) {
                let query = Self::extract_value(line, KEY_QUERY);
                let model = Self::extract_value(line, KEY_MODEL);
                let timestamp = Self::extract_value(line, KEY_TIMESTAMP);
                if let (Some(q), Some(m), Some(t)) = (query, model, timestamp) {
                    pending = Some((q, m, t));
                }
            } else if line.contains(TRACE_TEST_OK) && pending.is_some() {
                let (query, model, timestamp) = pending.take().unwrap();
                tests.push(EngineTestReport {
                    success: true,
                    query,
                    model,
                    timestamp,
                    tool: Self::extract_value(line, KEY_TOOL),
                    args: Self::extract_value(line, KEY_ARGS),
                    model_response: Self::extract_value(line, KEY_MODEL_RESPONSE),
                    tool_response: Self::extract_value(line, KEY_TOOL_RESPONSE),
                    code: None,
                    duration_ms: Self::extract_value(line, KEY_DURATION_MS)
                        .and_then(|v| v.parse::<i32>().ok()),
                });
            } else if line.contains(TRACE_TEST_FAIL) && pending.is_some() {
                let (query, model, timestamp) = pending.take().unwrap();
                tests.push(EngineTestReport {
                    success: false,
                    query,
                    model,
                    timestamp,
                    tool: Self::extract_value(line, KEY_TOOL),
                    args: Self::extract_value(line, KEY_ARGS),
                    model_response: None,
                    tool_response: None,
                    duration_ms: Self::extract_value(line, KEY_DURATION_MS)
                        .and_then(|v| v.parse::<i32>().ok()),
                    code: Self::extract_value(line, KEY_CODE).and_then(|v| v.parse::<i32>().ok()),
                });
            }
        }

        let total = tests
            .iter()
            .map(|t| t.duration_ms)
            .filter(|i| i.is_some())
            .map(|i| i.unwrap() as u64)
            .sum::<u64>();
        let count = tests.len() as u64;
        Self {
            host: host.to_string(),
            duration_ms: total,
            avg_duration_ms: if count > 0 { total / count } else { 0 },
            tests,
        }
    }

    /// Extract the value logged for `key` from a single log line.
    ///
    /// Fields are written as `key=§|value|§`. Returns the text between the
    /// first `key=§|` and the next `|§`, or `None` if either delimiter is
    /// missing. If that text is a JSON string literal it is decoded; otherwise
    /// the raw text is returned unchanged.
    ///
    /// A value that itself contains `|§` is cut at that point, because the
    /// format has no escaping for the closing delimiter.
    fn extract_value(line: &str, key: &str) -> Option<String> {
        // Closing delimiter used by every trace message. Searching for the empty
        // string instead would match at offset 0 and always yield an empty value.
        const CLOSE: &str = "|§";

        let prefix = format!("{}=§|", key);
        let (_, rest) = line.split_once(prefix.as_str())?;
        let (raw, _) = rest.split_once(CLOSE)?;
        Some(serde_json::from_str::<String>(raw).unwrap_or_else(|_| raw.to_string()))
    }
}