use crate::agent::{run_agent_loop, AgentLoopConfig};
use crate::error::{RavenClawsError, Result};
use crate::llm::LLMProviderTrait;
use serde::{Deserialize, Serialize};
use std::sync::Arc;
use tracing::{info, instrument, warn};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalConfig {
#[serde(default = "default_suite_name")]
pub name: String,
#[serde(default)]
pub description: String,
#[serde(default = "default_system_prompt")]
pub system_prompt: String,
#[serde(default = "default_max_iterations")]
pub max_iterations: usize,
#[serde(default)]
pub tasks: Vec<EvalTask>,
}
fn default_suite_name() -> String {
"unnamed".to_string()
}
fn default_system_prompt() -> String {
"You are a helpful assistant. Be concise and accurate.".to_string()
}
fn default_max_iterations() -> usize {
5
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalTask {
pub name: String,
#[serde(default)]
pub description: String,
pub prompt: String,
#[serde(default)]
pub golden: String,
#[serde(default)]
pub assertions: Vec<Assertion>,
#[serde(default = "default_weight")]
pub weight: f64,
#[serde(default)]
pub required: bool,
}
fn default_weight() -> f64 {
1.0
}
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(tag = "type", content = "value")]
pub enum Assertion {
#[serde(rename = "contains")]
Contains(String),
#[serde(rename = "not_contains")]
NotContains(String),
#[serde(rename = "exact")]
Exact(String),
#[serde(rename = "regex")]
Regex(String),
#[serde(rename = "non_empty")]
NonEmpty,
#[serde(rename = "min_length")]
MinLength(usize),
#[serde(rename = "max_length")]
MaxLength(usize),
#[serde(rename = "tool_called")]
ToolCalled(String),
#[serde(rename = "tool_not_called")]
ToolNotCalled(String),
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunTrace {
pub task_name: String,
pub started_at: String,
pub ended_at: String,
pub duration_ms: u64,
pub iterations: usize,
pub steps: Vec<TraceStep>,
pub llm_calls: Vec<LlmCallTrace>,
pub tool_calls: Vec<ToolCallTrace>,
pub final_response: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TraceStep {
pub number: usize,
pub step_type: StepType,
pub content: String,
pub duration_ms: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub enum StepType {
Thought,
ToolCall,
Observation,
Final,
Error,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmCallTrace {
pub iteration: usize,
pub provider: String,
pub model: String,
pub prompt_tokens: Option<u32>,
pub completion_tokens: Option<u32>,
pub duration_ms: u64,
pub response_preview: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ToolCallTrace {
pub iteration: usize,
pub tool_name: String,
pub arguments: serde_json::Value,
pub success: bool,
pub output_preview: String,
pub duration_ms: u64,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalResult {
pub task_name: String,
pub passed: bool,
pub score: f64,
pub assertions_passed: usize,
pub assertions_failed: usize,
pub assertion_results: Vec<AssertionResult>,
pub trace: RunTrace,
pub error: Option<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AssertionResult {
pub assertion: String,
pub passed: bool,
pub details: String,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EvalReport {
pub suite_name: String,
pub ran_at: String,
pub duration_ms: u64,
pub overall_score: f64,
pub total_tasks: usize,
pub passed_tasks: usize,
pub failed_tasks: usize,
pub results: Vec<EvalResult>,
}
pub struct EvalRunner {
llm: Arc<dyn LLMProviderTrait>,
config: EvalConfig,
}
impl EvalRunner {
pub fn new(llm: Arc<dyn LLMProviderTrait>, config: EvalConfig) -> Self {
Self { llm, config }
}
#[instrument(skip(self), fields(suite = %self.config.name, task_count = self.config.tasks.len()))]
pub async fn run_suite(&self) -> EvalReport {
let started_at = chrono::Utc::now().to_rfc3339();
let suite_start = std::time::Instant::now();
let mut results = Vec::with_capacity(self.config.tasks.len());
info!(
suite = %self.config.name,
task_count = self.config.tasks.len(),
"Starting eval suite"
);
for task in &self.config.tasks {
let result = self.run_task(task).await;
let passed = result.passed;
let name = &result.task_name;
if passed {
info!(task = %name, score = result.score, "Eval task passed");
} else {
warn!(
task = %name,
score = result.score,
passed = result.assertions_passed,
failed = result.assertions_failed,
"Eval task failed"
);
}
results.push(result);
}
let duration_ms = suite_start.elapsed().as_millis() as u64;
let total_tasks = results.len();
let passed_tasks = results.iter().filter(|r| r.passed).count();
let failed_tasks = total_tasks - passed_tasks;
let overall_score = if total_tasks > 0 {
results
.iter()
.map(|r| r.score * r.trace.iterations as f64)
.sum::<f64>()
/ results
.iter()
.map(|r| r.trace.iterations as f64)
.sum::<f64>()
} else {
0.0
};
info!(
suite = %self.config.name,
passed = passed_tasks,
failed = failed_tasks,
overall_score = overall_score,
duration_ms = duration_ms,
"Eval suite completed"
);
EvalReport {
suite_name: self.config.name.clone(),
ran_at: started_at,
duration_ms,
overall_score,
total_tasks,
passed_tasks,
failed_tasks,
results,
}
}
#[instrument(skip(self), fields(task = %task.name))]
async fn run_task(&self, task: &EvalTask) -> EvalResult {
let task_start = std::time::Instant::now();
let started_at = chrono::Utc::now().to_rfc3339();
let agent_config = AgentLoopConfig {
max_iterations: self.config.max_iterations,
enable_tools: true,
require_approval: false,
prompt_injection_protection: true,
token_lifetime_secs: 0,
no_final_required: false,
fallback_chain: None,
token_budget: None,
ravenfabric: None,
checkpoint_dir: None,
session_id: None,
metrics_callback: None,
load_manager: None,
};
let result = run_agent_loop(
self.llm.clone(),
&task.prompt,
&self.config.system_prompt,
agent_config,
)
.await;
let duration_ms = task_start.elapsed().as_millis() as u64;
match result {
Ok(final_response) => {
let trace = RunTrace {
task_name: task.name.clone(),
started_at,
ended_at: chrono::Utc::now().to_rfc3339(),
duration_ms,
iterations: self.config.max_iterations, steps: vec![TraceStep {
number: 0,
step_type: StepType::Final,
content: final_response.clone(),
duration_ms,
}],
llm_calls: Vec::new(), tool_calls: Vec::new(), final_response: final_response.clone(),
};
let (assertion_results, assertions_passed, assertions_failed) =
check_assertions(&final_response, &task.assertions, Some(&trace));
let score = if task.assertions.is_empty() {
if final_response.is_empty() || final_response.len() < 10 {
0.0
} else {
1.0
}
} else if task.assertions.len() == assertions_passed + assertions_failed {
assertions_passed as f64 / task.assertions.len() as f64
} else {
0.0
};
let passed = assertions_failed == 0 && !final_response.is_empty();
EvalResult {
task_name: task.name.clone(),
passed,
score,
assertions_passed,
assertions_failed,
assertion_results,
trace,
error: None,
}
}
Err(e) => {
let trace = RunTrace {
task_name: task.name.clone(),
started_at,
ended_at: chrono::Utc::now().to_rfc3339(),
duration_ms,
iterations: 0,
steps: vec![TraceStep {
number: 0,
step_type: StepType::Error,
content: format!("Agent loop failed: {}", e),
duration_ms,
}],
llm_calls: Vec::new(),
tool_calls: Vec::new(),
final_response: String::new(),
};
EvalResult {
task_name: task.name.clone(),
passed: false,
score: 0.0,
assertions_passed: 0,
assertions_failed: 1,
assertion_results: vec![AssertionResult {
assertion: "agent_loop".to_string(),
passed: false,
details: format!("Agent loop failed: {}", e),
}],
trace,
error: Some(e.to_string()),
}
}
}
}
}
fn check_assertions(
response: &str,
assertions: &[Assertion],
run_trace: Option<&RunTrace>,
) -> (Vec<AssertionResult>, usize, usize) {
let mut results = Vec::with_capacity(assertions.len());
let mut passed = 0;
let mut failed = 0;
for assertion in assertions {
let result = check_single_assertion(response, assertion, run_trace);
if result.passed {
passed += 1;
} else {
failed += 1;
}
results.push(result);
}
(results, passed, failed)
}
fn check_single_assertion(
response: &str,
assertion: &Assertion,
run_trace: Option<&RunTrace>,
) -> AssertionResult {
match assertion {
Assertion::Contains(pattern) => {
let passed = response.contains(pattern);
AssertionResult {
assertion: format!("contains: {}", pattern),
passed,
details: if passed {
format!("Response contains '{}'", pattern)
} else {
format!("Response does not contain '{}'", pattern)
},
}
}
Assertion::NotContains(pattern) => {
let passed = !response.contains(pattern);
AssertionResult {
assertion: format!("not_contains: {}", pattern),
passed,
details: if passed {
format!("Response does not contain '{}'", pattern)
} else {
format!("Response contains '{}'", pattern)
},
}
}
Assertion::Exact(expected) => {
let trimmed_response = response.trim();
let passed = trimmed_response == expected.as_str();
AssertionResult {
assertion: format!("exact: {}", expected),
passed,
details: if passed {
"Response matches exactly".to_string()
} else {
format!(
"Expected '{}', got '{}'",
expected,
trimmed_response.chars().take(100).collect::<String>()
)
},
}
}
Assertion::Regex(pattern) => {
let re = regex_lite::Regex::new(pattern);
match re {
Ok(re) => {
let passed = re.is_match(response);
AssertionResult {
assertion: format!("regex: {}", pattern),
passed,
details: if passed {
format!("Response matches pattern '{}'", pattern)
} else {
format!("Response does not match pattern '{}'", pattern)
},
}
}
Err(e) => AssertionResult {
assertion: format!("regex: {}", pattern),
passed: false,
details: format!("Invalid regex pattern: {}", e),
},
}
}
Assertion::NonEmpty => {
let passed = !response.is_empty();
AssertionResult {
assertion: "non_empty".to_string(),
passed,
details: if passed {
format!("Response is non-empty ({} chars)", response.len())
} else {
"Response is empty".to_string()
},
}
}
Assertion::MinLength(min) => {
let passed = response.len() >= *min;
AssertionResult {
assertion: format!("min_length: {}", min),
passed,
details: if passed {
format!("Response length {} >= {}", response.len(), min)
} else {
format!("Response length {} < {}", response.len(), min)
},
}
}
Assertion::MaxLength(max) => {
let passed = response.len() <= *max;
AssertionResult {
assertion: format!("max_length: {}", max),
passed,
details: if passed {
format!("Response length {} <= {}", response.len(), max)
} else {
format!("Response length {} > {}", response.len(), max)
},
}
}
Assertion::ToolCalled(tool_name) => {
let tool_calls = run_trace
.map(|t| &t.tool_calls)
.filter(|calls| calls.iter().any(|tc| tc.tool_name == *tool_name));
let passed = tool_calls.is_some();
AssertionResult {
assertion: format!("tool_called: {}", tool_name),
passed,
details: if passed {
format!("Tool '{}' was called", tool_name)
} else {
let all_tools: Vec<&str> = run_trace
.map(|t| {
t.tool_calls
.iter()
.map(|tc| tc.tool_name.as_str())
.collect()
})
.unwrap_or_default();
if all_tools.is_empty() {
format!("Tool '{}' was not called (no tools were called)", tool_name)
} else {
format!(
"Tool '{}' was not called (called: {})",
tool_name,
all_tools.join(", ")
)
}
},
}
}
Assertion::ToolNotCalled(tool_name) => {
let tool_calls = run_trace
.map(|t| &t.tool_calls)
.filter(|calls| calls.iter().any(|tc| tc.tool_name == *tool_name));
let passed = tool_calls.is_none();
AssertionResult {
assertion: format!("tool_not_called: {}", tool_name),
passed,
details: if passed {
format!("Tool '{}' was not called", tool_name)
} else {
format!("Tool '{}' was called but should not have been", tool_name)
},
}
}
}
}
impl EvalReport {
pub fn format_text(&self) -> String {
let mut output = String::new();
output.push_str(&format!("\n🐦⬛ Eval Report: {}\n", self.suite_name));
output.push_str(&format!("{:-^60}\n", ""));
output.push_str(&format!(
"Ran at: {}\n",
&self.ran_at[..19].replace('T', " ")
));
output.push_str(&format!("Duration: {} ms\n", self.duration_ms));
output.push_str(&format!(
"Overall score: {:.1}%\n",
self.overall_score * 100.0
));
output.push_str(&format!(
"Tasks: {}/{} passed\n",
self.passed_tasks, self.total_tasks
));
output.push_str(&format!("{:-^60}\n", ""));
for result in &self.results {
output.push_str(&format!(
"\n {} {} — {:.1}%\n",
if result.passed { "✅" } else { "❌" },
result.task_name,
result.score * 100.0
));
if let Some(ref error) = result.error {
output.push_str(&format!(" Error: {}\n", error));
}
if !result.assertion_results.is_empty() {
for ar in &result.assertion_results {
output.push_str(&format!(
" {} {}\n",
if ar.passed { " ✅" } else { " ❌" },
ar.details
));
}
}
let trace = &result.trace;
output.push_str(&format!(
" Iterations: {} · LLM calls: {} · Tool calls: {} · Duration: {} ms\n",
trace.iterations,
trace.llm_calls.len(),
trace.tool_calls.len(),
trace.duration_ms
));
let preview: String = trace.final_response.chars().take(200).collect();
if !preview.is_empty() {
output.push_str(&format!(" Response: {}\n", preview));
}
}
output
}
pub fn format_json(&self) -> serde_json::Value {
serde_json::to_value(self).unwrap_or(serde_json::json!({"error": "serialization failed"}))
}
}
impl EvalConfig {
pub fn from_file(path: &str) -> Result<Self> {
let content = std::fs::read_to_string(path).map_err(|e| {
RavenClawsError::CommandExecution(format!("Failed to read eval config: {}", e))
})?;
if content.trim().is_empty() {
return Err(RavenClawsError::CommandExecution(format!(
"Eval config file '{}' is empty — no tasks to run",
path
)));
}
let config: EvalConfig = toml::from_str(&content).map_err(|e| {
RavenClawsError::CommandExecution(format!("Failed to parse eval config: {}", e))
})?;
if config.tasks.is_empty() {
return Err(RavenClawsError::CommandExecution(format!(
"Eval config file '{}' has no tasks defined",
path
)));
}
Ok(config)
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_assertion_contains_pass() {
let result = check_single_assertion(
"hello world",
&Assertion::Contains("world".to_string()),
None,
);
assert!(result.passed);
assert!(result.details.contains("contains"));
}
#[test]
fn test_assertion_contains_fail() {
let result =
check_single_assertion("hello world", &Assertion::Contains("foo".to_string()), None);
assert!(!result.passed);
}
#[test]
fn test_assertion_not_contains_pass() {
let result = check_single_assertion(
"hello world",
&Assertion::NotContains("foo".to_string()),
None,
);
assert!(result.passed);
}
#[test]
fn test_assertion_not_contains_fail() {
let result = check_single_assertion(
"hello world",
&Assertion::NotContains("world".to_string()),
None,
);
assert!(!result.passed);
}
#[test]
fn test_assertion_exact_pass() {
let result = check_single_assertion("hello", &Assertion::Exact("hello".to_string()), None);
assert!(result.passed);
}
#[test]
fn test_assertion_exact_fail() {
let result = check_single_assertion("world", &Assertion::Exact("hello".to_string()), None);
assert!(!result.passed);
}
#[test]
fn test_assertion_regex_pass() {
let result =
check_single_assertion("hello 123", &Assertion::Regex(r"\d+".to_string()), None);
assert!(result.passed);
}
#[test]
fn test_assertion_regex_fail() {
let result = check_single_assertion("hello", &Assertion::Regex(r"\d+".to_string()), None);
assert!(!result.passed);
}
#[test]
fn test_assertion_non_empty_pass() {
let result = check_single_assertion("hello", &Assertion::NonEmpty, None);
assert!(result.passed);
}
#[test]
fn test_assertion_non_empty_fail() {
let result = check_single_assertion("", &Assertion::NonEmpty, None);
assert!(!result.passed);
}
#[test]
fn test_assertion_min_length_pass() {
let result = check_single_assertion("hello", &Assertion::MinLength(3), None);
assert!(result.passed);
}
#[test]
fn test_assertion_min_length_fail() {
let result = check_single_assertion("hi", &Assertion::MinLength(5), None);
assert!(!result.passed);
}
#[test]
fn test_assertion_max_length_pass() {
let result = check_single_assertion("hi", &Assertion::MaxLength(5), None);
assert!(result.passed);
}
#[test]
fn test_assertion_max_length_fail() {
let result = check_single_assertion("hello world", &Assertion::MaxLength(5), None);
assert!(!result.passed);
}
#[test]
fn test_check_assertions_empty() {
let (results, passed, failed) = check_assertions("hello", &[], None);
assert!(results.is_empty());
assert_eq!(passed, 0);
assert_eq!(failed, 0);
}
#[test]
fn test_check_assertions_multiple() {
let assertions = vec![
Assertion::Contains("hello".to_string()),
Assertion::Contains("world".to_string()),
Assertion::NonEmpty,
];
let (results, passed, failed) = check_assertions("hello world", &assertions, None);
assert_eq!(passed, 3);
assert_eq!(failed, 0);
assert_eq!(results.len(), 3);
}
#[test]
fn test_check_assertions_tool_called() {
let trace = RunTrace {
task_name: "test".to_string(),
started_at: "2026-01-01T00:00:00Z".to_string(),
ended_at: "2026-01-01T00:00:01Z".to_string(),
duration_ms: 1000,
iterations: 1,
steps: vec![],
llm_calls: vec![],
tool_calls: vec![
ToolCallTrace {
iteration: 0,
tool_name: "web_search".to_string(),
arguments: serde_json::json!({"query": "test"}),
success: true,
output_preview: "results".to_string(),
duration_ms: 100,
},
ToolCallTrace {
iteration: 0,
tool_name: "read_file".to_string(),
arguments: serde_json::json!({"path": "/tmp/test"}),
success: true,
output_preview: "content".to_string(),
duration_ms: 50,
},
],
final_response: "response".to_string(),
};
let (results, passed, failed) = check_assertions(
"response",
&[Assertion::ToolCalled("web_search".to_string())],
Some(&trace),
);
assert_eq!(passed, 1);
assert_eq!(failed, 0);
assert!(results[0].passed);
let (results, passed, failed) = check_assertions(
"response",
&[Assertion::ToolCalled("nonexistent".to_string())],
Some(&trace),
);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
assert!(!results[0].passed);
let (results, passed, failed) = check_assertions(
"response",
&[Assertion::ToolNotCalled("nonexistent".to_string())],
Some(&trace),
);
assert_eq!(passed, 1);
assert_eq!(failed, 0);
assert!(results[0].passed);
let (results, passed, failed) = check_assertions(
"response",
&[Assertion::ToolNotCalled("web_search".to_string())],
Some(&trace),
);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
assert!(!results[0].passed);
let (results, passed, failed) = check_assertions(
"response",
&[Assertion::ToolCalled("web_search".to_string())],
None,
);
assert_eq!(passed, 0);
assert_eq!(failed, 1);
assert!(!results[0].passed);
}
#[test]
fn test_eval_config_from_toml() {
let toml_str = r#"
name = "test-suite"
description = "A test suite"
system_prompt = "Be concise"
max_iterations = 3
[[tasks]]
name = "test-1"
prompt = "What is 2+2?"
golden = "4"
assertions = [{ type = "contains", value = "4" }]
weight = 1.0
required = true
"#;
let config: EvalConfig = toml::from_str(toml_str).unwrap();
assert_eq!(config.name, "test-suite");
assert_eq!(config.tasks.len(), 1);
assert_eq!(config.tasks[0].name, "test-1");
assert_eq!(config.tasks[0].prompt, "What is 2+2?");
assert_eq!(config.tasks[0].golden, "4");
assert_eq!(config.tasks[0].assertions.len(), 1);
}
#[test]
fn test_eval_config_defaults() {
let toml_str = r#"
[[tasks]]
name = "simple"
prompt = "Say hello"
"#;
let config: EvalConfig = toml::from_str(toml_str).unwrap();
assert_eq!(config.name, "unnamed");
assert_eq!(config.system_prompt, default_system_prompt());
assert_eq!(config.max_iterations, 5);
assert_eq!(config.tasks[0].weight, 1.0);
assert!(!config.tasks[0].required);
}
#[test]
fn test_report_format_text() {
let report = EvalReport {
suite_name: "test".to_string(),
ran_at: "2026-06-22T12:00:00+00:00".to_string(),
duration_ms: 100,
overall_score: 0.75,
total_tasks: 2,
passed_tasks: 1,
failed_tasks: 1,
results: vec![
EvalResult {
task_name: "pass-task".to_string(),
passed: true,
score: 1.0,
assertions_passed: 2,
assertions_failed: 0,
assertion_results: vec![AssertionResult {
assertion: "contains: hello".to_string(),
passed: true,
details: "Response contains 'hello'".to_string(),
}],
trace: RunTrace {
task_name: "pass-task".to_string(),
started_at: "2026-06-22T12:00:00+00:00".to_string(),
ended_at: "2026-06-22T12:00:01+00:00".to_string(),
duration_ms: 50,
iterations: 1,
steps: vec![],
llm_calls: vec![],
tool_calls: vec![],
final_response: "hello world".to_string(),
},
error: None,
},
EvalResult {
task_name: "fail-task".to_string(),
passed: false,
score: 0.0,
assertions_passed: 0,
assertions_failed: 1,
assertion_results: vec![AssertionResult {
assertion: "contains: foo".to_string(),
passed: false,
details: "Response does not contain 'foo'".to_string(),
}],
trace: RunTrace {
task_name: "fail-task".to_string(),
started_at: "2026-06-22T12:00:01+00:00".to_string(),
ended_at: "2026-06-22T12:00:02+00:00".to_string(),
duration_ms: 50,
iterations: 1,
steps: vec![],
llm_calls: vec![],
tool_calls: vec![],
final_response: "bar".to_string(),
},
error: None,
},
],
};
let text = report.format_text();
assert!(text.contains("Eval Report: test"));
assert!(text.contains("75.0%"));
assert!(text.contains("1/2 passed"));
assert!(text.contains("✅ pass-task"));
assert!(text.contains("❌ fail-task"));
}
#[test]
fn test_report_format_json() {
let report = EvalReport {
suite_name: "test".to_string(),
ran_at: "2026-06-22T12:00:00+00:00".to_string(),
duration_ms: 100,
overall_score: 1.0,
total_tasks: 1,
passed_tasks: 1,
failed_tasks: 0,
results: vec![],
};
let json = report.format_json();
assert_eq!(json["suite_name"], "test");
assert_eq!(json["overall_score"], 1.0);
}
#[test]
fn test_eval_config_from_file_not_found() {
let result = EvalConfig::from_file("/tmp/nonexistent-eval-config.toml");
assert!(result.is_err());
}
#[test]
fn test_assertion_regex_invalid_pattern() {
let result =
check_single_assertion("hello", &Assertion::Regex(r"[invalid".to_string()), None);
assert!(!result.passed);
assert!(result.details.contains("Invalid regex"));
}
#[test]
fn test_trace_step_serialization() {
let step = TraceStep {
number: 0,
step_type: StepType::Thought,
content: "test".to_string(),
duration_ms: 100,
};
let json = serde_json::to_string(&step).unwrap();
assert!(json.contains("Thought"));
}
#[test]
fn test_tool_call_trace_serialization() {
let trace = ToolCallTrace {
iteration: 0,
tool_name: "shell_exec".to_string(),
arguments: serde_json::json!({"command": "echo hello"}),
success: true,
output_preview: "hello".to_string(),
duration_ms: 50,
};
let json = serde_json::to_string(&trace).unwrap();
assert!(json.contains("shell_exec"));
assert!(json.contains("echo hello"));
}
}