use std::collections::HashMap;
use std::path::PathBuf;
use std::sync::Arc;
use tokio::sync::mpsc;
use crate::agent::Agent;
use crate::channels::{ChannelHub, SessionMap};
use crate::config::{AppConfig, IterationLimitConfig, ProviderKind};
use crate::events::EventStore;
use crate::llm_runtime::{router_from_models, SharedLlmRuntime};
use crate::memory::embeddings::EmbeddingService;
use crate::state::SqliteStateStore;
use crate::testing::TestChannel;
use crate::tools::command_risk::PermissionMode;
use crate::tools::memory::RememberFactTool;
use crate::tools::{SystemInfoTool, TerminalTool};
use crate::traits::{Channel, ModelProvider};
use crate::types::{ChannelContext, UserRole};
/// Reports whether live (real-provider) tests are enabled.
///
/// Live tests are opt-in: this returns `true` only when the `AIDAEMON_LIVE_TEST`
/// environment variable is set to exactly `"1"`. An unset variable, a non-Unicode
/// value, or any other value yields `false`.
fn should_run_live_tests() -> bool {
    matches!(std::env::var("AIDAEMON_LIVE_TEST").as_deref(), Ok("1"))
}
async fn setup_live_agent() -> anyhow::Result<LiveTestHarness> {
let config_path = PathBuf::from("config.toml");
if !config_path.exists() {
return Err(anyhow::anyhow!(
"config.toml not found — live tests require a valid config"
));
}
let mut config = AppConfig::load(&config_path)?;
config.provider.models.apply_defaults(&config.provider.kind);
let provider: Arc<dyn ModelProvider> = match config.provider.kind {
ProviderKind::OpenaiCompatible => Arc::new(
crate::providers::OpenAiCompatibleProvider::new(
&config.provider.base_url,
&config.provider.api_key,
)
.map_err(|e| anyhow::anyhow!("{}", e))?,
),
ProviderKind::XaiNative => Arc::new(
crate::providers::XaiNativeProvider::new_with_options(
&config.provider.api_key,
Some(&config.provider.base_url),
config.provider.max_tokens,
config.provider.extra_headers.clone(),
)
.map_err(|e| anyhow::anyhow!("{}", e))?,
),
ProviderKind::GoogleGenai => Arc::new(crate::providers::GoogleGenAiProvider::new(
&config.provider.api_key,
)),
ProviderKind::Anthropic => Arc::new(crate::providers::AnthropicNativeProvider::new(
&config.provider.api_key,
)),
};
let db_file = tempfile::NamedTempFile::new()?;
let db_path = db_file.path().to_str().unwrap().to_string();
let skills_dir = tempfile::TempDir::new()?;
let embedding_service = Arc::new(EmbeddingService::new()?);
let state = Arc::new(SqliteStateStore::new(&db_path, 100, None, embedding_service).await?);
let pool = sqlx::SqlitePool::connect(&format!("sqlite:{}", db_path)).await?;
let event_store = Arc::new(EventStore::new(pool.clone()).await?);
let (approval_tx, approval_rx) = mpsc::channel(16);
let terminal_tool = Arc::new(
TerminalTool::new(
vec!["*".to_string()],
approval_tx,
30,
8000,
PermissionMode::Yolo,
pool,
)
.await,
);
let tools: Vec<Arc<dyn crate::traits::Tool>> = vec![
Arc::new(SystemInfoTool),
Arc::new(RememberFactTool::new(
state.clone() as Arc<dyn crate::traits::StateStore>
)),
terminal_tool,
];
let model = config.provider.models.primary.clone();
let llm_runtime = SharedLlmRuntime::new(
provider.clone(),
router_from_models(config.provider.models.clone()),
config.provider.kind.clone(),
model.clone(),
);
let agent = Agent::new(
llm_runtime,
state.clone() as Arc<dyn crate::traits::StateStore>,
event_store,
tools,
model,
"You are a helpful assistant. Use terminal commands to gather information when asked."
.to_string(),
config_path,
skills_dir.path().to_path_buf(),
3,
50,
100,
8000,
30,
20,
None,
IterationLimitConfig::Unlimited,
None,
None,
None,
None,
None, None, true, crate::config::ContextWindowConfig {
progressive_facts: false,
..Default::default()
},
crate::config::PolicyConfig::default(),
crate::config::PathAliasConfig::default(),
None,
);
let channel = Arc::new(TestChannel::new());
let mut map = HashMap::new();
map.insert("live_test".to_string(), "test".to_string());
let session_map: SessionMap = Arc::new(tokio::sync::RwLock::new(map));
let hub = Arc::new(ChannelHub::new(
vec![channel.clone() as Arc<dyn Channel>],
session_map,
));
let hub_for_approvals = hub.clone();
let approval_task = tokio::spawn(async move {
hub_for_approvals.approval_listener(approval_rx).await;
});
Ok(LiveTestHarness {
agent,
state,
_channel: channel,
_hub: hub,
_db_file: db_file,
_skills_dir: skills_dir,
_approval_task: approval_task,
})
}
/// Everything a live test needs to keep alive for the duration of a run.
///
/// Only `agent` is driven directly by the tests in this file; the remaining fields
/// are stored so the objects built during setup are owned by the harness and are
/// dropped together with it.
// dead_code is allowed because most fields are held purely for ownership and are
// never read after construction.
#[allow(dead_code)]
struct LiveTestHarness {
/// The agent under test; tests call `handle_message` on it.
agent: Agent,
/// State store backing the agent (the same instance that was passed to `Agent::new`).
state: Arc<SqliteStateStore>,
/// Test channel registered with the hub.
_channel: Arc<TestChannel>,
/// Channel hub whose `approval_listener` is run by `_approval_task`.
_hub: Arc<ChannelHub>,
/// Temporary file used as the SQLite database path.
/// NOTE(review): presumably must outlive the agent because the temp file is removed on drop — confirm.
_db_file: tempfile::NamedTempFile,
/// Temporary directory passed to the agent as its skills directory.
_skills_dir: tempfile::TempDir,
/// Handle of the spawned task that runs the hub's approval listener.
_approval_task: tokio::task::JoinHandle<()>,
}
/// Live test: a broad, multi-step "inspect this system" prompt must complete within
/// 120 seconds, must not contain the "stuck in a loop" marker, and must produce a
/// response longer than 100 bytes.
///
/// Skipped (returns early) unless live tests are enabled via `should_run_live_tests`.
#[tokio::test]
async fn test_live_complex_prompt_no_stall() {
    /// Returns at most `max_chars` characters of `text` for use in failure messages.
    ///
    /// Truncation is done per `char` rather than by byte range: slicing a `str` at a
    /// byte offset that falls inside a multi-byte character panics, which would hide
    /// the real assertion message whenever the model answers with non-ASCII text.
    fn preview(text: &str, max_chars: usize) -> String {
        text.chars().take(max_chars).collect()
    }

    if !should_run_live_tests() {
        eprintln!("Skipping live test (set AIDAEMON_LIVE_TEST=1 to run)");
        return;
    }

    let harness = setup_live_agent()
        .await
        .expect("Failed to set up live agent");

    let response = tokio::time::timeout(
        std::time::Duration::from_secs(120),
        harness.agent.handle_message(
            "live_test",
            "Check this system thoroughly — OS, disk, running processes, and installed runtimes \
             (node, python, rust). Give me a complete report.",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        ),
    )
    .await
    .expect("Live test timed out after 120s")
    .expect("Live test agent returned error");

    assert!(
        !response.contains("stuck in a loop"),
        "Live agent should not trigger stall detection. Got: {}",
        preview(&response, 500)
    );
    assert!(
        response.len() > 100,
        "Live agent should produce a substantial response. Got {} chars: {}",
        response.len(),
        preview(&response, 200)
    );
}
/// Builds a [`LiveTestHarness`] that talks to the real provider described by
/// `config.toml`, using `system_prompt` as the agent's system prompt.
///
/// Setup steps, in order:
/// 1. Load `config.toml` from the current working directory and apply model defaults.
/// 2. Construct the model provider matching `config.provider.kind`.
/// 3. Create a temporary SQLite database file and a temporary skills directory, then
///    open the state store, a connection pool and the event store on that database.
/// 4. Register three tools: system info, remember-fact and terminal.
/// 5. Build the agent, a `TestChannel`-backed `ChannelHub`, and spawn the hub's
///    approval listener on the terminal tool's approval channel.
///
/// # Errors
///
/// Returns an error when `config.toml` does not exist or fails to load, when the
/// provider, embedding service, state store, pool or event store cannot be created,
/// when a temporary file/directory cannot be created, or when the temporary database
/// path is not valid UTF-8.
async fn setup_live_agent_with_prompt(system_prompt: &str) -> anyhow::Result<LiveTestHarness> {
    let config_path = PathBuf::from("config.toml");
    if !config_path.exists() {
        anyhow::bail!("config.toml not found — live tests require a valid config");
    }

    let mut config = AppConfig::load(&config_path)?;
    config.provider.models.apply_defaults(&config.provider.kind);

    // The constructors that can fail return an error that is re-wrapped through its
    // `Display` output so it can be propagated as `anyhow::Error`.
    let provider: Arc<dyn ModelProvider> = match config.provider.kind {
        ProviderKind::OpenaiCompatible => Arc::new(
            crate::providers::OpenAiCompatibleProvider::new(
                &config.provider.base_url,
                &config.provider.api_key,
            )
            .map_err(|e| anyhow::anyhow!("{}", e))?,
        ),
        ProviderKind::XaiNative => Arc::new(
            crate::providers::XaiNativeProvider::new_with_options(
                &config.provider.api_key,
                Some(&config.provider.base_url),
                config.provider.max_tokens,
                config.provider.extra_headers.clone(),
            )
            .map_err(|e| anyhow::anyhow!("{}", e))?,
        ),
        ProviderKind::GoogleGenai => Arc::new(crate::providers::GoogleGenAiProvider::new(
            &config.provider.api_key,
        )),
        ProviderKind::Anthropic => Arc::new(crate::providers::AnthropicNativeProvider::new(
            &config.provider.api_key,
        )),
    };

    // Each harness gets its own throwaway database file and skills directory.
    let db_file = tempfile::NamedTempFile::new()?;
    // This function returns `Result`, so a non-UTF-8 temp path is reported as an
    // error instead of panicking.
    let db_path = db_file
        .path()
        .to_str()
        .ok_or_else(|| {
            anyhow::anyhow!(
                "temporary database path is not valid UTF-8: {}",
                db_file.path().display()
            )
        })?
        .to_string();
    let skills_dir = tempfile::TempDir::new()?;

    let embedding_service = Arc::new(EmbeddingService::new()?);
    let state = Arc::new(SqliteStateStore::new(&db_path, 100, None, embedding_service).await?);
    // A separate pool on the same database file is shared by the event store and
    // the terminal tool.
    let pool = sqlx::SqlitePool::connect(&format!("sqlite:{}", db_path)).await?;
    let event_store = Arc::new(EventStore::new(pool.clone()).await?);

    let (approval_tx, approval_rx) = mpsc::channel(16);
    // NOTE(review): the "*" entry together with `PermissionMode::Yolo` presumably lets
    // every command run without an approval round-trip — confirm against `TerminalTool`.
    let terminal_tool = Arc::new(
        TerminalTool::new(
            vec!["*".to_string()],
            approval_tx,
            30,
            8000,
            PermissionMode::Yolo,
            pool,
        )
        .await,
    );

    let tools: Vec<Arc<dyn crate::traits::Tool>> = vec![
        Arc::new(SystemInfoTool),
        Arc::new(RememberFactTool::new(
            state.clone() as Arc<dyn crate::traits::StateStore>
        )),
        terminal_tool,
    ];

    let model = config.provider.models.primary.clone();
    let llm_runtime = SharedLlmRuntime::new(
        provider.clone(),
        router_from_models(config.provider.models.clone()),
        config.provider.kind.clone(),
        model.clone(),
    );

    // `Agent::new` takes a long positional argument list whose parameter names are
    // not visible from this file; the order below must be kept exactly as is.
    let agent = Agent::new(
        llm_runtime,
        state.clone() as Arc<dyn crate::traits::StateStore>,
        event_store,
        tools,
        model,
        system_prompt.to_string(),
        config_path,
        skills_dir.path().to_path_buf(),
        3,
        50,
        100,
        8000,
        30,
        20,
        None,
        IterationLimitConfig::Unlimited,
        None,
        None,
        None,
        None,
        None,
        None,
        true,
        crate::config::ContextWindowConfig {
            progressive_facts: false,
            ..Default::default()
        },
        crate::config::PolicyConfig::default(),
        crate::config::PathAliasConfig::default(),
        None,
    );

    let channel = Arc::new(TestChannel::new());
    // NOTE(review): presumably maps a session id to a channel name; only "live_test"
    // is registered here — confirm against `ChannelHub`.
    let mut map = HashMap::new();
    map.insert("live_test".to_string(), "test".to_string());
    let session_map: SessionMap = Arc::new(tokio::sync::RwLock::new(map));
    let hub = Arc::new(ChannelHub::new(
        vec![channel.clone() as Arc<dyn Channel>],
        session_map,
    ));

    // Run the hub's approval listener on the receiving end of the terminal tool's
    // approval channel.
    let hub_for_approvals = hub.clone();
    let approval_task = tokio::spawn(async move {
        hub_for_approvals.approval_listener(approval_rx).await;
    });

    Ok(LiveTestHarness {
        agent,
        state,
        _channel: channel,
        _hub: hub,
        _db_file: db_file,
        _skills_dir: skills_dir,
        _approval_task: approval_task,
    })
}
/// Live test: a trivial arithmetic question must be answered within 30 seconds and
/// the reply must contain the character `4`.
///
/// Skipped (returns early) unless live tests are enabled via `should_run_live_tests`.
#[tokio::test]
async fn test_live_simple_response() {
    let live_enabled = should_run_live_tests();
    if !live_enabled {
        eprintln!("Skipping live test (set AIDAEMON_LIVE_TEST=1 to run)");
        return;
    }

    let setup_result = setup_live_agent().await;
    let harness = setup_result.expect("Failed to set up live agent");

    // Futures are lazy, so building the turn first and wrapping it afterwards is
    // equivalent to constructing it inline inside `timeout`.
    let deadline = std::time::Duration::from_secs(30);
    let turn = harness.agent.handle_message(
        "live_test",
        "What is 2+2? Answer with just the number.",
        None,
        UserRole::Owner,
        ChannelContext::private("telegram"),
        None,
    );
    let timed = tokio::time::timeout(deadline, turn).await;
    let response = timed
        .expect("Live test timed out after 30s")
        .expect("Live test agent returned error");

    let mentions_four = response.contains('4');
    assert!(
        mentions_four,
        "Live agent should answer 2+2=4. Got: {}",
        response
    );
}
/// System prompt used by the behavioral live tests.
///
/// It tells the model that it has filesystem access through the `terminal` tool and
/// lists six rules: never claim lack of access, act immediately on path/location
/// hints, explore when told "check it yourself", fuzzy-match directory names, prefer
/// local terminal commands over web search for local files, and include terminal
/// results in the reply.
///
/// The text is sent to the model verbatim, so any wording change alters what the
/// behavioral tests exercise.
const BEHAVIORAL_SYSTEM_PROMPT: &str = "\
You are a helpful AI assistant running on the user's local machine. \
You have full access to the filesystem via the `terminal` tool.\n\n\
## CRITICAL RULES (MUST FOLLOW)\n\n\
1. **You CAN access any file or folder** on this machine using `terminal`. \
NEVER say \"I can't access\", \"I don't have access\", or \"I can't browse\" — that is FALSE.\n\n\
2. **When the user gives a path or location hint** (e.g. \"it's in /tmp/foo\", \"check the projects folder\"), \
you MUST immediately run `ls` or `cat` on that path using `terminal`. Do NOT ask for clarification — ACT.\n\n\
3. **When the user says \"check it yourself\"**, you MUST use `terminal` to explore. \
Asking another question after \"check it yourself\" is WRONG.\n\n\
4. **Fuzzy-match names**: If the user says \"site-cars\" but the directory is \"cars-site\", \
recognize the match. List the directory first, then find the closest match.\n\n\
5. **Local-first**: When the user says something is on the local filesystem, \
ALWAYS use `terminal` (ls, cat, find). NEVER use web_search for local files.\n\n\
6. After using `terminal`, include the results in your response.";
/// Live behavioral test: after the user supplies an explicit location and says
/// "check it yourself", the agent must explore the directory instead of asking
/// again or claiming it has no access.
///
/// The test creates a temporary workspace containing `projects/{cars-site,blog,portfolio}`
/// and runs two turns in the same session: a vague question about "site-cars", then
/// a follow-up giving the workspace path. Only the second reply is asserted on:
/// it must mention at least one of the directory names, must not contain
/// clarification phrasing, and must not contain any "cannot access" phrasing.
///
/// Skipped (returns early) unless live tests are enabled via `should_run_live_tests`.
#[tokio::test]
async fn test_live_behavior_explores_on_location_hint() {
    /// Returns at most `max_chars` characters of `text` for use in failure messages.
    ///
    /// Truncation is done per `char` rather than by byte range: slicing a `str` at a
    /// byte offset that falls inside a multi-byte character panics, which would hide
    /// the real assertion message whenever the model answers with non-ASCII text.
    fn preview(text: &str, max_chars: usize) -> String {
        text.chars().take(max_chars).collect()
    }

    if !should_run_live_tests() {
        eprintln!("Skipping live test (set AIDAEMON_LIVE_TEST=1 to run)");
        return;
    }

    // Fixture: <tmp>/projects/{cars-site/README.md, blog/, portfolio/}
    let workspace = tempfile::TempDir::new().expect("Failed to create temp dir");
    let projects_dir = workspace.path().join("projects");
    std::fs::create_dir(&projects_dir).unwrap();
    std::fs::create_dir(projects_dir.join("cars-site")).unwrap();
    std::fs::create_dir(projects_dir.join("blog")).unwrap();
    std::fs::create_dir(projects_dir.join("portfolio")).unwrap();
    std::fs::write(
        projects_dir.join("cars-site").join("README.md"),
        "# Cars Site\nDeployed at: https://cars-site.example.com\n",
    )
    .unwrap();

    let harness = setup_live_agent_with_prompt(BEHAVIORAL_SYSTEM_PROMPT)
        .await
        .expect("Failed to set up live agent");
    let session = "behavior_location_hint";

    // Turn 1: deliberately vague and with the name reversed ("site-cars"); its reply
    // is not asserted on, it only establishes the conversation.
    let _response1 = tokio::time::timeout(
        std::time::Duration::from_secs(60),
        harness.agent.handle_message(
            session,
            "What's in the site-cars project?",
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        ),
    )
    .await
    .expect("Turn 1 timed out")
    .expect("Turn 1 error");

    // Turn 2: explicit location hint plus "check it yourself". `display()` is used
    // for the path (as the sibling tests do) so no UTF-8 unwrap is needed.
    let response2 = tokio::time::timeout(
        std::time::Duration::from_secs(60),
        harness.agent.handle_message(
            session,
            &format!(
                "It's in the projects folder at {}. Check it yourself.",
                workspace.path().display()
            ),
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        ),
    )
    .await
    .expect("Turn 2 timed out")
    .expect("Turn 2 error");

    let resp_lower = response2.to_lowercase();

    let found_directory_content = resp_lower.contains("cars-site")
        || resp_lower.contains("cars site")
        || resp_lower.contains("blog")
        || resp_lower.contains("portfolio");
    assert!(
        found_directory_content,
        "Agent should have explored the directory and found its contents \
         (cars-site, blog, portfolio). Got: {}",
        preview(&response2, 500)
    );

    let asks_clarification = resp_lower.contains("clarify")
        || resp_lower.contains("could you")
        || resp_lower.contains("which site")
        || resp_lower.contains("which project");
    assert!(
        !asks_clarification,
        "Agent should NOT ask for clarification when given an explicit path. Got: {}",
        preview(&response2, 500)
    );

    let bad_phrases = [
        "don't have access",
        "can't access",
        "cannot access",
        "can't browse",
        "cannot browse",
        "unable to browse",
        "unable to access",
        "not able to access",
    ];
    for phrase in &bad_phrases {
        assert!(
            !resp_lower.contains(phrase),
            "Agent falsely claimed it can't access files. Found '{}' in: {}",
            phrase,
            preview(&response2, 500)
        );
    }
}
/// Live behavioral test: asked to read a specific file, the agent must report the
/// file's contents and must not claim it lacks access.
///
/// The test writes `hello.txt` into a temporary directory, asks the agent to read it
/// by absolute path, and asserts that the reply contains the file's text and none of
/// the known "cannot access" phrasings (compared case-insensitively).
///
/// Skipped (returns early) unless live tests are enabled via `should_run_live_tests`.
#[tokio::test]
async fn test_live_behavior_never_claims_no_access() {
    /// Returns at most `max_chars` characters of `text` for use in failure messages.
    ///
    /// Truncation is done per `char` rather than by byte range: slicing a `str` at a
    /// byte offset that falls inside a multi-byte character panics, which would hide
    /// the real assertion message whenever the model answers with non-ASCII text.
    fn preview(text: &str, max_chars: usize) -> String {
        text.chars().take(max_chars).collect()
    }

    if !should_run_live_tests() {
        eprintln!("Skipping live test (set AIDAEMON_LIVE_TEST=1 to run)");
        return;
    }

    let workspace = tempfile::TempDir::new().expect("Failed to create temp dir");
    let file_path = workspace.path().join("hello.txt");
    std::fs::write(&file_path, "Hello from behavioral test!").unwrap();

    let harness = setup_live_agent_with_prompt(BEHAVIORAL_SYSTEM_PROMPT)
        .await
        .expect("Failed to set up live agent");
    let session = "behavior_no_access_claim";

    let response = tokio::time::timeout(
        std::time::Duration::from_secs(60),
        harness.agent.handle_message(
            session,
            &format!(
                "Read the file at {} and tell me what it says.",
                file_path.display()
            ),
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        ),
    )
    .await
    .expect("Test timed out")
    .expect("Test error");

    assert!(
        response.contains("Hello from behavioral test"),
        "Agent should have read and reported the file contents. Got: {}",
        preview(&response, 500)
    );

    // Same denial phrases as the location-hint test, so both behavioral tests
    // reject the same set of "I can't access that" wordings.
    let bad_phrases = [
        "don't have access",
        "can't access",
        "cannot access",
        "can't browse",
        "cannot browse",
        "unable to browse",
        "unable to access",
        "not able to access",
    ];
    let resp_lower = response.to_lowercase();
    for phrase in &bad_phrases {
        assert!(
            !resp_lower.contains(phrase),
            "Agent falsely claimed inability. Found '{}' in response",
            phrase
        );
    }
}
/// Live behavioral test: asked to list a local directory, the agent must report what
/// is actually there rather than asking for clarification.
///
/// The test creates `<tmp>/projects/my-app/package.json`, asks the agent to list the
/// projects under the temporary workspace, and asserts that the reply mentions
/// `my-app` and contains no clarification phrasing. It checks the outcome of the
/// turn only; it does not inspect which tools the agent invoked.
///
/// Skipped (returns early) unless live tests are enabled via `should_run_live_tests`.
#[tokio::test]
async fn test_live_behavior_no_web_search_for_local_files() {
    /// Returns at most `max_chars` characters of `text` for use in failure messages.
    ///
    /// Truncation is done per `char` rather than by byte range: slicing a `str` at a
    /// byte offset that falls inside a multi-byte character panics, which would hide
    /// the real assertion message whenever the model answers with non-ASCII text.
    fn preview(text: &str, max_chars: usize) -> String {
        text.chars().take(max_chars).collect()
    }

    if !should_run_live_tests() {
        eprintln!("Skipping live test (set AIDAEMON_LIVE_TEST=1 to run)");
        return;
    }

    // Fixture: <tmp>/projects/my-app/package.json
    let workspace = tempfile::TempDir::new().expect("Failed to create temp dir");
    let projects_dir = workspace.path().join("projects");
    std::fs::create_dir(&projects_dir).unwrap();
    std::fs::create_dir(projects_dir.join("my-app")).unwrap();
    std::fs::write(
        projects_dir.join("my-app").join("package.json"),
        r#"{"name": "my-app", "version": "1.0.0"}"#,
    )
    .unwrap();

    let harness = setup_live_agent_with_prompt(BEHAVIORAL_SYSTEM_PROMPT)
        .await
        .expect("Failed to set up live agent");
    let session = "behavior_no_web_search";

    let response = tokio::time::timeout(
        std::time::Duration::from_secs(60),
        harness.agent.handle_message(
            session,
            &format!(
                "List the projects in {}. What's in there?",
                workspace.path().display()
            ),
            None,
            UserRole::Owner,
            ChannelContext::private("telegram"),
            None,
        ),
    )
    .await
    .expect("Test timed out")
    .expect("Test error");

    let resp_lower = response.to_lowercase();
    assert!(
        resp_lower.contains("my-app"),
        "Agent should have listed the directory and found 'my-app'. Got: {}",
        preview(&response, 500)
    );

    let asks_clarification = resp_lower.contains("clarify")
        || resp_lower.contains("could you specify")
        || resp_lower.contains("which project");
    assert!(
        !asks_clarification,
        "Agent should NOT ask for clarification when given an explicit path. Got: {}",
        preview(&response, 500)
    );
}