steer_core/runners/
one_shot_runner.rs

1use serde::{Deserialize, Serialize};
2use tracing::{error, info};
3
4use crate::api::Model;
5use crate::app::conversation::{MessageData, UserContent};
6use crate::app::{AppCommand, AppConfig, Message};
7use crate::config::LlmConfigProvider;
8use crate::error::{Error, Result};
9use crate::session::state::WorkspaceConfig;
10
11#[cfg(not(test))]
12use crate::auth::DefaultAuthStorage;
13
14use crate::session::{
15    manager::SessionManager,
16    state::{SessionConfig, SessionToolConfig, ToolApprovalPolicy},
17};
18
19/// Contains the result of a single agent run
20#[derive(Debug, Clone, Serialize, Deserialize)]
21pub struct RunOnceResult {
22    /// The final assistant message after all tools have been executed
23    pub final_message: Message,
24    /// The session ID of the session that was used
25    pub session_id: String,
26}
27
/// Orchestrates single non-interactive agent loop executions using the session system.
///
/// All `OneShotRunner` operations go through the unified session-based
/// architecture, which provides consistent tool configuration, approval
/// policies, and persistence.
///
/// The type carries no state; it only namespaces the associated functions.
#[derive(Debug, Clone, Copy)]
pub struct OneShotRunner;
33
34impl Default for OneShotRunner {
35    fn default() -> Self {
36        Self::new()
37    }
38}
39
40impl OneShotRunner {
41    /// Creates a new OneShotRunner
42    pub fn new() -> Self {
43        Self
44    }
45
46    /// Run a one-shot task in an existing session
47    pub async fn run_in_session(
48        session_manager: &SessionManager,
49        session_id: String,
50        message: String,
51    ) -> Result<RunOnceResult> {
52        // 1. Resume or activate the session if not already active
53        #[cfg(not(test))]
54        let app_config = {
55            let storage = std::sync::Arc::new(DefaultAuthStorage::new()?);
56            AppConfig {
57                llm_config_provider: LlmConfigProvider::new(storage),
58            }
59        };
60
61        #[cfg(test)]
62        let app_config = {
63            let storage = std::sync::Arc::new(crate::test_utils::InMemoryAuthStorage::new());
64            AppConfig {
65                llm_config_provider: LlmConfigProvider::new(storage),
66            }
67        };
68
69        let command_tx = session_manager
70            .resume_session(&session_id, app_config)
71            .await?;
72
73        // 2. Take the event receiver for this session (like TUI does)
74        let event_rx = session_manager.take_event_receiver(&session_id).await?;
75
76        info!(session_id = %session_id, message = %message, "Sending message to session");
77
78        // 3. Send the user message
79        command_tx
80            .send(AppCommand::ProcessUserInput(message))
81            .await
82            .map_err(|e| {
83                Error::InvalidOperation(format!(
84                    "Failed to send message to session {session_id}: {e}"
85                ))
86            })?;
87
88        // 4. Process events to build the result (similar to TUI's event loop)
89        let result = Self::process_events(event_rx, &session_id).await;
90
91        if let Err(e) = session_manager.suspend_session(&session_id).await {
92            error!(session_id = %session_id, error = %e, "Failed to suspend session");
93        } else {
94            info!(session_id = %session_id, "Session suspended successfully");
95        }
96
97        // Return the result
98        result
99    }
100
101    /// Run a one-shot task in a new ephemeral session
102    pub async fn run_ephemeral(
103        session_manager: &SessionManager,
104        init_msgs: Vec<Message>,
105        model: Model,
106        tool_config: Option<SessionToolConfig>,
107        tool_policy: Option<ToolApprovalPolicy>,
108        system_prompt: Option<String>,
109    ) -> Result<RunOnceResult> {
110        // 1. Create ephemeral session with specified tool policy
111        let session_config = if let Some(config) = tool_config {
112            // Use provided tool config
113            let mut final_tool_config = config;
114            // Apply the tool policy if provided
115            if let Some(policy) = tool_policy {
116                final_tool_config.approval_policy = policy;
117            }
118
119            SessionConfig {
120                workspace: WorkspaceConfig::default(),
121                tool_config: final_tool_config,
122                system_prompt,
123                metadata: [
124                    ("mode".to_string(), "headless".to_string()),
125                    ("ephemeral".to_string(), "true".to_string()),
126                    ("created_by".to_string(), "one_shot_runner".to_string()),
127                    ("model".to_string(), model.to_string()),
128                ]
129                .into_iter()
130                .collect(),
131            }
132        } else {
133            // Use the default session config
134            let mut default_config = crate::utils::session::create_default_session_config();
135
136            // Apply the tool policy if provided
137            if let Some(policy) = tool_policy {
138                default_config.tool_config.approval_policy = policy;
139            }
140
141            // Update metadata
142            default_config.metadata = [
143                ("mode".to_string(), "headless".to_string()),
144                ("ephemeral".to_string(), "true".to_string()),
145                ("created_by".to_string(), "one_shot_runner".to_string()),
146                ("model".to_string(), model.to_string()),
147            ]
148            .into_iter()
149            .collect();
150
151            // Apply custom system prompt if provided
152            if system_prompt.is_some() {
153                default_config.system_prompt = system_prompt;
154            }
155
156            default_config
157        };
158
159        #[cfg(not(test))]
160        let app_config = {
161            let storage = std::sync::Arc::new(DefaultAuthStorage::new()?);
162            AppConfig {
163                llm_config_provider: LlmConfigProvider::new(storage),
164            }
165        };
166
167        #[cfg(test)]
168        let app_config = {
169            let storage = std::sync::Arc::new(crate::test_utils::InMemoryAuthStorage::new());
170            AppConfig {
171                llm_config_provider: LlmConfigProvider::new(storage),
172            }
173        };
174
175        let (session_id, _command_tx) = session_manager
176            .create_session(session_config, app_config)
177            .await?;
178
179        // 3. Process the final user message (this triggers the actual processing)
180        let user_content = match init_msgs.last() {
181            Some(message) => {
182                // Extract text content from the message
183                match &message.data {
184                    MessageData::User { content, .. } => {
185                        let text_content = content.iter().find_map(|c| match c {
186                            UserContent::Text { text } => Some(text.clone()),
187                            _ => None,
188                        });
189                        match text_content {
190                            Some(content) => content,
191                            None => {
192                                return Err(Error::InvalidOperation(
193                                    "Last message must contain text content".to_string(),
194                                ));
195                            }
196                        }
197                    }
198                    _ => {
199                        return Err(Error::InvalidOperation(
200                            "Last message must be from User".to_string(),
201                        ));
202                    }
203                }
204            }
205            None => {
206                return Err(Error::InvalidOperation(
207                    "No user message to process".to_string(),
208                ));
209            }
210        };
211
212        // 4. Run the main task using the session
213        Self::run_in_session(session_manager, session_id.clone(), user_content).await
214    }
215
216    /// Process events from the session and return the final result
217    async fn process_events(
218        mut event_rx: tokio::sync::mpsc::Receiver<crate::app::AppEvent>,
219        session_id: &str,
220    ) -> Result<RunOnceResult> {
221        use crate::app::AppEvent;
222
223        let mut messages = Vec::new();
224        info!(session_id = %session_id, "Starting event processing loop");
225
226        while let Some(event) = event_rx.recv().await {
227            match event {
228                AppEvent::MessageAdded { message, model: _ } => {
229                    info!(session_id = %session_id, role = ?message.role(), id = %message.id(), "MessageAdded event");
230                    messages.push(message);
231                }
232
233                AppEvent::MessageUpdated { id, .. } => {
234                    info!(session_id = %session_id, id = %id, "MessageUpdated event");
235                    // We'll get the final message in MessageAdded, so we can ignore updates
236                }
237
238                AppEvent::ProcessingCompleted => {
239                    info!(session_id = %session_id, "ProcessingCompleted event received");
240                    // Check if we have an assistant message
241                    if !messages.is_empty() {
242                        info!(session_id = %session_id, "Final message received, exiting event loop");
243                        break;
244                    }
245                }
246
247                AppEvent::Error { message } => {
248                    error!(session_id = %session_id, error = %message, "Error event");
249                    return Err(Error::InvalidOperation(format!(
250                        "Error during processing: {message}"
251                    )));
252                }
253
254                AppEvent::RequestToolApproval { .. } => {
255                    info!(session_id = %session_id, "RequestToolApproval event - this shouldn't happen in headless mode");
256                    // In headless mode, tools should be pre-approved or denied by policy
257                }
258
259                _ => {
260                    // Ignore other events like ProcessingStarted, ModelChanged, etc.
261                }
262            }
263        }
264
265        // Return the result
266        match messages.last() {
267            Some(_) => {
268                info!(
269                    session_id = %session_id,
270                    "Returning final result"
271                );
272                Ok(RunOnceResult {
273                    final_message: messages.last().unwrap().clone(),
274                    session_id: session_id.to_string(),
275                })
276            }
277            None => Err(Error::InvalidOperation("No message received".to_string())),
278        }
279    }
280}
281
282#[cfg(test)]
283mod tests {
284    use super::*;
285    use crate::app::conversation::{
286        AssistantContent, Message, MessageData, ToolResult, UserContent,
287    };
288    use crate::session::ToolVisibility;
289    use crate::session::stores::sqlite::SqliteSessionStore;
290    use crate::session::{SessionConfig, SessionManagerConfig, ToolApprovalPolicy};
291    use crate::test_utils;
292    use dotenvy::dotenv;
293    use std::collections::HashSet;
294    use std::sync::Arc;
295    use std::time::Duration;
296    use steer_tools::tools::read_only_workspace_tools;
297    use tempfile::TempDir;
298
299    async fn create_test_session_manager() -> (SessionManager, TempDir) {
300        let temp_dir = TempDir::new().unwrap();
301        let db_path = temp_dir.path().join("test.db");
302        let store = Arc::new(SqliteSessionStore::new(&db_path).await.unwrap());
303
304        let config = SessionManagerConfig {
305            max_concurrent_sessions: 10,
306            default_model: Model::default(),
307            auto_persist: true,
308        };
309        let manager = SessionManager::new(store, config);
310
311        (manager, temp_dir)
312    }
313
314    async fn create_test_app_config() -> crate::app::AppConfig {
315        dotenv().ok();
316        // Tests will fail if API keys are not configured
317        test_utils::test_app_config()
318    }
319
320    fn create_test_app_config_no_api() -> crate::app::AppConfig {
321        test_utils::test_app_config()
322    }
323    fn create_test_tool_approval_policy() -> ToolApprovalPolicy {
324        let tools = read_only_workspace_tools();
325        let tool_names = tools.iter().map(|t| t.name().to_string()).collect();
326        ToolApprovalPolicy::PreApproved { tools: tool_names }
327    }
328
329    #[tokio::test]
330    #[ignore = "Requires API keys and network access"]
331    async fn test_run_ephemeral_basic() {
332        dotenv().ok();
333        let (session_manager, _temp_dir) = create_test_session_manager().await;
334
335        let messages = vec![Message {
336            data: MessageData::User {
337                content: vec![UserContent::Text {
338                    text: "What is 2 + 2?".to_string(),
339                }],
340            },
341            timestamp: Message::current_timestamp(),
342            id: Message::generate_id("user", Message::current_timestamp()),
343            parent_message_id: None,
344        }];
345        let future = OneShotRunner::run_ephemeral(
346            &session_manager,
347            messages,
348            Model::ClaudeSonnet4_20250514,
349            Some(SessionToolConfig::read_only()),
350            Some(create_test_tool_approval_policy()),
351            None,
352        );
353
354        let result = tokio::time::timeout(std::time::Duration::from_secs(10), future)
355            .await
356            .unwrap()
357            .unwrap();
358
359        assert!(!result.final_message.id().is_empty());
360        println!("Ephemeral run succeeded: {:?}", result.final_message);
361
362        // Verify the response contains something reasonable
363        let content = match &result.final_message.data {
364            MessageData::Assistant { content, .. } => content,
365            _ => unreachable!("expected assistant message, got {:?}", result.final_message),
366        };
367        let text_content = content.iter().find_map(|c| match c {
368            AssistantContent::Text { text } => Some(text),
369            _ => None,
370        });
371        let content = text_content.expect("No text content found in assistant message");
372        assert!(!content.is_empty(), "Response should not be empty");
373        // For "What is 2 + 2?", we expect the answer to contain "4"
374        assert!(
375            content.contains("4"),
376            "Expected response to contain '4', got: {content}"
377        );
378    }
379
380    #[tokio::test]
381    async fn test_session_creation_and_persistence() {
382        let (session_manager, _temp_dir) = create_test_session_manager().await;
383
384        // Create a session with custom config
385        let mut tool_config = SessionToolConfig::read_only();
386        tool_config.approval_policy = create_test_tool_approval_policy();
387
388        let session_config = SessionConfig {
389            workspace: WorkspaceConfig::default(),
390            tool_config,
391            system_prompt: None,
392            metadata: [("test".to_string(), "value".to_string())].into(),
393        };
394
395        let app_config = create_test_app_config().await;
396
397        let (session_id, _command_tx) = session_manager
398            .create_session(session_config, app_config)
399            .await
400            .unwrap();
401
402        // Verify session exists
403        assert!(session_manager.is_session_active(&session_id).await);
404
405        // Verify session has correct configuration
406        let session = session_manager
407            .store()
408            .get_session(&session_id)
409            .await
410            .unwrap()
411            .unwrap();
412        assert_eq!(
413            session.config.metadata.get("test"),
414            Some(&"value".to_string())
415        );
416        assert_eq!(session.config.tool_config.backends.len(), 0); // read_only() uses default backends
417        assert!(matches!(
418            session.config.tool_config.visibility,
419            ToolVisibility::ReadOnly
420        ));
421    }
422
423    #[tokio::test]
424    #[ignore = "Requires API keys and network access"]
425    async fn test_run_in_session_with_real_api() {
426        let (session_manager, _temp_dir) = create_test_session_manager().await;
427
428        // Create a session
429        let mut tool_config = SessionToolConfig::read_only();
430        tool_config.approval_policy = create_test_tool_approval_policy();
431
432        let session_config = SessionConfig {
433            workspace: WorkspaceConfig::default(),
434            tool_config,
435            system_prompt: None,
436            metadata: [("test".to_string(), "api_test".to_string())].into(),
437        };
438
439        let app_config = create_test_app_config().await;
440
441        let (session_id, _command_tx) = session_manager
442            .create_session(session_config, app_config)
443            .await
444            .unwrap();
445
446        // Run a simple task in the session
447        let result = OneShotRunner::run_in_session(
448            &session_manager,
449            session_id.clone(),
450            "What is the capital of France?".to_string(),
451        )
452        .await;
453
454        match result {
455            Ok(run_result) => {
456                println!("Session run succeeded: {:?}", run_result.final_message);
457
458                let content = match &run_result.final_message.data {
459                    MessageData::Assistant { content, .. } => content.clone(),
460                    _ => unreachable!(
461                        "expected assistant message, got {:?}",
462                        run_result.final_message
463                    ),
464                };
465                let text_content = content.iter().find_map(|c| match c {
466                    AssistantContent::Text { text } => Some(text),
467                    _ => None,
468                });
469                let content = text_content.expect("expected text response in assistant message");
470                assert!(!content.is_empty(), "Response should not be empty");
471                // The answer should mention Paris
472                assert!(
473                    content.to_lowercase().contains("paris"),
474                    "Expected response to contain 'Paris', got: {content}"
475                );
476
477                // Verify the session was updated
478                let session_state = session_manager
479                    .get_session_state(&session_id)
480                    .await
481                    .unwrap()
482                    .unwrap();
483
484                // Should have at least user + assistant messages
485                assert!(
486                    session_state.messages.len() >= 2,
487                    "Expected at least 2 messages in session"
488                );
489
490                // First message should be the user input
491                let user_msg = &session_state.messages[0];
492                assert_eq!(user_msg.role(), crate::app::conversation::Role::User);
493
494                // Last message should be the assistant response
495                let assistant_msg = &session_state.messages[session_state.messages.len() - 1];
496                assert_eq!(
497                    assistant_msg.role(),
498                    crate::app::conversation::Role::Assistant
499                );
500            }
501            Err(e) => {
502                // If no API key is configured, this is expected
503                println!("Session run failed (expected if no API key): {e}");
504                assert!(
505                    e.to_string().contains("API key")
506                        || e.to_string().contains("authentication")
507                        || e.to_string().contains("timed out"),
508                    "Unexpected error: {e}"
509                );
510            }
511        }
512    }
513
514    #[tokio::test]
515    async fn test_run_ephemeral_empty_messages() {
516        let (session_manager, _temp_dir) = create_test_session_manager().await;
517
518        let result = OneShotRunner::run_ephemeral(
519            &session_manager,
520            vec![], // Empty messages
521            Model::ClaudeSonnet4_20250514,
522            None,
523            None,
524            None,
525        )
526        .await;
527
528        assert!(result.is_err());
529        assert!(
530            result
531                .err()
532                .unwrap()
533                .to_string()
534                .contains("No user message to process")
535        );
536    }
537
538    #[tokio::test]
539    async fn test_run_ephemeral_non_text_message() {
540        let (session_manager, _temp_dir) = create_test_session_manager().await;
541
542        let messages = vec![Message {
543            data: MessageData::Tool {
544                tool_use_id: "test".to_string(),
545                result: ToolResult::External(steer_tools::result::ExternalResult {
546                    tool_name: "test_tool".to_string(),
547                    payload: "test".to_string(),
548                }),
549            },
550            timestamp: Message::current_timestamp(),
551            id: Message::generate_id("tool", Message::current_timestamp()),
552            parent_message_id: None,
553        }];
554
555        let result = OneShotRunner::run_ephemeral(
556            &session_manager,
557            messages,
558            Model::ClaudeSonnet4_20250514,
559            None,
560            None,
561            None,
562        )
563        .await;
564
565        assert!(result.is_err());
566        assert!(
567            result
568                .err()
569                .unwrap()
570                .to_string()
571                .contains("Last message must be from User")
572        );
573    }
574
575    #[tokio::test]
576    #[ignore = "Test makes real API calls and expects failure, but now succeeds with in-memory auth"]
577    async fn test_run_in_session_without_timeout() {
578        let (session_manager, _temp_dir) = create_test_session_manager().await;
579
580        // Create a session
581        let mut tool_config = SessionToolConfig::read_only();
582        tool_config.approval_policy = ToolApprovalPolicy::PreApproved {
583            tools: HashSet::new(),
584        };
585
586        let session_config = SessionConfig {
587            workspace: WorkspaceConfig::default(),
588            tool_config,
589            system_prompt: None,
590            metadata: [("test".to_string(), "no_timeout_test".to_string())].into(),
591        };
592
593        let app_config = create_test_app_config_no_api(); // No API key to test error handling
594
595        let (session_id, _command_tx) = session_manager
596            .create_session(session_config, app_config)
597            .await
598            .unwrap();
599
600        let result =
601            OneShotRunner::run_in_session(&session_manager, session_id, "Test message".to_string())
602                .await;
603
604        // Should fail due to API key issues, not timeout
605        assert!(result.is_err());
606        let error_msg = result.err().unwrap().to_string();
607        // Should not contain timeout-related errors since timeout was removed
608        assert!(!error_msg.contains("timed out"));
609    }
610
611    #[tokio::test]
612    async fn test_run_in_session_nonexistent_session() {
613        let (session_manager, _temp_dir) = create_test_session_manager().await;
614
615        let result = OneShotRunner::run_in_session(
616            &session_manager,
617            "nonexistent-session-id".to_string(),
618            "Test message".to_string(),
619        )
620        .await;
621
622        assert!(result.is_err());
623        assert!(
624            result
625                .err()
626                .unwrap()
627                .to_string()
628                .contains("Session not active")
629        );
630    }
631
632    #[tokio::test]
633    #[ignore = "Requires API keys and network access"]
634    async fn test_run_ephemeral_with_multi_turn_conversation() {
635        let (session_manager, _temp_dir) = create_test_session_manager().await;
636
637        let messages = vec![
638            Message {
639                data: MessageData::User {
640                    content: vec![UserContent::Text {
641                        text: "What is 2+2? Don't give me the answer yet.".to_string(),
642                    }],
643                },
644                timestamp: Message::current_timestamp(),
645                id: Message::generate_id("user", Message::current_timestamp()),
646                parent_message_id: None,
647            },
648            Message {
649                data: MessageData::Assistant {
650                    content: vec![AssistantContent::Text {
651                        text: "Ok, I'll give you the answer once you're ready.".to_string(),
652                    }],
653                },
654                timestamp: Message::current_timestamp(),
655                id: Message::generate_id("assistant", Message::current_timestamp()),
656                parent_message_id: Some("user_0".to_string()),
657            },
658            Message {
659                data: MessageData::User {
660                    content: vec![UserContent::Text {
661                        text: "I'm ready. What is the answer?".to_string(),
662                    }],
663                },
664                timestamp: Message::current_timestamp(),
665                id: Message::generate_id("user", Message::current_timestamp()),
666                parent_message_id: Some("assistant_0".to_string()),
667            },
668        ];
669
670        let result = OneShotRunner::run_ephemeral(
671            &session_manager,
672            messages,
673            Model::ClaudeSonnet4_20250514,
674            Some(SessionToolConfig::read_only()),
675            None,
676            None,
677        )
678        .await;
679        let content = result.unwrap().final_message.content_string();
680        assert!(content.contains("4"));
681    }
682
683    #[tokio::test]
684    async fn test_session_state_polling_mechanism() {
685        let (session_manager, _temp_dir) = create_test_session_manager().await;
686
687        // Create a session
688        let mut tool_config = SessionToolConfig::read_only();
689        tool_config.approval_policy = ToolApprovalPolicy::PreApproved {
690            tools: HashSet::new(),
691        };
692
693        let session_config = SessionConfig {
694            workspace: WorkspaceConfig::default(),
695            tool_config,
696            system_prompt: None,
697            metadata: [("test".to_string(), "polling_test".to_string())].into(),
698        };
699
700        let app_config = create_test_app_config_no_api(); // Use fake key for infrastructure test
701
702        let (session_id, command_tx) = session_manager
703            .create_session(session_config, app_config)
704            .await
705            .unwrap();
706
707        // Verify initial state
708        let initial_state = session_manager
709            .get_session_state(&session_id)
710            .await
711            .unwrap()
712            .unwrap();
713        assert_eq!(initial_state.messages.len(), 0);
714
715        // Send a message manually without using run_in_session
716        command_tx
717            .send(AppCommand::ProcessUserInput("Test".to_string()))
718            .await
719            .unwrap();
720
721        // Wait longer for the message to be processed and persisted
722        let mut attempts = 0;
723        let max_attempts = 50; // 5 seconds total
724
725        loop {
726            tokio::time::sleep(Duration::from_millis(100)).await;
727            attempts += 1;
728
729            let updated_state = session_manager
730                .get_session_state(&session_id)
731                .await
732                .unwrap()
733                .unwrap();
734
735            if !updated_state.messages.is_empty() {
736                // Found the message, verify it's correct
737                let first_msg = &updated_state.messages[0];
738                assert_eq!(first_msg.role(), crate::app::conversation::Role::User);
739                assert!(matches!(first_msg.data, MessageData::User { .. }));
740                let MessageData::User { content, .. } = &first_msg.data else {
741                    unreachable!();
742                };
743                assert!(matches!(content.first(), Some(UserContent::Text { .. })));
744                let Some(UserContent::Text { text }) = content.first() else {
745                    unreachable!();
746                };
747                assert_eq!(text, "Test");
748                return; // Test passed
749            }
750            assert!(
751                attempts < max_attempts,
752                "Message was not added to session state after {} attempts. Current message count: {}",
753                max_attempts,
754                updated_state.messages.len()
755            );
756        }
757    }
758
759    #[tokio::test]
760    #[ignore = "Test makes real API calls and expects failure, but now succeeds with in-memory auth"]
761    async fn test_run_in_session_preserves_conversation_context() {
762        let (session_manager, _temp_dir) = create_test_session_manager().await;
763
764        // Create a session
765        let mut tool_config = SessionToolConfig::read_only();
766        tool_config.approval_policy = ToolApprovalPolicy::PreApproved {
767            tools: HashSet::new(),
768        };
769
770        let session_config = SessionConfig {
771            workspace: WorkspaceConfig::default(),
772            tool_config,
773            system_prompt: None,
774            metadata: [("test".to_string(), "context_test".to_string())].into(),
775        };
776
777        let app_config = create_test_app_config_no_api(); // Use fake key for infrastructure test
778
779        let (session_id, _command_tx) = session_manager
780            .create_session(session_config, app_config)
781            .await
782            .unwrap();
783
784        // Verify initial state is empty
785        let state_before = session_manager
786            .get_session_state(&session_id)
787            .await
788            .unwrap()
789            .unwrap();
790        assert_eq!(state_before.messages.len(), 0);
791
792        // Run a one-shot task in the session
793        // This should fail due to no API key, but the session should have the user message
794        let result = OneShotRunner::run_in_session(
795            &session_manager,
796            session_id.clone(),
797            "What is my name?".to_string(),
798        )
799        .await;
800
801        // Should fail due to no API key or timeout
802        assert!(result.is_err());
803
804        // Verify the session has the user message that was sent
805        let state_after = session_manager
806            .get_session_state(&session_id)
807            .await
808            .unwrap()
809            .unwrap();
810
811        // Should have at least the user message
812        assert!(!state_after.messages.is_empty());
813
814        // The first message should be the user input we sent
815        let first_msg = &state_after.messages[0];
816        assert_eq!(first_msg.role(), crate::app::conversation::Role::User);
817        assert!(matches!(first_msg.data, MessageData::User { .. }));
818        let MessageData::User { content, .. } = &first_msg.data else {
819            unreachable!();
820        };
821        assert!(matches!(content.first(), Some(UserContent::Text { .. })));
822        let Some(UserContent::Text { text }) = content.first() else {
823            unreachable!();
824        };
825        assert_eq!(text, "What is my name?");
826    }
827
828    #[tokio::test]
829    #[ignore = "Requires API keys and network access"]
830    async fn test_run_ephemeral_with_tool_usage() {
831        dotenv().ok();
832        let (session_manager, _temp_dir) = create_test_session_manager().await;
833
834        let messages = vec![Message {
835            data: MessageData::User {
836                content: vec![UserContent::Text {
837                    text: "List the files in the current directory".to_string(),
838                }],
839            },
840            timestamp: Message::current_timestamp(),
841            id: Message::generate_id("user", Message::current_timestamp()),
842            parent_message_id: None,
843        }];
844
845        let result = OneShotRunner::run_ephemeral(
846            &session_manager,
847            messages,
848            Model::ClaudeSonnet4_20250514,
849            Some(SessionToolConfig::read_only()),
850            Some(create_test_tool_approval_policy()),
851            None,
852        )
853        .await
854        .expect("Ephemeral run with tools should succeed with valid API key");
855
856        assert!(!result.final_message.id().is_empty());
857        println!(
858            "Ephemeral run with tools succeeded: {:?}",
859            result.final_message
860        );
861
862        // The response might be structured content with tool calls, which is expected
863        let has_content = match &result.final_message.data {
864            MessageData::Assistant { content, .. } => {
865                content.iter().any(|c| match c {
866                    AssistantContent::Text { text } => !text.is_empty(),
867                    _ => true, // Non-text blocks are also valid content
868                })
869            }
870            _ => false,
871        };
872        assert!(has_content, "Response should have some content");
873    }
874
875    #[tokio::test]
876    #[ignore = "Requires API keys and network access"]
877    async fn test_run_in_session_preserves_context() {
878        dotenv().ok();
879        let (session_manager, _temp_dir) = create_test_session_manager().await;
880
881        // Create a session
882        let mut tool_config = SessionToolConfig::read_only();
883        tool_config.approval_policy = create_test_tool_approval_policy();
884
885        let session_config = SessionConfig {
886            workspace: WorkspaceConfig::default(),
887            tool_config,
888            system_prompt: None,
889            metadata: [("test".to_string(), "context_test".to_string())].into(),
890        };
891
892        let app_config = create_test_app_config().await;
893
894        let (session_id, _command_tx) = session_manager
895            .create_session(session_config, app_config)
896            .await
897            .unwrap();
898
899        // First interaction: set context
900        let result1 = OneShotRunner::run_in_session(
901            &session_manager,
902            session_id.clone(),
903            "My name is Alice and I like pizza.".to_string(),
904        )
905        .await
906        .expect("First session run should succeed");
907
908        println!("First interaction: {:?}", result1.final_message);
909
910        // Second interaction: test if context is preserved
911        let result2 = OneShotRunner::run_in_session(
912            &session_manager,
913            session_id.clone(),
914            "What is my name and what do I like?".to_string(),
915        )
916        .await
917        .expect("Second session run should succeed");
918
919        println!("Second interaction: {:?}", result2.final_message);
920
921        // Verify the second response uses the context from the first
922        match &result2.final_message.data {
923            MessageData::Assistant { content, .. } => {
924                let text_content = content.iter().find_map(|c| match c {
925                    AssistantContent::Text { text } => Some(text),
926                    _ => None,
927                });
928
929                match text_content {
930                    Some(content) => {
931                        assert!(!content.is_empty(), "Response should not be empty");
932                        let content_lower = content.to_lowercase();
933
934                        // The AI should acknowledge the name Alice from the context
935                        // If it doesn't remember perfectly, it should at least acknowledge the user
936                        assert!(
937                            content_lower.contains("alice") || content_lower.contains("name"),
938                            "Expected response to reference the name or context, got: {content}"
939                        );
940                    }
941                    None => {
942                        unreachable!("expected text response in assistant message");
943                    }
944                }
945            }
946            _ => {
947                unreachable!(
948                    "expected assistant message, got {:?}",
949                    result2.final_message
950                );
951            }
952        }
953
954        // Verify the session has all the messages
955        let session_state = session_manager
956            .get_session_state(&session_id)
957            .await
958            .unwrap()
959            .unwrap();
960
961        // Should have at least 3 messages: user1, assistant1, user2, (and possibly assistant2)
962        // The AI might give the same response twice, which is ok for testing infrastructure
963        assert!(
964            session_state.messages.len() >= 3,
965            "Expected at least 3 messages in session, got {}",
966            session_state.messages.len()
967        );
968
969        println!("Session has {} messages", session_state.messages.len());
970        for (i, msg) in session_state.messages.iter().enumerate() {
971            println!("Message {}: {:?}", i, msg.role());
972        }
973    }
974}