steer_core/runners/
one_shot_runner.rs

1use serde::{Deserialize, Serialize};
2use tracing::{error, info};
3
4use crate::app::conversation::UserContent;
5use crate::app::{AppCommand, AppConfig, Message, MessageData};
6use crate::config::model::ModelId;
7use crate::error::{Error, Result};
8use crate::session::state::WorkspaceConfig;
9
10use crate::session::{
11    manager::SessionManager,
12    state::{SessionConfig, SessionToolConfig, ToolApprovalPolicy},
13};
14
/// Outcome of a single non-interactive agent run.
///
/// Produced by [`OneShotRunner::run_in_session`] and [`OneShotRunner::run_ephemeral`].
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RunOnceResult {
    /// The last message the session reported before the run finished. This is intended to be
    /// the assistant's final reply after all tools have been executed.
    pub final_message: Message,
    /// The session ID of the session that was used
    pub session_id: String,
}
23
/// Orchestrates single non-interactive agent loop executions using the session system.
///
/// This is a stateless unit struct: the run operations are associated functions that take
/// the [`SessionManager`] to operate on, so no instance is needed to call them. Runs go
/// through the session-based architecture, which supplies tool configuration, approval
/// policies, and persistence.
pub struct OneShotRunner;
29
30impl Default for OneShotRunner {
31    fn default() -> Self {
32        Self::new()
33    }
34}
35
36impl OneShotRunner {
37    /// Creates a new OneShotRunner
38    pub fn new() -> Self {
39        Self
40    }
41
42    /// Run a one-shot task in an existing session
43    pub async fn run_in_session(
44        session_manager: &SessionManager,
45        session_id: String,
46        message: String,
47    ) -> Result<RunOnceResult> {
48        // 1. Resume or activate the session if not already active
49        #[cfg(test)]
50        let app_config = AppConfig::default();
51
52        #[cfg(not(test))]
53        let app_config = AppConfig::new()?;
54
55        let command_tx = session_manager
56            .resume_session(&session_id, app_config)
57            .await?;
58
59        // 2. Take the event receiver for this session (like TUI does)
60        let event_rx = session_manager.take_event_receiver(&session_id).await?;
61
62        info!(session_id = %session_id, message = %message, "Sending message to session");
63
64        // 3. Send the user message
65        command_tx
66            .send(AppCommand::ProcessUserInput(message))
67            .await
68            .map_err(|e| {
69                Error::InvalidOperation(format!(
70                    "Failed to send message to session {session_id}: {e}"
71                ))
72            })?;
73
74        // 4. Process events to build the result (similar to TUI's event loop)
75        let result = Self::process_events(event_rx, &session_id).await;
76
77        if let Err(e) = session_manager.suspend_session(&session_id).await {
78            error!(session_id = %session_id, error = %e, "Failed to suspend session");
79        } else {
80            info!(session_id = %session_id, "Session suspended successfully");
81        }
82
83        // Return the result
84        result
85    }
86
87    /// Run a one-shot task in a new ephemeral session
88    pub async fn run_ephemeral(
89        session_manager: &SessionManager,
90        init_msgs: Vec<Message>,
91        model: ModelId,
92        tool_config: Option<SessionToolConfig>,
93        tool_policy: Option<ToolApprovalPolicy>,
94        system_prompt: Option<String>,
95    ) -> Result<RunOnceResult> {
96        // 1. Create ephemeral session with specified tool policy
97        let session_config = if let Some(config) = tool_config {
98            // Use provided tool config
99            let mut final_tool_config = config;
100            // Apply the tool policy if provided
101            if let Some(policy) = tool_policy {
102                final_tool_config.approval_policy = policy;
103            }
104
105            SessionConfig {
106                workspace: WorkspaceConfig::default(),
107                tool_config: final_tool_config,
108                system_prompt,
109                metadata: [
110                    ("mode".to_string(), "headless".to_string()),
111                    ("ephemeral".to_string(), "true".to_string()),
112                    ("created_by".to_string(), "one_shot_runner".to_string()),
113                    (
114                        "initial_model".to_string(),
115                        format!("{:?}/{}", model.0, model.1),
116                    ),
117                ]
118                .into_iter()
119                .collect(),
120            }
121        } else {
122            // Use the default session config
123            let mut default_config = crate::utils::session::create_default_session_config();
124
125            // Apply the tool policy if provided
126            if let Some(policy) = tool_policy {
127                default_config.tool_config.approval_policy = policy;
128            }
129
130            // Update metadata
131            default_config.metadata = [
132                ("mode".to_string(), "headless".to_string()),
133                ("ephemeral".to_string(), "true".to_string()),
134                ("created_by".to_string(), "one_shot_runner".to_string()),
135                (
136                    "initial_model".to_string(),
137                    format!("{:?}/{}", model.0, model.1),
138                ),
139            ]
140            .into_iter()
141            .collect();
142
143            // Apply custom system prompt if provided
144            if system_prompt.is_some() {
145                default_config.system_prompt = system_prompt;
146            }
147
148            default_config
149        };
150
151        #[cfg(test)]
152        let app_config = AppConfig::default();
153
154        #[cfg(not(test))]
155        let app_config = AppConfig::new()?;
156
157        let (session_id, command_tx) = session_manager
158            .create_session(session_config, app_config)
159            .await?;
160
161        // Set the model using ExecuteCommand
162        let model_str = format!("{:?}/{}", model.0, model.1).to_lowercase();
163        command_tx
164            .send(AppCommand::ExecuteCommand(
165                crate::app::conversation::AppCommandType::Model {
166                    target: Some(model_str),
167                },
168            ))
169            .await
170            .map_err(|_| Error::InvalidOperation("Failed to send model command".to_string()))?;
171
172        // 3. Process the final user message (this triggers the actual processing)
173        let user_content = match init_msgs.last() {
174            Some(message) => {
175                // Extract text content from the message
176                match &message.data {
177                    MessageData::User { content, .. } => {
178                        let text_content = content.iter().find_map(|c| match c {
179                            UserContent::Text { text } => Some(text.clone()),
180                            _ => None,
181                        });
182                        match text_content {
183                            Some(content) => content,
184                            None => {
185                                return Err(Error::InvalidOperation(
186                                    "Last message must contain text content".to_string(),
187                                ));
188                            }
189                        }
190                    }
191                    _ => {
192                        return Err(Error::InvalidOperation(
193                            "Last message must be from User".to_string(),
194                        ));
195                    }
196                }
197            }
198            None => {
199                return Err(Error::InvalidOperation(
200                    "No user message to process".to_string(),
201                ));
202            }
203        };
204
205        // 4. Run the main task using the session
206        Self::run_in_session(session_manager, session_id.clone(), user_content).await
207    }
208
209    /// Process events from the session and return the final result
210    async fn process_events(
211        mut event_rx: tokio::sync::mpsc::Receiver<crate::app::AppEvent>,
212        session_id: &str,
213    ) -> Result<RunOnceResult> {
214        use crate::app::AppEvent;
215
216        let mut messages = Vec::new();
217        info!(session_id = %session_id, "Starting event processing loop");
218
219        while let Some(event) = event_rx.recv().await {
220            match event {
221                AppEvent::MessageAdded { message, model: _ } => {
222                    info!(session_id = %session_id, role = ?message.role(), id = %message.id(), "MessageAdded event");
223                    messages.push(message);
224                }
225
226                AppEvent::MessageUpdated { id, .. } => {
227                    info!(session_id = %session_id, id = %id, "MessageUpdated event");
228                    // We'll get the final message in MessageAdded, so we can ignore updates
229                }
230
231                AppEvent::ProcessingCompleted => {
232                    info!(session_id = %session_id, "ProcessingCompleted event received");
233                    // Check if we have an assistant message
234                    if !messages.is_empty() {
235                        info!(session_id = %session_id, "Final message received, exiting event loop");
236                        break;
237                    }
238                }
239
240                AppEvent::Error { message } => {
241                    error!(session_id = %session_id, error = %message, "Error event");
242                    return Err(Error::InvalidOperation(format!(
243                        "Error during processing: {message}"
244                    )));
245                }
246
247                AppEvent::RequestToolApproval { .. } => {
248                    info!(session_id = %session_id, "RequestToolApproval event - this shouldn't happen in headless mode");
249                    // In headless mode, tools should be pre-approved or denied by policy
250                }
251
252                _ => {
253                    // Ignore other events like ProcessingStarted, ModelChanged, etc.
254                }
255            }
256        }
257
258        // Return the result
259        match messages.last() {
260            Some(_) => {
261                info!(
262                    session_id = %session_id,
263                    "Returning final result"
264                );
265                Ok(RunOnceResult {
266                    final_message: messages.last().unwrap().clone(),
267                    session_id: session_id.to_string(),
268                })
269            }
270            None => Err(Error::InvalidOperation("No message received".to_string())),
271        }
272    }
273}
274
275#[cfg(test)]
276mod tests {
277    use super::*;
278    use crate::app::conversation::{AssistantContent, Message, ToolResult, UserContent};
279    use crate::session::ToolVisibility;
280    use crate::session::stores::sqlite::SqliteSessionStore;
281    use crate::session::{SessionConfig, SessionManagerConfig, ToolApprovalPolicy};
282    use crate::test_utils;
283    use dotenvy::dotenv;
284    use std::collections::HashSet;
285    use std::sync::Arc;
286    use std::time::Duration;
287    use steer_tools::tools::read_only_workspace_tools;
288    use tempfile::TempDir;
289
290    async fn create_test_session_manager() -> (SessionManager, TempDir) {
291        let temp_dir = TempDir::new().unwrap();
292        let db_path = temp_dir.path().join("test.db");
293        let store = Arc::new(SqliteSessionStore::new(&db_path).await.unwrap());
294
295        let config = SessionManagerConfig {
296            max_concurrent_sessions: 10,
297            default_model: crate::config::model::builtin::claude_sonnet_4_20250514(),
298            auto_persist: true,
299        };
300        let manager = SessionManager::new(store, config);
301
302        (manager, temp_dir)
303    }
304
305    async fn create_test_app_config() -> crate::app::AppConfig {
306        dotenv().ok();
307        // Tests will fail if API keys are not configured
308        test_utils::test_app_config()
309    }
310
    /// Returns the shared test app config without loading a `.env` file first.
    fn create_test_app_config_no_api() -> crate::app::AppConfig {
        test_utils::test_app_config()
    }
314    fn create_test_tool_approval_policy() -> ToolApprovalPolicy {
315        let tools = read_only_workspace_tools();
316        let tool_names = tools.iter().map(|t| t.name().to_string()).collect();
317        ToolApprovalPolicy::PreApproved { tools: tool_names }
318    }
319
320    #[tokio::test]
321    #[ignore = "Requires API keys and network access"]
322    async fn test_run_ephemeral_basic() {
323        dotenv().ok();
324        let (session_manager, _temp_dir) = create_test_session_manager().await;
325
326        let messages = vec![Message {
327            data: MessageData::User {
328                content: vec![UserContent::Text {
329                    text: "What is 2 + 2?".to_string(),
330                }],
331            },
332            timestamp: Message::current_timestamp(),
333            id: Message::generate_id("user", Message::current_timestamp()),
334            parent_message_id: None,
335        }];
336        let future = OneShotRunner::run_ephemeral(
337            &session_manager,
338            messages,
339            crate::config::model::builtin::claude_3_5_sonnet_20241022(),
340            Some(SessionToolConfig::read_only()),
341            Some(create_test_tool_approval_policy()),
342            None,
343        );
344
345        let result = tokio::time::timeout(std::time::Duration::from_secs(10), future)
346            .await
347            .unwrap()
348            .unwrap();
349
350        assert!(!result.final_message.id().is_empty());
351        println!("Ephemeral run succeeded: {:?}", result.final_message);
352
353        // Verify the response contains something reasonable
354        let content = match &result.final_message.data {
355            MessageData::Assistant { content, .. } => content,
356            _ => unreachable!("expected assistant message, got {:?}", result.final_message),
357        };
358        let text_content = content.iter().find_map(|c| match c {
359            AssistantContent::Text { text } => Some(text),
360            _ => None,
361        });
362        let content = text_content.expect("No text content found in assistant message");
363        assert!(!content.is_empty(), "Response should not be empty");
364        // For "What is 2 + 2?", we expect the answer to contain "4"
365        assert!(
366            content.contains("4"),
367            "Expected response to contain '4', got: {content}"
368        );
369    }
370
371    #[tokio::test]
372    async fn test_session_creation_and_persistence() {
373        let (session_manager, _temp_dir) = create_test_session_manager().await;
374
375        // Create a session with custom config
376        let mut tool_config = SessionToolConfig::read_only();
377        tool_config.approval_policy = create_test_tool_approval_policy();
378
379        let session_config = SessionConfig {
380            workspace: WorkspaceConfig::default(),
381            tool_config,
382            system_prompt: None,
383            metadata: [("test".to_string(), "value".to_string())].into(),
384        };
385
386        let app_config = create_test_app_config().await;
387
388        let (session_id, _command_tx) = session_manager
389            .create_session(session_config, app_config)
390            .await
391            .unwrap();
392
393        // Verify session exists
394        assert!(session_manager.is_session_active(&session_id).await);
395
396        // Verify session has correct configuration
397        let session = session_manager
398            .store()
399            .get_session(&session_id)
400            .await
401            .unwrap()
402            .unwrap();
403        assert_eq!(
404            session.config.metadata.get("test"),
405            Some(&"value".to_string())
406        );
407        assert_eq!(session.config.tool_config.backends.len(), 0); // read_only() uses default backends
408        assert!(matches!(
409            session.config.tool_config.visibility,
410            ToolVisibility::ReadOnly
411        ));
412    }
413
414    #[tokio::test]
415    #[ignore = "Requires API keys and network access"]
416    async fn test_run_in_session_with_real_api() {
417        let (session_manager, _temp_dir) = create_test_session_manager().await;
418
419        // Create a session
420        let mut tool_config = SessionToolConfig::read_only();
421        tool_config.approval_policy = create_test_tool_approval_policy();
422
423        let session_config = SessionConfig {
424            workspace: WorkspaceConfig::default(),
425            tool_config,
426            system_prompt: None,
427            metadata: [("test".to_string(), "api_test".to_string())].into(),
428        };
429
430        let app_config = create_test_app_config().await;
431
432        let (session_id, _command_tx) = session_manager
433            .create_session(session_config, app_config)
434            .await
435            .unwrap();
436
437        // Run a simple task in the session
438        let result = OneShotRunner::run_in_session(
439            &session_manager,
440            session_id.clone(),
441            "What is the capital of France?".to_string(),
442        )
443        .await;
444
445        match result {
446            Ok(run_result) => {
447                println!("Session run succeeded: {:?}", run_result.final_message);
448
449                let content = match &run_result.final_message.data {
450                    MessageData::Assistant { content, .. } => content.clone(),
451                    _ => unreachable!(
452                        "expected assistant message, got {:?}",
453                        run_result.final_message
454                    ),
455                };
456                let text_content = content.iter().find_map(|c| match c {
457                    AssistantContent::Text { text } => Some(text),
458                    _ => None,
459                });
460                let content = text_content.expect("expected text response in assistant message");
461                assert!(!content.is_empty(), "Response should not be empty");
462                // The answer should mention Paris
463                assert!(
464                    content.to_lowercase().contains("paris"),
465                    "Expected response to contain 'Paris', got: {content}"
466                );
467
468                // Verify the session was updated
469                let session_state = session_manager
470                    .get_session_state(&session_id)
471                    .await
472                    .unwrap()
473                    .unwrap();
474
475                // Should have at least user + assistant messages
476                assert!(
477                    session_state.messages.len() >= 2,
478                    "Expected at least 2 messages in session"
479                );
480
481                // First message should be the user input
482                let user_msg = &session_state.messages[0];
483                assert_eq!(user_msg.role(), crate::app::conversation::Role::User);
484
485                // Last message should be the assistant response
486                let assistant_msg = &session_state.messages[session_state.messages.len() - 1];
487                assert_eq!(
488                    assistant_msg.role(),
489                    crate::app::conversation::Role::Assistant
490                );
491            }
492            Err(e) => {
493                // If no API key is configured, this is expected
494                println!("Session run failed (expected if no API key): {e}");
495                assert!(
496                    e.to_string().contains("API key")
497                        || e.to_string().contains("authentication")
498                        || e.to_string().contains("timed out"),
499                    "Unexpected error: {e}"
500                );
501            }
502        }
503    }
504
505    #[tokio::test]
506    async fn test_run_ephemeral_empty_messages() {
507        let (session_manager, _temp_dir) = create_test_session_manager().await;
508
509        let result = OneShotRunner::run_ephemeral(
510            &session_manager,
511            vec![], // Empty messages
512            crate::config::model::builtin::claude_3_5_sonnet_20241022(),
513            None,
514            None,
515            None,
516        )
517        .await;
518
519        assert!(result.is_err());
520        assert!(
521            result
522                .err()
523                .unwrap()
524                .to_string()
525                .contains("No user message to process")
526        );
527    }
528
529    #[tokio::test]
530    async fn test_run_ephemeral_non_text_message() {
531        let (session_manager, _temp_dir) = create_test_session_manager().await;
532
533        let messages = vec![Message {
534            data: MessageData::Tool {
535                tool_use_id: "test".to_string(),
536                result: ToolResult::External(steer_tools::result::ExternalResult {
537                    tool_name: "test_tool".to_string(),
538                    payload: "test".to_string(),
539                }),
540            },
541            timestamp: Message::current_timestamp(),
542            id: Message::generate_id("tool", Message::current_timestamp()),
543            parent_message_id: None,
544        }];
545
546        let result = OneShotRunner::run_ephemeral(
547            &session_manager,
548            messages,
549            crate::config::model::builtin::claude_3_5_sonnet_20241022(),
550            None,
551            None,
552            None,
553        )
554        .await;
555
556        assert!(result.is_err());
557        assert!(
558            result
559                .err()
560                .unwrap()
561                .to_string()
562                .contains("Last message must be from User")
563        );
564    }
565
566    #[tokio::test]
567    #[ignore = "Test makes real API calls and expects failure, but now succeeds with in-memory auth"]
568    async fn test_run_in_session_without_timeout() {
569        let (session_manager, _temp_dir) = create_test_session_manager().await;
570
571        // Create a session
572        let mut tool_config = SessionToolConfig::read_only();
573        tool_config.approval_policy = ToolApprovalPolicy::PreApproved {
574            tools: HashSet::new(),
575        };
576
577        let session_config = SessionConfig {
578            workspace: WorkspaceConfig::default(),
579            tool_config,
580            system_prompt: None,
581            metadata: [("test".to_string(), "no_timeout_test".to_string())].into(),
582        };
583
584        let app_config = create_test_app_config_no_api(); // No API key to test error handling
585
586        let (session_id, _command_tx) = session_manager
587            .create_session(session_config, app_config)
588            .await
589            .unwrap();
590
591        let result =
592            OneShotRunner::run_in_session(&session_manager, session_id, "Test message".to_string())
593                .await;
594
595        // Should fail due to API key issues, not timeout
596        assert!(result.is_err());
597        let error_msg = result.err().unwrap().to_string();
598        // Should not contain timeout-related errors since timeout was removed
599        assert!(!error_msg.contains("timed out"));
600    }
601
602    #[tokio::test]
603    async fn test_run_in_session_nonexistent_session() {
604        let (session_manager, _temp_dir) = create_test_session_manager().await;
605
606        let result = OneShotRunner::run_in_session(
607            &session_manager,
608            "nonexistent-session-id".to_string(),
609            "Test message".to_string(),
610        )
611        .await;
612
613        assert!(result.is_err());
614        assert!(
615            result
616                .err()
617                .unwrap()
618                .to_string()
619                .contains("Session not active")
620        );
621    }
622
623    #[tokio::test]
624    #[ignore = "Requires API keys and network access"]
625    async fn test_run_ephemeral_with_multi_turn_conversation() {
626        let (session_manager, _temp_dir) = create_test_session_manager().await;
627
628        let messages = vec![
629            Message {
630                data: MessageData::User {
631                    content: vec![UserContent::Text {
632                        text: "What is 2+2? Don't give me the answer yet.".to_string(),
633                    }],
634                },
635                timestamp: Message::current_timestamp(),
636                id: Message::generate_id("user", Message::current_timestamp()),
637                parent_message_id: None,
638            },
639            Message {
640                data: MessageData::Assistant {
641                    content: vec![AssistantContent::Text {
642                        text: "Ok, I'll give you the answer once you're ready.".to_string(),
643                    }],
644                },
645                timestamp: Message::current_timestamp(),
646                id: Message::generate_id("assistant", Message::current_timestamp()),
647                parent_message_id: Some("user_0".to_string()),
648            },
649            Message {
650                data: MessageData::User {
651                    content: vec![UserContent::Text {
652                        text: "I'm ready. What is the answer?".to_string(),
653                    }],
654                },
655                timestamp: Message::current_timestamp(),
656                id: Message::generate_id("user", Message::current_timestamp()),
657                parent_message_id: Some("assistant_0".to_string()),
658            },
659        ];
660
661        let result = OneShotRunner::run_ephemeral(
662            &session_manager,
663            messages,
664            crate::config::model::builtin::claude_3_5_sonnet_20241022(),
665            Some(SessionToolConfig::read_only()),
666            None,
667            None,
668        )
669        .await;
670        let content = result.unwrap().final_message.content_string();
671        assert!(content.contains("4"));
672    }
673
674    #[tokio::test]
675    async fn test_session_state_polling_mechanism() {
676        let (session_manager, _temp_dir) = create_test_session_manager().await;
677
678        // Create a session
679        let mut tool_config = SessionToolConfig::read_only();
680        tool_config.approval_policy = ToolApprovalPolicy::PreApproved {
681            tools: HashSet::new(),
682        };
683
684        let session_config = SessionConfig {
685            workspace: WorkspaceConfig::default(),
686            tool_config,
687            system_prompt: None,
688            metadata: [("test".to_string(), "polling_test".to_string())].into(),
689        };
690
691        let app_config = create_test_app_config_no_api(); // Use fake key for infrastructure test
692
693        let (session_id, command_tx) = session_manager
694            .create_session(session_config, app_config)
695            .await
696            .unwrap();
697
698        // Verify initial state
699        let initial_state = session_manager
700            .get_session_state(&session_id)
701            .await
702            .unwrap()
703            .unwrap();
704        assert_eq!(initial_state.messages.len(), 0);
705
706        // Send a message manually without using run_in_session
707        command_tx
708            .send(AppCommand::ProcessUserInput("Test".to_string()))
709            .await
710            .unwrap();
711
712        // Wait longer for the message to be processed and persisted
713        let mut attempts = 0;
714        let max_attempts = 50; // 5 seconds total
715
716        loop {
717            tokio::time::sleep(Duration::from_millis(100)).await;
718            attempts += 1;
719
720            let updated_state = session_manager
721                .get_session_state(&session_id)
722                .await
723                .unwrap()
724                .unwrap();
725
726            if !updated_state.messages.is_empty() {
727                // Found the message, verify it's correct
728                let first_msg = &updated_state.messages[0];
729                assert_eq!(first_msg.role(), crate::app::conversation::Role::User);
730                assert!(matches!(first_msg.data, MessageData::User { .. }));
731                let MessageData::User { content, .. } = &first_msg.data else {
732                    unreachable!();
733                };
734                assert!(matches!(content.first(), Some(UserContent::Text { .. })));
735                let Some(UserContent::Text { text }) = content.first() else {
736                    unreachable!();
737                };
738                assert_eq!(text, "Test");
739                return; // Test passed
740            }
741            assert!(
742                attempts < max_attempts,
743                "Message was not added to session state after {} attempts. Current message count: {}",
744                max_attempts,
745                updated_state.messages.len()
746            );
747        }
748    }
749
750    #[tokio::test]
751    #[ignore = "Test makes real API calls and expects failure, but now succeeds with in-memory auth"]
752    async fn test_run_in_session_preserves_conversation_context() {
753        let (session_manager, _temp_dir) = create_test_session_manager().await;
754
755        // Create a session
756        let mut tool_config = SessionToolConfig::read_only();
757        tool_config.approval_policy = ToolApprovalPolicy::PreApproved {
758            tools: HashSet::new(),
759        };
760
761        let session_config = SessionConfig {
762            workspace: WorkspaceConfig::default(),
763            tool_config,
764            system_prompt: None,
765            metadata: [("test".to_string(), "context_test".to_string())].into(),
766        };
767
768        let app_config = create_test_app_config_no_api(); // Use fake key for infrastructure test
769
770        let (session_id, _command_tx) = session_manager
771            .create_session(session_config, app_config)
772            .await
773            .unwrap();
774
775        // Verify initial state is empty
776        let state_before = session_manager
777            .get_session_state(&session_id)
778            .await
779            .unwrap()
780            .unwrap();
781        assert_eq!(state_before.messages.len(), 0);
782
783        // Run a one-shot task in the session
784        // This should fail due to no API key, but the session should have the user message
785        let result = OneShotRunner::run_in_session(
786            &session_manager,
787            session_id.clone(),
788            "What is my name?".to_string(),
789        )
790        .await;
791
792        // Should fail due to no API key or timeout
793        assert!(result.is_err());
794
795        // Verify the session has the user message that was sent
796        let state_after = session_manager
797            .get_session_state(&session_id)
798            .await
799            .unwrap()
800            .unwrap();
801
802        // Should have at least the user message
803        assert!(!state_after.messages.is_empty());
804
805        // The first message should be the user input we sent
806        let first_msg = &state_after.messages[0];
807        assert_eq!(first_msg.role(), crate::app::conversation::Role::User);
808        assert!(matches!(first_msg.data, MessageData::User { .. }));
809        let MessageData::User { content, .. } = &first_msg.data else {
810            unreachable!();
811        };
812        assert!(matches!(content.first(), Some(UserContent::Text { .. })));
813        let Some(UserContent::Text { text }) = content.first() else {
814            unreachable!();
815        };
816        assert_eq!(text, "What is my name?");
817    }
818
819    #[tokio::test]
820    #[ignore = "Requires API keys and network access"]
821    async fn test_run_ephemeral_with_tool_usage() {
822        dotenv().ok();
823        let (session_manager, _temp_dir) = create_test_session_manager().await;
824
825        let messages = vec![Message {
826            data: MessageData::User {
827                content: vec![UserContent::Text {
828                    text: "List the files in the current directory".to_string(),
829                }],
830            },
831            timestamp: Message::current_timestamp(),
832            id: Message::generate_id("user", Message::current_timestamp()),
833            parent_message_id: None,
834        }];
835
836        let result = OneShotRunner::run_ephemeral(
837            &session_manager,
838            messages,
839            crate::config::model::builtin::claude_3_5_sonnet_20241022(),
840            Some(SessionToolConfig::read_only()),
841            Some(create_test_tool_approval_policy()),
842            None,
843        )
844        .await
845        .expect("Ephemeral run with tools should succeed with valid API key");
846
847        assert!(!result.final_message.id().is_empty());
848        println!(
849            "Ephemeral run with tools succeeded: {:?}",
850            result.final_message
851        );
852
853        // The response might be structured content with tool calls, which is expected
854        let has_content = match &result.final_message.data {
855            MessageData::Assistant { content, .. } => {
856                content.iter().any(|c| match c {
857                    AssistantContent::Text { text } => !text.is_empty(),
858                    _ => true, // Non-text blocks are also valid content
859                })
860            }
861            _ => false,
862        };
863        assert!(has_content, "Response should have some content");
864    }
865
866    #[tokio::test]
867    #[ignore = "Requires API keys and network access"]
868    async fn test_run_in_session_preserves_context() {
869        dotenv().ok();
870        let (session_manager, _temp_dir) = create_test_session_manager().await;
871
872        // Create a session
873        let mut tool_config = SessionToolConfig::read_only();
874        tool_config.approval_policy = create_test_tool_approval_policy();
875
876        let session_config = SessionConfig {
877            workspace: WorkspaceConfig::default(),
878            tool_config,
879            system_prompt: None,
880            metadata: [("test".to_string(), "context_test".to_string())].into(),
881        };
882
883        let app_config = create_test_app_config().await;
884
885        let (session_id, _command_tx) = session_manager
886            .create_session(session_config, app_config)
887            .await
888            .unwrap();
889
890        // First interaction: set context
891        let result1 = OneShotRunner::run_in_session(
892            &session_manager,
893            session_id.clone(),
894            "My name is Alice and I like pizza.".to_string(),
895        )
896        .await
897        .expect("First session run should succeed");
898
899        println!("First interaction: {:?}", result1.final_message);
900
901        // Second interaction: test if context is preserved
902        let result2 = OneShotRunner::run_in_session(
903            &session_manager,
904            session_id.clone(),
905            "What is my name and what do I like?".to_string(),
906        )
907        .await
908        .expect("Second session run should succeed");
909
910        println!("Second interaction: {:?}", result2.final_message);
911
912        // Verify the second response uses the context from the first
913        match &result2.final_message.data {
914            MessageData::Assistant { content, .. } => {
915                let text_content = content.iter().find_map(|c| match c {
916                    AssistantContent::Text { text } => Some(text),
917                    _ => None,
918                });
919
920                match text_content {
921                    Some(content) => {
922                        assert!(!content.is_empty(), "Response should not be empty");
923                        let content_lower = content.to_lowercase();
924
925                        // The AI should acknowledge the name Alice from the context
926                        // If it doesn't remember perfectly, it should at least acknowledge the user
927                        assert!(
928                            content_lower.contains("alice") || content_lower.contains("name"),
929                            "Expected response to reference the name or context, got: {content}"
930                        );
931                    }
932                    None => {
933                        unreachable!("expected text response in assistant message");
934                    }
935                }
936            }
937            _ => {
938                unreachable!(
939                    "expected assistant message, got {:?}",
940                    result2.final_message
941                );
942            }
943        }
944
945        // Verify the session has all the messages
946        let session_state = session_manager
947            .get_session_state(&session_id)
948            .await
949            .unwrap()
950            .unwrap();
951
952        // Should have at least 3 messages: user1, assistant1, user2, (and possibly assistant2)
953        // The AI might give the same response twice, which is ok for testing infrastructure
954        assert!(
955            session_state.messages.len() >= 3,
956            "Expected at least 3 messages in session, got {}",
957            session_state.messages.len()
958        );
959
960        println!("Session has {} messages", session_state.messages.len());
961        for (i, msg) in session_state.messages.iter().enumerate() {
962            println!("Message {}: {:?}", i, msg.role());
963        }
964    }
965}